author    rdivacky <rdivacky@FreeBSD.org>    2009-11-18 14:58:34 +0000
committer rdivacky <rdivacky@FreeBSD.org>    2009-11-18 14:58:34 +0000
commit    d2e985fd323c167e20f77b045a1d99ad166e65db (patch)
tree      6a111e552c75afc66228e3d8f19b6731e4013f10
parent    ded64d5d348ce8d8c5aa42cf63f6de9dd84b7e89 (diff)
Update LLVM to r89205.
-rw-r--r--CMakeLists.txt23
-rw-r--r--Makefile6
-rw-r--r--Makefile.config.in6
-rw-r--r--Makefile.rules18
-rw-r--r--autoconf/configure.ac35
-rwxr-xr-xcmake/config-ix.cmake49
-rwxr-xr-xcmake/modules/AddLLVM.cmake22
-rw-r--r--cmake/modules/GetTargetTriple.cmake11
-rw-r--r--cmake/modules/LLVMLibDeps.cmake1
-rwxr-xr-xconfigure126
-rw-r--r--docs/CommandGuide/lit.pod121
-rw-r--r--docs/LangRef.html21
-rw-r--r--docs/SourceLevelDebugging.html4
-rw-r--r--include/llvm-c/Transforms/Scalar.h3
-rw-r--r--include/llvm/ADT/DenseMap.h75
-rw-r--r--include/llvm/ADT/GraphTraits.h2
-rw-r--r--include/llvm/ADT/ImmutableMap.h4
-rw-r--r--include/llvm/ADT/ImmutableSet.h4
-rw-r--r--include/llvm/ADT/PointerUnion.h19
-rw-r--r--include/llvm/ADT/PriorityQueue.h1
-rw-r--r--include/llvm/ADT/SCCIterator.h18
-rw-r--r--include/llvm/ADT/STLExtras.h8
-rw-r--r--include/llvm/ADT/StringMap.h22
-rw-r--r--include/llvm/ADT/StringRef.h104
-rw-r--r--include/llvm/ADT/StringSwitch.h27
-rw-r--r--include/llvm/ADT/Trie.h1
-rw-r--r--include/llvm/ADT/Triple.h21
-rw-r--r--include/llvm/Analysis/CFGPrinter.h12
-rw-r--r--include/llvm/Analysis/ConstantFolding.h14
-rw-r--r--include/llvm/Analysis/DebugInfo.h93
-rw-r--r--include/llvm/Analysis/Dominators.h1
-rw-r--r--include/llvm/Analysis/IVUsers.h7
-rw-r--r--include/llvm/Analysis/InstructionSimplify.h74
-rw-r--r--include/llvm/Analysis/LazyValueInfo.h73
-rw-r--r--include/llvm/Analysis/LiveValues.h6
-rw-r--r--include/llvm/Analysis/LoopInfo.h8
-rw-r--r--include/llvm/Analysis/MemoryBuiltins.h25
-rw-r--r--include/llvm/Analysis/Passes.h6
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h3
-rw-r--r--include/llvm/Analysis/SparsePropagation.h2
-rw-r--r--include/llvm/Analysis/ValueTracking.h9
-rw-r--r--include/llvm/BasicBlock.h2
-rw-r--r--include/llvm/Bitcode/BitstreamWriter.h6
-rw-r--r--include/llvm/CodeGen/AsmPrinter.h16
-rw-r--r--include/llvm/CodeGen/BinaryObject.h2
-rw-r--r--include/llvm/CodeGen/CallingConvLower.h7
-rw-r--r--include/llvm/CodeGen/DAGISelHeader.h8
-rw-r--r--include/llvm/CodeGen/DwarfWriter.h20
-rw-r--r--include/llvm/CodeGen/LinkAllAsmWriterComponents.h1
-rw-r--r--include/llvm/CodeGen/LiveIntervalAnalysis.h25
-rw-r--r--include/llvm/CodeGen/LiveVariables.h11
-rw-r--r--include/llvm/CodeGen/MachORelocation.h2
-rw-r--r--include/llvm/CodeGen/MachineBasicBlock.h12
-rw-r--r--include/llvm/CodeGen/MachineCodeInfo.h2
-rw-r--r--include/llvm/CodeGen/MachineDominators.h2
-rw-r--r--include/llvm/CodeGen/MachineFrameInfo.h39
-rw-r--r--include/llvm/CodeGen/MachineFunction.h8
-rw-r--r--include/llvm/CodeGen/MachineFunctionAnalysis.h2
-rw-r--r--include/llvm/CodeGen/MachineInstr.h24
-rw-r--r--include/llvm/CodeGen/MachineJumpTableInfo.h5
-rw-r--r--include/llvm/CodeGen/MachineMemOperand.h2
-rw-r--r--include/llvm/CodeGen/MachineModuleInfo.h9
-rw-r--r--include/llvm/CodeGen/MachineRegisterInfo.h6
-rw-r--r--include/llvm/CodeGen/MachineRelocation.h32
-rw-r--r--include/llvm/CodeGen/PseudoSourceValue.h35
-rw-r--r--include/llvm/CodeGen/RuntimeLibcalls.h5
-rw-r--r--include/llvm/CodeGen/SelectionDAGNodes.h4
-rw-r--r--include/llvm/CodeGen/SlotIndexes.h190
-rw-r--r--include/llvm/CompilerDriver/BuiltinOptions.h3
-rw-r--r--include/llvm/Config/config.h.cmake53
-rw-r--r--include/llvm/Config/config.h.in15
-rw-r--r--include/llvm/Constants.h6
-rw-r--r--include/llvm/ExecutionEngine/ExecutionEngine.h27
-rw-r--r--include/llvm/ExecutionEngine/JITMemoryManager.h11
-rw-r--r--include/llvm/Function.h3
-rw-r--r--include/llvm/GlobalValue.h2
-rw-r--r--include/llvm/GlobalVariable.h20
-rw-r--r--include/llvm/InlineAsm.h14
-rw-r--r--include/llvm/InstrTypes.h56
-rw-r--r--include/llvm/IntrinsicInst.h22
-rw-r--r--include/llvm/IntrinsicsX86.td6
-rw-r--r--include/llvm/LinkAllPasses.h2
-rw-r--r--include/llvm/Linker.h18
-rw-r--r--include/llvm/MC/MCAsmLexer.h2
-rw-r--r--include/llvm/MC/MCContext.h8
-rw-r--r--include/llvm/MC/MCExpr.h2
-rw-r--r--include/llvm/MC/MCSection.h6
-rw-r--r--include/llvm/MC/MCSectionELF.h4
-rw-r--r--include/llvm/MC/MCSectionMachO.h6
-rw-r--r--include/llvm/MC/MCStreamer.h2
-rw-r--r--include/llvm/MC/MCSymbol.h2
-rw-r--r--include/llvm/Metadata.h1
-rw-r--r--include/llvm/Module.h46
-rw-r--r--include/llvm/Pass.h2
-rw-r--r--include/llvm/PassAnalysisSupport.h3
-rw-r--r--include/llvm/PassManagers.h10
-rw-r--r--include/llvm/Support/CommandLine.h12
-rw-r--r--include/llvm/Support/Compiler.h16
-rw-r--r--include/llvm/Support/ConstantFolder.h1
-rw-r--r--include/llvm/Support/ConstantRange.h20
-rw-r--r--include/llvm/Support/ErrorHandling.h10
-rw-r--r--include/llvm/Support/IRBuilder.h14
-rw-r--r--include/llvm/Support/MemoryBuffer.h21
-rw-r--r--include/llvm/Support/StandardPasses.h4
-rw-r--r--include/llvm/Support/TargetFolder.h8
-rw-r--r--include/llvm/Support/raw_ostream.h2
-rw-r--r--include/llvm/Support/type_traits.h6
-rw-r--r--include/llvm/System/Host.h6
-rw-r--r--include/llvm/System/TimeValue.h4
-rw-r--r--include/llvm/Target/Target.td3
-rw-r--r--include/llvm/Target/TargetData.h73
-rw-r--r--include/llvm/Target/TargetInstrInfo.h63
-rw-r--r--include/llvm/Target/TargetLowering.h28
-rw-r--r--include/llvm/Target/TargetLoweringObjectFile.h10
-rw-r--r--include/llvm/Target/TargetRegisterInfo.h8
-rw-r--r--include/llvm/Target/TargetRegistry.h6
-rw-r--r--include/llvm/Target/TargetSubtarget.h14
-rw-r--r--include/llvm/Transforms/RSProfiling.h4
-rw-r--r--include/llvm/Transforms/Scalar.h10
-rw-r--r--include/llvm/Transforms/Utils/Cloning.h4
-rw-r--r--include/llvm/Transforms/Utils/Local.h24
-rw-r--r--include/llvm/Transforms/Utils/SSI.h1
-rw-r--r--include/llvm/TypeSymbolTable.h10
-rw-r--r--include/llvm/Value.h6
-rw-r--r--include/llvm/ValueSymbolTable.h4
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp56
-rw-r--r--lib/Analysis/CMakeLists.txt2
-rw-r--r--lib/Analysis/ConstantFolding.cpp159
-rw-r--r--lib/Analysis/DebugInfo.cpp191
-rw-r--r--lib/Analysis/IPA/Andersens.cpp6
-rw-r--r--lib/Analysis/IVUsers.cpp14
-rw-r--r--lib/Analysis/InstructionSimplify.cpp348
-rw-r--r--lib/Analysis/LazyValueInfo.cpp582
-rw-r--r--lib/Analysis/LiveValues.cpp4
-rw-r--r--lib/Analysis/LoopInfo.cpp21
-rw-r--r--lib/Analysis/MemoryBuiltins.cpp114
-rw-r--r--lib/Analysis/PointerTracking.cpp3
-rw-r--r--lib/Analysis/ScalarEvolution.cpp29
-rw-r--r--lib/Analysis/ValueTracking.cpp112
-rw-r--r--lib/AsmParser/LLParser.cpp2
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp134
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.h15
-rw-r--r--lib/CodeGen/AntiDepBreaker.h1
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp153
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h24
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp1020
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h122
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp89
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h12
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfPrinter.h2
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfWriter.cpp35
-rw-r--r--lib/CodeGen/BranchFolding.cpp689
-rw-r--r--lib/CodeGen/BranchFolding.h77
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp64
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp9
-rw-r--r--lib/CodeGen/LatencyPriorityQueue.cpp15
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp32
-rw-r--r--lib/CodeGen/LiveVariables.cpp49
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp63
-rw-r--r--lib/CodeGen/MachineFunction.cpp36
-rw-r--r--lib/CodeGen/MachineFunctionAnalysis.cpp2
-rw-r--r--lib/CodeGen/MachineInstr.cpp85
-rw-r--r--lib/CodeGen/MachineLICM.cpp65
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp2
-rw-r--r--lib/CodeGen/MachineVerifier.cpp23
-rw-r--r--lib/CodeGen/PHIElimination.cpp286
-rw-r--r--lib/CodeGen/PHIElimination.h30
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp15
-rw-r--r--lib/CodeGen/PreAllocSplitting.cpp91
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp9
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp3
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp29
-rw-r--r--lib/CodeGen/RegAllocLocal.cpp4
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp5
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp7
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp10
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp193
-rw-r--r--lib/CodeGen/SelectionDAG/CallingConvLower.cpp15
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp30
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp93
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.h2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp48
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp21
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp491
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuild.h14
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp34
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp10
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.cpp2
-rw-r--r--lib/CodeGen/SlotIndexes.cpp91
-rw-r--r--lib/CodeGen/Spiller.cpp44
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp44
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp2
-rw-r--r--lib/CodeGen/VirtRegMap.cpp8
-rw-r--r--lib/CodeGen/VirtRegRewriter.cpp57
-rw-r--r--lib/CompilerDriver/Action.cpp29
-rw-r--r--lib/CompilerDriver/BuiltinOptions.cpp2
-rw-r--r--lib/CompilerDriver/Main.cpp22
-rw-r--r--lib/CompilerDriver/Tool.cpp5
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp6
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp10
-rw-r--r--lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp88
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp19
-rw-r--r--lib/ExecutionEngine/JIT/JIT.h10
-rw-r--r--lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp2
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp714
-rw-r--r--lib/ExecutionEngine/JIT/JITMemoryManager.cpp81
-rw-r--r--lib/Linker/LinkArchives.cpp3
-rw-r--r--lib/Linker/LinkItems.cpp12
-rw-r--r--lib/Linker/Linker.cpp16
-rw-r--r--lib/MC/MCAsmStreamer.cpp4
-rw-r--r--lib/MC/MCAssembler.cpp2
-rw-r--r--lib/MC/MCContext.cpp8
-rw-r--r--lib/MC/MCExpr.cpp3
-rw-r--r--lib/MC/MCMachOStreamer.cpp4
-rw-r--r--lib/MC/MCNullStreamer.cpp2
-rw-r--r--lib/MC/MCSection.cpp2
-rw-r--r--lib/MC/MCSectionELF.cpp2
-rw-r--r--lib/MC/MCSectionMachO.cpp2
-rw-r--r--lib/MC/MCSymbol.cpp2
-rw-r--r--lib/Support/CommandLine.cpp9
-rw-r--r--lib/Support/ConstantRange.cpp61
-rw-r--r--lib/Support/Debug.cpp2
-rw-r--r--lib/Support/MemoryBuffer.cpp30
-rw-r--r--lib/Support/StringExtras.cpp22
-rw-r--r--lib/Support/StringMap.cpp6
-rw-r--r--lib/Support/StringRef.cpp57
-rw-r--r--lib/Support/Timer.cpp49
-rw-r--r--lib/Support/Triple.cpp46
-rw-r--r--lib/System/Host.cpp274
-rw-r--r--lib/System/Unix/Program.inc12
-rw-r--r--lib/Target/ARM/ARM.h2
-rw-r--r--lib/Target/ARM/ARMAddressingModes.h18
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp177
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h15
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp90
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h2
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp9
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp143
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp22
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h4
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp115
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp63
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp153
-rw-r--r--lib/Target/ARM/ARMISelLowering.h17
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp20
-rw-r--r--lib/Target/ARM/ARMInstrInfo.h9
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td135
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td35
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td10
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td10
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td263
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp99
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp13
-rw-r--r--lib/Target/ARM/ARMSubtarget.h15
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp19
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp54
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp7
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.h4
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp1
-rw-r--r--lib/Target/ARM/CMakeLists.txt1
-rw-r--r--lib/Target/ARM/NEONMoveFix.cpp8
-rw-r--r--lib/Target/ARM/NEONPreAllocPass.cpp28
-rw-r--r--lib/Target/ARM/README-Thumb.txt8
-rw-r--r--lib/Target/ARM/README.txt7
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp4
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.h2
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp7
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.h5
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp44
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp6
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h2
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.h5
-rw-r--r--lib/Target/Alpha/AlphaISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.cpp6
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.td2
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.cpp2
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.h2
-rw-r--r--lib/Target/Alpha/AlphaTargetMachine.cpp2
-rw-r--r--lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp2
-rw-r--r--lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp4
-rw-r--r--lib/Target/Blackfin/BlackfinISelLowering.cpp5
-rw-r--r--lib/Target/Blackfin/BlackfinInstrInfo.td1
-rw-r--r--lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp1
-rw-r--r--lib/Target/Blackfin/BlackfinRegisterInfo.cpp6
-rw-r--r--lib/Target/Blackfin/BlackfinRegisterInfo.h2
-rw-r--r--lib/Target/Blackfin/BlackfinTargetMachine.cpp2
-rw-r--r--lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp2
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp2
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp5
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td30
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp2
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.h2
-rw-r--r--lib/Target/CellSPU/SPUSubtarget.h2
-rw-r--r--lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp149
-rw-r--r--lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp33
-rw-r--r--lib/Target/MSP430/MSP430.td6
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp402
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp61
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h6
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp24
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td185
-rw-r--r--lib/Target/MSP430/MSP430MCAsmInfo.cpp6
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp4
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h2
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp2
-rw-r--r--lib/Target/MSP430/README.txt2
-rw-r--r--lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp2
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp32
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp12
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td8
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp15
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h1
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp19
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h2
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp4
-rw-r--r--lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp2
-rw-r--r--lib/Target/PIC16/PIC16ISelDAGToDAG.cpp1
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.cpp2
-rw-r--r--lib/Target/PIC16/PIC16RegisterInfo.cpp2
-rw-r--r--lib/Target/PIC16/PIC16RegisterInfo.h2
-rw-r--r--lib/Target/PIC16/PIC16TargetMachine.cpp2
-rw-r--r--lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp14
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp2
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp39
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp84
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h4
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp3
-rw-r--r--lib/Target/README.txt110
-rw-r--r--lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp2
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp1
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp15
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td2
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp2
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h2
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp2
-rw-r--r--lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp3
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp3
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp3
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h2
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp2
-rw-r--r--lib/Target/TargetData.cpp302
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp7
-rw-r--r--lib/Target/TargetSubtarget.cpp11
-rw-r--r--lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp2
-rw-r--r--lib/Target/X86/AsmPrinter/X86MCInstLower.cpp2
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp27
-rw-r--r--lib/Target/X86/X86FastISel.cpp2
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp18
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp102
-rw-r--r--lib/Target/X86/X86ISelLowering.h8
-rw-r--r--lib/Target/X86/X86Instr64bit.td2
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp169
-rw-r--r--lib/Target/X86/X86InstrInfo.h35
-rw-r--r--lib/Target/X86/X86InstrInfo.td2
-rw-r--r--lib/Target/X86/X86InstrSSE.td48
-rw-r--r--lib/Target/X86/X86JITInfo.cpp5
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp30
-rw-r--r--lib/Target/X86/X86RegisterInfo.h2
-rw-r--r--lib/Target/X86/X86Subtarget.cpp125
-rw-r--r--lib/Target/X86/X86Subtarget.h12
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp19
-rw-r--r--lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp2
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp5
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp19
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h6
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td2
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp13
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h4
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--lib/Transforms/Hello/CMakeLists.txt2
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp261
-rw-r--r--lib/Transforms/IPO/Inliner.cpp46
-rw-r--r--lib/Transforms/IPO/LoopExtractor.cpp4
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp54
-rw-r--r--lib/Transforms/Scalar/ABCD.cpp37
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/CondPropagate.cpp289
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp2
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp193
-rw-r--r--lib/Transforms/Scalar/GVN.cpp40
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp4
-rw-r--r--lib/Transforms/Scalar/InstructionCombining.cpp631
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp735
-rw-r--r--lib/Transforms/Scalar/LICM.cpp9
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopIndexSplit.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp13
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp876
-rw-r--r--lib/Transforms/Scalar/LoopUnroll.cpp177
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp7
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp36
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp11
-rw-r--r--lib/Transforms/Scalar/SCCVN.cpp6
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp4
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp41
-rw-r--r--lib/Transforms/Scalar/TailDuplication.cpp3
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp32
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp8
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp71
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp2
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp18
-rw-r--r--lib/Transforms/Utils/Local.cpp223
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp112
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp16
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp168
-rw-r--r--lib/VMCore/AsmWriter.cpp7
-rw-r--r--lib/VMCore/Constants.cpp6
-rw-r--r--lib/VMCore/Core.cpp19
-rw-r--r--lib/VMCore/Globals.cpp19
-rw-r--r--lib/VMCore/InlineAsm.cpp14
-rw-r--r--lib/VMCore/Instructions.cpp28
-rw-r--r--lib/VMCore/Metadata.cpp15
-rw-r--r--lib/VMCore/Module.cpp37
-rw-r--r--lib/VMCore/Pass.cpp4
-rw-r--r--lib/VMCore/PassManager.cpp8
-rw-r--r--lib/VMCore/TypeSymbolTable.cpp7
-rw-r--r--lib/VMCore/ValueSymbolTable.cpp2
-rw-r--r--lib/VMCore/Verifier.cpp6
-rw-r--r--test/Analysis/PointerTracking/sizes.ll10
-rw-r--r--test/CMakeLists.txt28
-rw-r--r--test/CodeGen/ARM/2009-07-18-RewriterBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll2
-rw-r--r--test/CodeGen/ARM/2009-09-24-spill-align.ll2
-rw-r--r--test/CodeGen/ARM/2009-11-01-NeonMoves.ll5
-rw-r--r--test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll66
-rw-r--r--test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll20
-rw-r--r--test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll42
-rw-r--r--test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll123
-rw-r--r--test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll113
-rw-r--r--test/CodeGen/ARM/arguments_f64_backfill.ll2
-rw-r--r--test/CodeGen/ARM/compare-call.ll2
-rw-r--r--test/CodeGen/ARM/fabss.ll6
-rw-r--r--test/CodeGen/ARM/fadds.ll6
-rw-r--r--test/CodeGen/ARM/fcopysign.ll2
-rw-r--r--test/CodeGen/ARM/fdivs.ll10
-rw-r--r--test/CodeGen/ARM/fixunsdfdi.ll2
-rw-r--r--test/CodeGen/ARM/fmacs.ll6
-rw-r--r--test/CodeGen/ARM/fmscs.ll10
-rw-r--r--test/CodeGen/ARM/fmuls.ll6
-rw-r--r--test/CodeGen/ARM/fnegs.ll6
-rw-r--r--test/CodeGen/ARM/fnmacs.ll8
-rw-r--r--test/CodeGen/ARM/fnmscs.ll4
-rw-r--r--test/CodeGen/ARM/fnmul.ll4
-rw-r--r--test/CodeGen/ARM/fp.ll32
-rw-r--r--test/CodeGen/ARM/fp_convert.ll8
-rw-r--r--test/CodeGen/ARM/fparith.ll20
-rw-r--r--test/CodeGen/ARM/fpcmp.ll14
-rw-r--r--test/CodeGen/ARM/fpconv.ll20
-rw-r--r--test/CodeGen/ARM/fpmem.ll4
-rw-r--r--test/CodeGen/ARM/fptoint.ll2
-rw-r--r--test/CodeGen/ARM/fsubs.ll4
-rw-r--r--test/CodeGen/ARM/globals.ll8
-rw-r--r--test/CodeGen/ARM/ifcvt5.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt6.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt7.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt8.ll2
-rw-r--r--test/CodeGen/ARM/indirectbr.ll6
-rw-r--r--test/CodeGen/ARM/neon_ld1.ll6
-rw-r--r--test/CodeGen/ARM/neon_ld2.ll2
-rw-r--r--test/CodeGen/ARM/select.ll2
-rw-r--r--test/CodeGen/ARM/spill-q.ll5
-rw-r--r--test/CodeGen/ARM/vfp.ll24
-rw-r--r--test/CodeGen/ARM/vget_lane.ll4
-rw-r--r--test/CodeGen/ARM/vmov.ll20
-rw-r--r--test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll75
-rw-r--r--test/CodeGen/Generic/switch-lower.ll18
-rw-r--r--test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll2
-rw-r--r--test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll22
-rw-r--r--test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll64
-rw-r--r--test/CodeGen/MSP430/AddrMode-bis-rx.ll74
-rw-r--r--test/CodeGen/MSP430/AddrMode-bis-xr.ll81
-rw-r--r--test/CodeGen/MSP430/AddrMode-mov-rx.ll67
-rw-r--r--test/CodeGen/MSP430/AddrMode-mov-xr.ll67
-rw-r--r--test/CodeGen/MSP430/Inst16mr.ll10
-rw-r--r--test/CodeGen/MSP430/Inst16rm.ll10
-rw-r--r--test/CodeGen/MSP430/Inst16rr.ll8
-rw-r--r--test/CodeGen/MSP430/Inst8mr.ll10
-rw-r--r--test/CodeGen/MSP430/Inst8rm.ll10
-rw-r--r--test/CodeGen/MSP430/Inst8rr.ll8
-rw-r--r--test/CodeGen/MSP430/inline-asm.ll3
-rw-r--r--test/CodeGen/MSP430/postinc.ll114
-rw-r--r--test/CodeGen/Mips/2008-08-03-ReturnDouble.ll1
-rw-r--r--test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll10
-rw-r--r--test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll105
-rw-r--r--test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll155
-rw-r--r--test/CodeGen/PowerPC/indirectbr.ll8
-rw-r--r--test/CodeGen/PowerPC/ppc-prologue.ll28
-rw-r--r--test/CodeGen/PowerPC/vec_auto_constant.ll36
-rw-r--r--test/CodeGen/PowerPC/vec_splat_constant.ll24
-rw-r--r--test/CodeGen/Thumb/machine-licm.ll41
-rw-r--r--test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll6
-rw-r--r--test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll85
-rw-r--r--test/CodeGen/Thumb2/2009-11-13-STRDBug.ll20
-rw-r--r--test/CodeGen/Thumb2/cross-rc-coalescing-2.ll2
-rw-r--r--test/CodeGen/Thumb2/large-stack.ll2
-rw-r--r--test/CodeGen/Thumb2/load-global.ll2
-rw-r--r--test/CodeGen/Thumb2/lsr-deficiency.ll37
-rw-r--r--test/CodeGen/Thumb2/machine-licm.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-cbnz.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt3.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-jtb.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-select_xform.ll21
-rw-r--r--test/CodeGen/Thumb2/thumb2-shifter.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-smla.ll5
-rw-r--r--test/CodeGen/Thumb2/thumb2-smul.ll9
-rw-r--r--test/CodeGen/Thumb2/thumb2-spill-q.ll5
-rw-r--r--test/CodeGen/Thumb2/thumb2-str_pre.ll7
-rw-r--r--test/CodeGen/Thumb2/thumb2-sxt_rot.ll14
-rw-r--r--test/CodeGen/Thumb2/thumb2-tbh.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-teq2.ll22
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxt_rot.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxtb.ll28
-rw-r--r--test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll6
-rw-r--r--test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll18
-rw-r--r--test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll6
-rw-r--r--test/CodeGen/X86/2008-04-15-LiveVariableBug.ll1
-rw-r--r--test/CodeGen/X86/2009-09-10-SpillComments.ll104
-rw-r--r--test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll133
-rw-r--r--test/CodeGen/X86/2009-11-16-MachineLICM.ll42
-rw-r--r--test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll28
-rw-r--r--test/CodeGen/X86/2009-11-17-UpdateTerminator.ll52
-rw-r--r--test/CodeGen/X86/bigstructret.ll17
-rw-r--r--test/CodeGen/X86/cmp0.ll21
-rw-r--r--test/CodeGen/X86/hidden-vis-5.ll30
-rw-r--r--test/CodeGen/X86/loop-blocks.ll10
-rw-r--r--test/CodeGen/X86/loop-strength-reduce2.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce3.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce5.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce6.ll2
-rw-r--r--test/CodeGen/X86/object-size.ll55
-rw-r--r--test/CodeGen/X86/sse3.ll4
-rw-r--r--test/CodeGen/X86/stack-color-with-reg.ll1
-rw-r--r--test/CodeGen/X86/tail-opts.ll408
-rw-r--r--test/CodeGen/X86/tailcall1.ll23
-rw-r--r--test/CodeGen/X86/vec_shuffle-3.ll1
-rw-r--r--test/CodeGen/X86/vec_zero-2.ll6
-rw-r--r--test/CodeGen/XCore/bigstructret.ll43
-rw-r--r--test/DebugInfo/2009-11-05-DeadGlobalVariable.ll17
-rw-r--r--test/DebugInfo/2009-11-06-InvalidDerivedType.ll13
-rw-r--r--test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll8
-rw-r--r--test/DebugInfo/2009-11-10-CurrentFn.ll20
-rw-r--r--test/DebugInfo/2009-11-10-ParentScope.ll26
-rw-r--r--test/FrontendC++/2006-11-06-StackTrace.cpp2
-rw-r--r--test/FrontendC++/2006-11-30-NoCompileUnit.cpp2
-rw-r--r--test/FrontendC++/2006-11-30-Pubnames.cpp2
-rw-r--r--test/FrontendC++/2009-07-15-LineNumbers.cpp1
-rw-r--r--test/FrontendC/Atomics-no64bit.c2
-rw-r--r--test/FrontendC/Atomics.c2
-rw-r--r--test/LLVMC/test_data/false.cpp1
-rw-r--r--test/LLVMC/test_data/false2.cpp1
-rw-r--r--test/LLVMC/test_data/together.c4
-rw-r--r--test/Makefile37
-rw-r--r--test/Transforms/ABCD/basic.ll27
-rw-r--r--test/Transforms/ABCD/dg.exp3
-rw-r--r--test/Transforms/DeadStoreElimination/2009-11-10-Trampoline.ll16
-rw-r--r--test/Transforms/DeadStoreElimination/const-pointers.ll39
-rw-r--r--test/Transforms/DeadStoreElimination/lifetime.ll19
-rw-r--r--test/Transforms/DeadStoreElimination/memintrinsics.ll47
-rw-r--r--test/Transforms/DeadStoreElimination/partial-overwrite.ll14
-rw-r--r--test/Transforms/GVN/null-aliases-nothing.ll20
-rw-r--r--test/Transforms/Inline/delete-call.ll22
-rw-r--r--test/Transforms/InstCombine/2008-01-21-MulTrunc.ll2
-rw-r--r--test/Transforms/InstCombine/apint-cast.ll2
-rw-r--r--test/Transforms/InstCombine/cast-mul-select.ll2
-rw-r--r--test/Transforms/InstCombine/cast-set.ll2
-rw-r--r--test/Transforms/InstCombine/cast.ll2
-rw-r--r--test/Transforms/InstCombine/compare-signs.ll29
-rw-r--r--test/Transforms/InstCombine/intrinsics.ll12
-rw-r--r--test/Transforms/InstCombine/invariant.ll16
-rw-r--r--test/Transforms/InstCombine/phi.ll140
-rw-r--r--test/Transforms/InstCombine/sext-misc.ll2
-rw-r--r--test/Transforms/InstCombine/udivrem-change-width.ll2
-rw-r--r--test/Transforms/JumpThreading/basic.ll238
-rw-r--r--test/Transforms/JumpThreading/crash.ll22
-rw-r--r--test/Transforms/LCSSA/indirectbr.ll542
-rw-r--r--test/Transforms/LoopSimplify/indirectbr.ll83
-rw-r--r--test/Transforms/LoopStrengthReduce/2009-11-10-LSRCrash.ll130
-rw-r--r--test/Transforms/LoopStrengthReduce/count-to-zero.ll42
-rw-r--r--test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll27
-rw-r--r--test/Transforms/SCCP/crash.ll5
-rw-r--r--test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/memcmp.ll4
-rw-r--r--test/Transforms/TailCallElim/nocapture.ll25
-rw-r--r--test/Transforms/TailCallElim/switch.ll34
-rw-r--r--test/Unit/lit.cfg3
-rw-r--r--test/Unit/lit.site.cfg.in10
-rw-r--r--test/lit.cfg25
-rw-r--r--test/site.exp.in5
-rw-r--r--tools/bugpoint/ToolRunner.cpp15
-rw-r--r--tools/llvm-config/CMakeLists.txt3
-rw-r--r--tools/lto/LTOCodeGenerator.cpp50
-rw-r--r--unittests/ADT/DenseMapTest.cpp12
-rw-r--r--unittests/ADT/StringRefTest.cpp97
-rw-r--r--unittests/ExecutionEngine/JIT/JITTest.cpp54
-rwxr-xr-xutils/Misc/zkill276
-rwxr-xr-xutils/NewNightlyTest.pl133
-rw-r--r--utils/TableGen/AsmWriterEmitter.cpp3
-rw-r--r--utils/TableGen/CodeGenTarget.h2
-rw-r--r--utils/TableGen/DAGISelEmitter.cpp11
-rw-r--r--utils/TableGen/RegisterInfoEmitter.cpp18
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp4
-rw-r--r--utils/lit/ExampleTests.ObjDir/lit.site.cfg15
-rw-r--r--utils/lit/ExampleTests/Clang/fsyntax-only.c4
-rw-r--r--utils/lit/ExampleTests/Clang/lit.cfg80
-rw-r--r--utils/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll3
-rw-r--r--utils/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp6
-rw-r--r--utils/lit/ExampleTests/LLVM.InTree/test/lit.cfg151
-rw-r--r--utils/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg10
-rw-r--r--utils/lit/ExampleTests/LLVM.InTree/test/site.exp30
-rw-r--r--utils/lit/ExampleTests/LLVM.OutOfTree/lit.local.cfg1
-rw-r--r--utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/Foo/lit.local.cfg0
-rw-r--r--utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg11
-rw-r--r--utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp30
-rw-r--r--utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/data.txt1
-rw-r--r--utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp6
-rw-r--r--utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll1
-rw-r--r--utils/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg151
-rw-r--r--utils/lit/ExampleTests/ShExternal/lit.local.cfg6
-rw-r--r--utils/lit/ExampleTests/ShInternal/lit.local.cfg6
-rw-r--r--utils/lit/ExampleTests/TclTest/lit.local.cfg5
-rw-r--r--utils/lit/ExampleTests/TclTest/stderr-pipe.ll1
-rw-r--r--utils/lit/ExampleTests/TclTest/tcl-redir-1.ll7
-rw-r--r--utils/lit/ExampleTests/fail.c2
-rw-r--r--utils/lit/ExampleTests/lit.cfg23
-rw-r--r--utils/lit/ExampleTests/pass.c1
-rw-r--r--utils/lit/ExampleTests/xfail.c2
-rw-r--r--utils/lit/ExampleTests/xpass.c2
-rw-r--r--utils/lit/LitFormats.py3
-rw-r--r--utils/lit/Test.py8
-rw-r--r--utils/lit/TestFormats.py67
-rw-r--r--utils/lit/TestRunner.py26
-rw-r--r--utils/lit/TestingConfig.py1
-rwxr-xr-xutils/lit/lit.py44
637 files changed, 17246 insertions, 7349 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5b30309..839def7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,6 +70,9 @@ else( MSVC )
CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
endif( MSVC )
+set(C_INCLUDE_DIRS "" CACHE STRING
+ "Colon separated list of directories clang will search for headers.")
+
set(LLVM_TARGET_ARCH "host"
CACHE STRING "Set target to use for LLVM JIT or use \"host\" for automatic detection.")
@@ -164,13 +167,19 @@ option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON)
set(ENABLE_PIC 0)
if( LLVM_ENABLE_PIC )
- if( SUPPORTS_FPIC_FLAG )
- message(STATUS "Building with -fPIC")
- add_llvm_definitions(-fPIC)
- set(ENABLE_PIC 1)
- else( SUPPORTS_FPIC_FLAG )
- message(STATUS "Warning: -fPIC not supported.")
- endif()
+ if( XCODE )
+ # Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't
+ # know how to disable this, so just force ENABLE_PIC off for now.
+ message(STATUS "Warning: -fPIC not supported with Xcode.")
+ else( XCODE )
+ if( SUPPORTS_FPIC_FLAG )
+ message(STATUS "Building with -fPIC")
+ add_llvm_definitions(-fPIC)
+ set(ENABLE_PIC 1)
+ else( SUPPORTS_FPIC_FLAG )
+ message(STATUS "Warning: -fPIC not supported.")
+ endif()
+ endif()
endif()
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR} )
diff --git a/Makefile b/Makefile
index 671f92c..31c1b69 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,11 @@ ifeq ($(BUILD_DIRS_ONLY),1)
else
DIRS := lib/System lib/Support utils lib/VMCore lib tools/llvm-config \
tools runtime docs unittests
- OPTIONAL_DIRS := examples projects bindings
+ OPTIONAL_DIRS := projects bindings
+endif
+
+ifeq ($(BUILD_EXAMPLES),1)
+ OPTIONAL_DIRS += examples
endif
EXTRA_DIST := test unittests llvm.spec include win32 Xcode
diff --git a/Makefile.config.in b/Makefile.config.in
index 2b9bbfd..44296a4 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -313,6 +313,12 @@ endif
# Location of the plugin header file for gold.
BINUTILS_INCDIR := @BINUTILS_INCDIR@
+C_INCLUDE_DIRS := @C_INCLUDE_DIRS@
+CXX_INCLUDE_ROOT := @CXX_INCLUDE_ROOT@
+CXX_INCLUDE_ARCH := @CXX_INCLUDE_ARCH@
+CXX_INCLUDE_32BIT_DIR = @CXX_INCLUDE_32BIT_DIR@
+CXX_INCLUDE_64BIT_DIR = @CXX_INCLUDE_64BIT_DIR@
+
# When ENABLE_LLVMC_DYNAMIC is enabled, LLVMC will link libCompilerDriver
# dynamically. This is needed to make dynamic plugins work on some targets
# (Windows).
diff --git a/Makefile.rules b/Makefile.rules
index b4a5a37..6bda564 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -338,11 +338,19 @@ ifeq ($(ENABLE_OPTIMIZED),1)
KEEP_SYMBOLS := 1
endif
else
- BuildMode := Debug
- CXX.Flags += -g
- C.Flags += -g
- LD.Flags += -g
- KEEP_SYMBOLS := 1
+ ifdef NO_DEBUG_SYMBOLS
+ BuildMode := Unoptimized
+ CXX.Flags +=
+ C.Flags +=
+ LD.Flags +=
+ KEEP_SYMBOLS := 1
+ else
+ BuildMode := Debug
+ CXX.Flags += -g
+ C.Flags += -g
+ LD.Flags += -g
+ KEEP_SYMBOLS := 1
+ endif
endif
ifeq ($(ENABLE_PROFILING),1)
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 7d0a9bb..e125112 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -667,6 +667,41 @@ case "$withval" in
*) AC_MSG_ERROR([Invalid path for --with-ocaml-libdir. Provide full path]) ;;
esac
+AC_ARG_WITH(c-include-dirs,
+ AS_HELP_STRING([--with-c-include-dirs],
+ [Colon separated list of directories clang will search for headers]),,
+ withval="")
+AC_DEFINE_UNQUOTED(C_INCLUDE_DIRS,"$withval",
+ [Directories clang will search for headers])
+
+AC_ARG_WITH(cxx-include-root,
+ AS_HELP_STRING([--with-cxx-include-root],
+ [Directory with the libstdc++ headers.]),,
+ withval="")
+AC_DEFINE_UNQUOTED(CXX_INCLUDE_ROOT,"$withval",
+ [Directory with the libstdc++ headers.])
+
+AC_ARG_WITH(cxx-include-arch,
+ AS_HELP_STRING([--with-cxx-include-arch],
+ [Architecture of the libstdc++ headers.]),,
+ withval="")
+AC_DEFINE_UNQUOTED(CXX_INCLUDE_ARCH,"$withval",
+ [Architecture of the libstdc++ headers.])
+
+AC_ARG_WITH(cxx-include-32bit-dir,
+ AS_HELP_STRING([--with-cxx-include-32bit-dir],
+ [32 bit multilib directory.]),,
+ withval="")
+AC_DEFINE_UNQUOTED(CXX_INCLUDE_32BIT_DIR,"$withval",
+ [32 bit multilib directory.])
+
+AC_ARG_WITH(cxx-include-64bit-dir,
+ AS_HELP_STRING([--with-cxx-include-64bit-dir],
+ [64 bit multilib directory.]),,
+ withval="")
+AC_DEFINE_UNQUOTED(CXX_INCLUDE_64BIT_DIR,"$withval",
+ [64 bit multilib directory.])
+
dnl Allow linking of LLVM with GPLv3 binutils code.
AC_ARG_WITH(binutils-include,
AS_HELP_STRING([--with-binutils-include],
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index acf3a2e..3a2b91c 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -4,6 +4,11 @@ include(CheckSymbolExists)
include(CheckFunctionExists)
include(CheckCXXSourceCompiles)
+if( UNIX )
+ # Used by check_symbol_exists:
+ set(CMAKE_REQUIRED_LIBRARIES m)
+endif()
+
# Helper macros and functions
macro(add_cxx_include result files)
set(${result} "")
@@ -39,7 +44,9 @@ check_include_file(malloc.h HAVE_MALLOC_H)
check_include_file(malloc/malloc.h HAVE_MALLOC_MALLOC_H)
check_include_file(memory.h HAVE_MEMORY_H)
check_include_file(ndir.h HAVE_NDIR_H)
-check_include_file(pthread.h HAVE_PTHREAD_H)
+if( NOT LLVM_ON_WIN32 )
+ check_include_file(pthread.h HAVE_PTHREAD_H)
+endif()
check_include_file(setjmp.h HAVE_SETJMP_H)
check_include_file(signal.h HAVE_SIGNAL_H)
check_include_file(stdint.h HAVE_STDINT_H)
@@ -63,10 +70,12 @@ check_include_file(utime.h HAVE_UTIME_H)
check_include_file(windows.h HAVE_WINDOWS_H)
# library checks
-check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD)
-check_library_exists(pthread pthread_getspecific "" HAVE_PTHREAD_GETSPECIFIC)
-check_library_exists(pthread pthread_rwlock_init "" HAVE_PTHREAD_RWLOCK_INIT)
-check_library_exists(dl dlopen "" HAVE_LIBDL)
+if( NOT LLVM_ON_WIN32 )
+ check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD)
+ check_library_exists(pthread pthread_getspecific "" HAVE_PTHREAD_GETSPECIFIC)
+ check_library_exists(pthread pthread_rwlock_init "" HAVE_PTHREAD_RWLOCK_INIT)
+ check_library_exists(dl dlopen "" HAVE_LIBDL)
+endif()
# function checks
check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE)
@@ -80,13 +89,16 @@ check_symbol_exists(isnan cmath HAVE_ISNAN_IN_CMATH)
check_symbol_exists(isnan math.h HAVE_ISNAN_IN_MATH_H)
check_symbol_exists(ceilf math.h HAVE_CEILF)
check_symbol_exists(floorf math.h HAVE_FLOORF)
+check_symbol_exists(nearbyintf math.h HAVE_NEARBYINTF)
check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO)
check_symbol_exists(malloc_zone_statistics malloc/malloc.h
HAVE_MALLOC_ZONE_STATISTICS)
check_symbol_exists(mkdtemp "stdlib.h;unistd.h" HAVE_MKDTEMP)
check_symbol_exists(mkstemp "stdlib.h;unistd.h" HAVE_MKSTEMP)
check_symbol_exists(mktemp "stdlib.h;unistd.h" HAVE_MKTEMP)
-check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK)
+if( NOT LLVM_ON_WIN32 )
+ check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK)
+endif()
check_symbol_exists(sbrk unistd.h HAVE_SBRK)
check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL)
check_symbol_exists(strerror string.h HAVE_STRERROR)
@@ -120,6 +132,27 @@ endif()
check_type_exists(uint64_t "${headers}" HAVE_UINT64_T)
check_type_exists(u_int64_t "${headers}" HAVE_U_INT64_T)
+# available programs checks
+function(llvm_find_program name)
+ string(TOUPPER ${name} NAME)
+ find_program(LLVM_PATH_${NAME} ${name})
+ mark_as_advanced(LLVM_PATH_${NAME})
+ if(LLVM_PATH_${NAME})
+ set(HAVE_${NAME} 1 CACHE INTERNAL "Is ${name} available ?")
+ mark_as_advanced(HAVE_${NAME})
+ else(LLVM_PATH_${NAME})
+ set(HAVE_${NAME} "" CACHE INTERNAL "Is ${name} available ?")
+ endif(LLVM_PATH_${NAME})
+endfunction()
+
+llvm_find_program(gv)
+llvm_find_program(circo)
+llvm_find_program(twopi)
+llvm_find_program(neato)
+llvm_find_program(fdp)
+llvm_find_program(dot)
+llvm_find_program(dotty)
+
# Define LLVM_MULTITHREADED if gcc atomic builtins exists.
include(CheckAtomic)
@@ -132,7 +165,9 @@ endif()
include(GetTargetTriple)
get_target_triple(LLVM_HOSTTRIPLE)
-message(STATUS "LLVM_HOSTTRIPLE: ${LLVM_HOSTTRIPLE}")
+
+# FIXME: We don't distinguish the target and the host. :(
+set(TARGET_TRIPLE "${LLVM_HOSTTRIPLE}")
# Determine the native architecture.
string(TOLOWER "${LLVM_TARGET_ARCH}" LLVM_NATIVE_ARCH)
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index 205ddb7..c0ce897 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -22,6 +22,28 @@ macro(add_llvm_library name)
endmacro(add_llvm_library name)
+macro(add_llvm_loadable_module name)
+ if( NOT LLVM_ON_UNIX )
+ message(STATUS "Loadable modules not supported on this platform.
+${name} ignored.")
+ else()
+ llvm_process_sources( ALL_FILES ${ARGN} )
+ add_library( ${name} MODULE ${ALL_FILES} )
+ set_target_properties( ${name} PROPERTIES PREFIX "" )
+
+ if (APPLE)
+ # Darwin-specific linker flags for loadable modules.
+ set_target_properties(${name} PROPERTIES
+ LINK_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress")
+ endif()
+
+ install(TARGETS ${name}
+ LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
+ ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
+ endif()
+endmacro(add_llvm_loadable_module name)
+
+
macro(add_llvm_executable name)
llvm_process_sources( ALL_FILES ${ARGN} )
add_executable(${name} ${ALL_FILES})
diff --git a/cmake/modules/GetTargetTriple.cmake b/cmake/modules/GetTargetTriple.cmake
index 87262ad..ac0c009 100644
--- a/cmake/modules/GetTargetTriple.cmake
+++ b/cmake/modules/GetTargetTriple.cmake
@@ -4,12 +4,12 @@
function( get_target_triple var )
if( MSVC )
if( CMAKE_CL_64 )
- set( ${var} "x86_64-pc-win32" PARENT_SCOPE )
+ set( value "x86_64-pc-win32" )
else()
- set( ${var} "i686-pc-win32" PARENT_SCOPE )
+ set( value "i686-pc-win32" )
endif()
elseif( MINGW AND NOT MSYS )
- set( ${var} "i686-pc-mingw32" PARENT_SCOPE )
+ set( value "i686-pc-mingw32" )
else( MSVC )
set(config_guess ${LLVM_MAIN_SRC_DIR}/autoconf/config.guess)
execute_process(COMMAND sh ${config_guess}
@@ -19,7 +19,8 @@ function( get_target_triple var )
if( NOT TT_RV EQUAL 0 )
message(FATAL_ERROR "Failed to execute ${config_guess}")
endif( NOT TT_RV EQUAL 0 )
- set( ${var} ${TT_OUT} PARENT_SCOPE )
- message(STATUS "Target triple: ${${var}}")
+ set( value ${TT_OUT} )
endif( MSVC )
+ set( ${var} ${value} PARENT_SCOPE )
+ message(STATUS "Target triple: ${value}")
endfunction( get_target_triple var )
diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake
index 655e23e..1338d56 100644
--- a/cmake/modules/LLVMLibDeps.cmake
+++ b/cmake/modules/LLVMLibDeps.cmake
@@ -24,7 +24,6 @@ set(MSVC_LIB_DEPS_LLVMCore LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMCppBackendInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMHello LLVMCore LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMInterpreter LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMMC LLVMSupport LLVMSystem LLVMTarget)
diff --git a/configure b/configure
index e671329..094829b 100755
--- a/configure
+++ b/configure
@@ -1603,6 +1603,14 @@ Optional Packages:
--with-extra-options Specify additional options to compile LLVM with
--with-ocaml-libdir Specify install location for ocaml bindings (default
is stdlib)
+ --with-c-include-dirs Colon separated list of directories clang will
+ search for headers
+ --with-cxx-include-root Directory with the libstdc++ headers.
+ --with-cxx-include-arch Architecture of the libstdc++ headers.
+ --with-cxx-include-32bit-dir
+ 32 bit multilib directory.
+ --with-cxx-include-64bit-dir
+ 64 bit multilib directory.
--with-binutils-include Specify path to binutils/include/ containing
plugin-api.h file for gold plugin.
--with-tclinclude directory where tcl headers are
@@ -5272,6 +5280,76 @@ echo "$as_me: error: Invalid path for --with-ocaml-libdir. Provide full path" >&
esac
+# Check whether --with-c-include-dirs was given.
+if test "${with_c_include_dirs+set}" = set; then
+ withval=$with_c_include_dirs;
+else
+ withval=""
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define C_INCLUDE_DIRS "$withval"
+_ACEOF
+
+
+
+# Check whether --with-cxx-include-root was given.
+if test "${with_cxx_include_root+set}" = set; then
+ withval=$with_cxx_include_root;
+else
+ withval=""
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define CXX_INCLUDE_ROOT "$withval"
+_ACEOF
+
+
+
+# Check whether --with-cxx-include-arch was given.
+if test "${with_cxx_include_arch+set}" = set; then
+ withval=$with_cxx_include_arch;
+else
+ withval=""
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define CXX_INCLUDE_ARCH "$withval"
+_ACEOF
+
+
+
+# Check whether --with-cxx-include-32bit-dir was given.
+if test "${with_cxx_include_32bit_dir+set}" = set; then
+ withval=$with_cxx_include_32bit_dir;
+else
+ withval=""
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define CXX_INCLUDE_32BIT_DIR "$withval"
+_ACEOF
+
+
+
+# Check whether --with-cxx-include-64bit-dir was given.
+if test "${with_cxx_include_64bit_dir+set}" = set; then
+ withval=$with_cxx_include_64bit_dir;
+else
+ withval=""
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define CXX_INCLUDE_64BIT_DIR "$withval"
+_ACEOF
+
+
+
# Check whether --with-binutils-include was given.
if test "${with_binutils_include+set}" = set; then
withval=$with_binutils_include;
@@ -11036,7 +11114,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 11039 "configure"
+#line 11117 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -13180,7 +13258,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
- echo '#line 13183 "configure"' > conftest.$ac_ext
+ echo '#line 13261 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -14898,11 +14976,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14901: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14979: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:14905: \$? = $ac_status" >&5
+ echo "$as_me:14983: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -15166,11 +15244,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:15169: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:15247: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:15173: \$? = $ac_status" >&5
+ echo "$as_me:15251: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -15270,11 +15348,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:15273: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:15351: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:15277: \$? = $ac_status" >&5
+ echo "$as_me:15355: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -17722,7 +17800,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 17725 "configure"
+#line 17803 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17822,7 +17900,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 17825 "configure"
+#line 17903 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -20190,11 +20268,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:20193: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:20271: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:20197: \$? = $ac_status" >&5
+ echo "$as_me:20275: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -20294,11 +20372,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:20297: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:20375: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:20301: \$? = $ac_status" >&5
+ echo "$as_me:20379: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -21864,11 +21942,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:21867: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:21945: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:21871: \$? = $ac_status" >&5
+ echo "$as_me:21949: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -21968,11 +22046,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:21971: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:22049: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:21975: \$? = $ac_status" >&5
+ echo "$as_me:22053: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -24203,11 +24281,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24206: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24284: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:24210: \$? = $ac_status" >&5
+ echo "$as_me:24288: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -24471,11 +24549,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24474: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24552: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:24478: \$? = $ac_status" >&5
+ echo "$as_me:24556: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -24575,11 +24653,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24578: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24656: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:24582: \$? = $ac_status" >&5
+ echo "$as_me:24660: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod
index 97c3e57..246fc66 100644
--- a/docs/CommandGuide/lit.pod
+++ b/docs/CommandGuide/lit.pod
@@ -36,6 +36,9 @@ Finally, B<lit> also supports additional options for only running a subset of
the tests specified on the command line; see L<"SELECTION OPTIONS"> for
more information.
+Users interested in the B<lit> architecture or designing a B<lit> testing
+implementation should see L<"LIT INFRASTRUCTURE">.
+
=head1 GENERAL OPTIONS
=over
@@ -146,6 +149,11 @@ List the discovered test suites as part of the standard output.
Run Tcl scripts internally (instead of converting to shell scripts).
+=item B<--repeat>=I<N>
+
+Run each test I<N> times. Currently this is primarily useful for timing tests;
+other results are not collated in any reasonable fashion.
+
=back
=head1 EXIT STATUS
@@ -222,6 +230,119 @@ Depending on the test format tests may produce additional information about
their status (generally only for failures). See the L<Output|"LIT OUTPUT">
section for more information.
+=head1 LIT INFRASTRUCTURE
+
+This section describes the B<lit> testing architecture for users interested in
+creating a new B<lit> testing implementation, or extending an existing one.
+
+B<lit> proper is primarily an infrastructure for discovering and running
+arbitrary tests, and for exposing a single convenient interface to these
+tests. B<lit> itself doesn't know how to run tests; rather, this logic is
+defined by I<test suites>.
+
+=head2 TEST SUITES
+
+As described in L<"TEST DISCOVERY">, tests are always located inside a I<test
+suite>. Test suites serve to define the format of the tests they contain, the
+logic for finding those tests, and any additional information to run the tests.
+
+B<lit> identifies test suites as directories containing I<lit.cfg> or
+I<lit.site.cfg> files (see also B<--config-prefix>). Test suites are initially
+discovered by recursively searching up the directory hierarchy for all the input
+files passed on the command line. You can use B<--show-suites> to display the
+discovered test suites at startup.
+
+Once a test suite is discovered, its config file is loaded. Config files
+themselves are just Python modules which will be executed. When the config file
+is executed, two important global variables are predefined:
+
+=over
+
+=item B<lit>
+
+The global B<lit> configuration object (a I<LitConfig> instance), which defines
+the builtin test formats, global configuration parameters, and other helper
+routines for implementing test configurations.
+
+=item B<config>
+
+This is the config object (a I<TestingConfig> instance) for the test suite,
+which the config file is expected to populate. The following variables are also
+available on the I<config> object; some of these must be set by the config
+file, while others are optional or predefined:
+
+B<name> I<[required]> The name of the test suite, for use in reports and
+diagnostics.
+
+B<test_format> I<[required]> The test format object which will be used to
+discover and run tests in the test suite. Generally this will be a builtin test
+format available from the I<lit.formats> module.
+
+B<test_src_root> The filesystem path to the test suite root. For out-of-dir
+builds, this is the directory that will be scanned for tests.
+
+B<test_exec_root> For out-of-dir builds, the path to the test suite root inside
+the object directory. This is where tests will be run and temporary output files
+placed.
+
+B<environment> A dictionary representing the environment to use when executing
+tests in the suite.
+
+B<suffixes> For B<lit> test formats which scan directories for tests, this
+variable is a list of suffixes to identify test files. Used by: I<ShTest>,
+I<TclTest>.
+
+B<substitutions> For B<lit> test formats which substitute variables into a test
+script, the list of substitutions to perform. Used by: I<ShTest>, I<TclTest>.
+
+B<unsupported> Mark an unsupported directory; all tests within it will be
+reported as unsupported. Used by: I<ShTest>, I<TclTest>.
+
+B<parent> The parent configuration; this is the config object for the directory
+containing the test suite, or None.
+
+B<on_clone> The config is actually cloned for every subdirectory inside a test
+suite, to allow local configuration on a per-directory basis. The I<on_clone>
+variable can be set to a Python function which will be called whenever a
+configuration is cloned (for a subdirectory). The function should take three
+arguments: (1) the parent configuration, (2) the new configuration (which the
+I<on_clone> function will generally modify), and (3) the test path to the new
+directory being scanned.
+
+=back
+
+=head2 TEST DISCOVERY
+
+Once test suites are located, B<lit> recursively traverses the source directory
+(following I<test_src_root>) looking for tests. When B<lit> enters a
+sub-directory, it first checks to see if a nested test suite is defined in that
+directory. If so, it loads that test suite recursively; otherwise, it
+instantiates a local test config for the directory (see L<"LOCAL CONFIGURATION
+FILES">).
+
+Tests are identified by the test suite they are contained within, and the
+relative path inside that suite. Note that the relative path may not refer to an
+actual file on disk; some test formats (such as I<GoogleTest>) define "virtual
+tests" which have a path that contains both the path to the actual test file and
+a subpath to identify the virtual test.
+
+=head2 LOCAL CONFIGURATION FILES
+
+When B<lit> loads a subdirectory in a test suite, it instantiates a local test
+configuration by cloning the configuration for the parent directory -- the root
+of this configuration chain will always be a test suite. Once the test
+configuration is cloned B<lit> checks for a I<lit.local.cfg> file in the
+subdirectory. If present, this file will be loaded and can be used to specialize
+the configuration for each individual directory. This facility can be used to
+define subdirectories of optional tests, or to change other configuration
+parameters -- for example, to change the test format, or the suffixes which
+identify test files.
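+
+For example, a I<lit.local.cfg> along the following lines (a hypothetical
+sketch) narrows which files are treated as tests in one directory:
+
+  # A hypothetical lit.local.cfg sketch; it specializes the configuration
+  # cloned from the parent directory.
+  config.suffixes = ['.test']  # only '.test' files identify tests here
+
+  # Alternatively, report every test below this directory as unsupported:
+  # config.unsupported = True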
+
+=head2 LIT EXAMPLE TESTS
+
+The B<lit> distribution contains several example implementations of test suites
+in the I<ExampleTests> directory.
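+
+Assuming the B<lit> tool is installed on your path, these suites can be run
+from a checkout with an invocation along the lines of:
+
+  % lit ExampleTests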
+
=head1 SEE ALSO
L<valgrind(1)>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index c06a88b..a417db0 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -1215,6 +1215,13 @@ target datalayout = "<i>layout specification</i>"
<dt><tt>s<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the alignment for a stack object of a given bit
<i>size</i>.</dd>
+
+ <dt><tt>n<i>size1</i>:<i>size2</i>:<i>size3</i>...</tt></dt>
+ <dd>This specifies a set of native integer widths for the target CPU
+ in bits. For example, it might contain "n32" for 32-bit PowerPC,
+ "n32:64" for PowerPC 64, or "n8:16:32:64" for X86-64. Elements of
+ this set are considered to support most general arithmetic
+ operations efficiently.</dd>
</dl>
<p>When constructing the data layout for a given target, LLVM starts with a
@@ -1569,12 +1576,12 @@ Classifications</a> </div>
</tr>
</table>
-<p>Note that 'variable sized arrays' can be implemented in LLVM with a zero
- length array. Normally, accesses past the end of an array are undefined in
- LLVM (e.g. it is illegal to access the 5th element of a 3 element array). As
- a special case, however, zero length arrays are recognized to be variable
- length. This allows implementation of 'pascal style arrays' with the LLVM
- type "<tt>{ i32, [0 x float]}</tt>", for example.</p>
+<p>There is no restriction on indexing beyond the end of the array implied by
+ a static type (though there are restrictions on indexing beyond the bounds
+ of an allocated object in some cases). This means that single-dimension
+ 'variable sized array' addressing can be implemented in LLVM with a zero
+ length array type. An implementation of 'pascal style arrays' in LLVM could
+ use the type "<tt>{ i32, [0 x float]}</tt>", for example.</p>
<p>Note that the code generator does not yet support large aggregate types to be
used as function return types. The specific limit on how large an aggregate
@@ -7278,7 +7285,7 @@ LLVM</a>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-11-02 01:25:26 +0100 (Mon, 02 Nov 2009) $
+ Last modified: $Date: 2009-11-09 20:01:53 +0100 (Mon, 09 Nov 2009) $
</address>
</body>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index 277b1e3..9b7571a 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -774,7 +774,7 @@ DW_TAG_return_variable = 258
<p>This intrinsic is used to provide correspondence between the source file and
the generated code. The first argument is the line number (base 1), second
argument is the column number (0 if unknown) and the third argument the
- source <tt>%<a href="#format_compile_units">llvm.dbg.compile_unit</a>.
+ source <tt>%<a href="#format_compile_units">llvm.dbg.compile_unit</a></tt>.
Code following a call to this intrinsic will
have been defined in close proximity of the line, column and file. This
information holds until the next call
@@ -1813,7 +1813,7 @@ enum Trees {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
+ Last modified: $Date: 2009-11-17 14:13:59 +0100 (Tue, 17 Nov 2009) $
</address>
</body>
diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h
index e52a1d1..2c5a371 100644
--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@@ -31,9 +31,6 @@ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
/** See llvm::createCFGSimplificationPass function. */
void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM);
-/** See llvm::createCondPropagationPass function. */
-void LLVMAddCondPropagationPass(LLVMPassManagerRef PM);
-
/** See llvm::createDeadStoreEliminationPass function. */
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM);
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 52354b7..8329947 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -14,8 +14,9 @@
#ifndef LLVM_ADT_DENSEMAP_H
#define LLVM_ADT_DENSEMAP_H
-#include "llvm/Support/PointerLikeTypeTraits.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/type_traits.h"
#include "llvm/ADT/DenseMapInfo.h"
#include <iterator>
#include <new>
@@ -27,12 +28,8 @@ namespace llvm {
template<typename KeyT, typename ValueT,
typename KeyInfoT = DenseMapInfo<KeyT>,
- typename ValueInfoT = DenseMapInfo<ValueT> >
+ typename ValueInfoT = DenseMapInfo<ValueT>, bool IsConst = false>
class DenseMapIterator;
-template<typename KeyT, typename ValueT,
- typename KeyInfoT = DenseMapInfo<KeyT>,
- typename ValueInfoT = DenseMapInfo<ValueT> >
-class DenseMapConstIterator;
template<typename KeyT, typename ValueT,
typename KeyInfoT = DenseMapInfo<KeyT>,
@@ -73,7 +70,8 @@ public:
}
typedef DenseMapIterator<KeyT, ValueT, KeyInfoT> iterator;
- typedef DenseMapConstIterator<KeyT, ValueT, KeyInfoT> const_iterator;
+ typedef DenseMapIterator<KeyT, ValueT,
+ KeyInfoT, ValueInfoT, true> const_iterator;
inline iterator begin() {
return iterator(Buckets, Buckets+NumBuckets);
}
@@ -426,32 +424,47 @@ private:
}
};
-template<typename KeyT, typename ValueT, typename KeyInfoT, typename ValueInfoT>
-class DenseMapIterator :
- public std::iterator<std::forward_iterator_tag, std::pair<KeyT, ValueT>,
- ptrdiff_t> {
- typedef std::pair<KeyT, ValueT> BucketT;
-protected:
- const BucketT *Ptr, *End;
+template<typename KeyT, typename ValueT,
+ typename KeyInfoT, typename ValueInfoT, bool IsConst>
+class DenseMapIterator {
+ typedef std::pair<KeyT, ValueT> Bucket;
+ typedef DenseMapIterator<KeyT, ValueT,
+ KeyInfoT, ValueInfoT, true> ConstIterator;
+ friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, ValueInfoT, true>;
+public:
+ typedef ptrdiff_t difference_type;
+ typedef typename conditional<IsConst, const Bucket, Bucket>::type value_type;
+ typedef value_type *pointer;
+ typedef value_type &reference;
+ typedef std::forward_iterator_tag iterator_category;
+private:
+ pointer Ptr, End;
public:
DenseMapIterator() : Ptr(0), End(0) {}
- DenseMapIterator(const BucketT *Pos, const BucketT *E) : Ptr(Pos), End(E) {
+ DenseMapIterator(pointer Pos, pointer E) : Ptr(Pos), End(E) {
AdvancePastEmptyBuckets();
}
- std::pair<KeyT, ValueT> &operator*() const {
- return *const_cast<BucketT*>(Ptr);
+  // If IsConst is true, this is a converting constructor from iterator to
+  // const_iterator and the default copy constructor is used.
+  // Otherwise, this is a copy constructor for iterator.
+ DenseMapIterator(const DenseMapIterator<KeyT, ValueT,
+ KeyInfoT, ValueInfoT, false>& I)
+ : Ptr(I.Ptr), End(I.End) {}
+
+ reference operator*() const {
+ return *Ptr;
}
- std::pair<KeyT, ValueT> *operator->() const {
- return const_cast<BucketT*>(Ptr);
+ pointer operator->() const {
+ return Ptr;
}
- bool operator==(const DenseMapIterator &RHS) const {
- return Ptr == RHS.Ptr;
+ bool operator==(const ConstIterator &RHS) const {
+ return Ptr == RHS.operator->();
}
- bool operator!=(const DenseMapIterator &RHS) const {
- return Ptr != RHS.Ptr;
+ bool operator!=(const ConstIterator &RHS) const {
+ return Ptr != RHS.operator->();
}
inline DenseMapIterator& operator++() { // Preincrement
@@ -475,22 +488,6 @@ private:
}
};
-template<typename KeyT, typename ValueT, typename KeyInfoT, typename ValueInfoT>
-class DenseMapConstIterator : public DenseMapIterator<KeyT, ValueT, KeyInfoT> {
-public:
- DenseMapConstIterator() : DenseMapIterator<KeyT, ValueT, KeyInfoT>() {}
- DenseMapConstIterator(const std::pair<KeyT, ValueT> *Pos,
- const std::pair<KeyT, ValueT> *E)
- : DenseMapIterator<KeyT, ValueT, KeyInfoT>(Pos, E) {
- }
- const std::pair<KeyT, ValueT> &operator*() const {
- return *this->Ptr;
- }
- const std::pair<KeyT, ValueT> *operator->() const {
- return this->Ptr;
- }
-};
-
} // end namespace llvm
#endif
diff --git a/include/llvm/ADT/GraphTraits.h b/include/llvm/ADT/GraphTraits.h
index 2d103cf..0fd1f50 100644
--- a/include/llvm/ADT/GraphTraits.h
+++ b/include/llvm/ADT/GraphTraits.h
@@ -30,7 +30,7 @@ struct GraphTraits {
// typedef NodeType - Type of Node in the graph
// typedef ChildIteratorType - Type used to iterate over children in graph
- // static NodeType *getEntryNode(GraphType *)
+ // static NodeType *getEntryNode(const GraphType &)
// Return the entry node of the graph
// static ChildIteratorType child_begin(NodeType *)
diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h
index 742e232..fc9fe8b 100644
--- a/include/llvm/ADT/ImmutableMap.h
+++ b/include/llvm/ADT/ImmutableMap.h
@@ -102,8 +102,8 @@ public:
}
private:
- Factory(const Factory& RHS) {};
- void operator=(const Factory& RHS) {};
+ Factory(const Factory& RHS); // DO NOT IMPLEMENT
+ void operator=(const Factory& RHS); // DO NOT IMPLEMENT
};
friend class Factory;
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 16b4403..676a198 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -988,8 +988,8 @@ public:
BumpPtrAllocator& getAllocator() { return F.getAllocator(); }
private:
- Factory(const Factory& RHS) {}
- void operator=(const Factory& RHS) {}
+ Factory(const Factory& RHS); // DO NOT IMPLEMENT
+ void operator=(const Factory& RHS); // DO NOT IMPLEMENT
};
friend class Factory;
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 33f2fcb..49c8940 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -186,8 +186,9 @@ namespace llvm {
int is() const {
// Is it PT1/PT2?
if (::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0) != -1)
- return Val.is<InnerUnion>() && Val.get<InnerUnion>().is<T>();
- return Val.is<T>();
+ return Val.template is<InnerUnion>() &&
+ Val.template get<InnerUnion>().template is<T>();
+ return Val.template is<T>();
}
/// get<T>() - Return the value of the specified pointer type. If the
@@ -197,9 +198,9 @@ namespace llvm {
assert(is<T>() && "Invalid accessor called");
// Is it PT1/PT2?
if (::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0) != -1)
- return Val.get<InnerUnion>().get<T>();
+ return Val.template get<InnerUnion>().template get<T>();
- return Val.get<T>();
+ return Val.template get<T>();
}
/// dyn_cast<T>() - If the current value is of the specified pointer type,
@@ -291,8 +292,10 @@ namespace llvm {
int is() const {
// Is it PT1/PT2?
if (::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0) != -1)
- return Val.is<InnerUnion1>() && Val.get<InnerUnion1>().is<T>();
- return Val.is<InnerUnion2>() && Val.get<InnerUnion2>().is<T>();
+ return Val.template is<InnerUnion1>() &&
+ Val.template get<InnerUnion1>().template is<T>();
+ return Val.template is<InnerUnion2>() &&
+ Val.template get<InnerUnion2>().template is<T>();
}
/// get<T>() - Return the value of the specified pointer type. If the
@@ -302,9 +305,9 @@ namespace llvm {
assert(is<T>() && "Invalid accessor called");
// Is it PT1/PT2?
if (::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0) != -1)
- return Val.get<InnerUnion1>().get<T>();
+ return Val.template get<InnerUnion1>().template get<T>();
- return Val.get<InnerUnion2>().get<T>();
+ return Val.template get<InnerUnion2>().template get<T>();
}
/// dyn_cast<T>() - If the current value is of the specified pointer type,
diff --git a/include/llvm/ADT/PriorityQueue.h b/include/llvm/ADT/PriorityQueue.h
index a8809dc..bf8a687 100644
--- a/include/llvm/ADT/PriorityQueue.h
+++ b/include/llvm/ADT/PriorityQueue.h
@@ -14,6 +14,7 @@
#ifndef LLVM_ADT_PRIORITY_QUEUE_H
#define LLVM_ADT_PRIORITY_QUEUE_H
+#include <algorithm>
#include <queue>
namespace llvm {
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index db985b5..3afcabd 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -136,8 +136,8 @@ public:
typedef scc_iterator<GraphT, GT> _Self;
// Provide static "constructors"...
- static inline _Self begin(GraphT& G) { return _Self(GT::getEntryNode(G)); }
- static inline _Self end (GraphT& G) { return _Self(); }
+ static inline _Self begin(const GraphT& G) { return _Self(GT::getEntryNode(G)); }
+ static inline _Self end (const GraphT& G) { return _Self(); }
// Direct loop termination test (I.fini() is more efficient than I == end())
inline bool fini() const {
@@ -186,15 +186,25 @@ public:
// Global constructor for the SCC iterator.
template <class T>
-scc_iterator<T> scc_begin(T G) {
+scc_iterator<T> scc_begin(const T& G) {
return scc_iterator<T>::begin(G);
}
template <class T>
-scc_iterator<T> scc_end(T G) {
+scc_iterator<T> scc_end(const T& G) {
return scc_iterator<T>::end(G);
}
+template <class T>
+scc_iterator<Inverse<T> > scc_begin(const Inverse<T>& G) {
+ return scc_iterator<Inverse<T> >::begin(G);
+}
+
+template <class T>
+scc_iterator<Inverse<T> > scc_end(const Inverse<T>& G) {
+ return scc_iterator<Inverse<T> >::end(G);
+}
+
} // End llvm namespace
#endif
diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h
index 6f47692..a8b6133 100644
--- a/include/llvm/ADT/STLExtras.h
+++ b/include/llvm/ADT/STLExtras.h
@@ -270,6 +270,14 @@ static inline void array_pod_sort(IteratorTy Start, IteratorTy End) {
get_array_pad_sort_comparator(*Start));
}
+template<class IteratorTy>
+static inline void array_pod_sort(IteratorTy Start, IteratorTy End,
+ int (*Compare)(const void*, const void*)) {
+ // Don't dereference start iterator of empty sequence.
+ if (Start == End) return;
+ qsort(&*Start, End-Start, sizeof(*Start), Compare);
+}
+
} // End llvm namespace
#endif
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index 73fd635..86e8546 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -96,12 +96,12 @@ protected:
/// specified bucket will be non-null. Otherwise, it will be null. In either
/// case, the FullHashValue field of the bucket will be set to the hash value
/// of the string.
- unsigned LookupBucketFor(const StringRef &Key);
+ unsigned LookupBucketFor(StringRef Key);
/// FindKey - Look up the bucket that contains the specified key. If it exists
/// in the map, return the bucket number of the key. Otherwise return -1.
/// This does not modify the map.
- int FindKey(const StringRef &Key) const;
+ int FindKey(StringRef Key) const;
/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
/// delete it. This aborts if the value isn't in the table.
@@ -109,7 +109,7 @@ protected:
/// RemoveKey - Remove the StringMapEntry for the specified key from the
/// table, returning it. If the key is not in the table, this returns null.
- StringMapEntryBase *RemoveKey(const StringRef &Key);
+ StringMapEntryBase *RemoveKey(StringRef Key);
private:
void init(unsigned Size);
public:
@@ -282,13 +282,13 @@ public:
return const_iterator(TheTable+NumBuckets, true);
}
- iterator find(const StringRef &Key) {
+ iterator find(StringRef Key) {
int Bucket = FindKey(Key);
if (Bucket == -1) return end();
return iterator(TheTable+Bucket);
}
- const_iterator find(const StringRef &Key) const {
+ const_iterator find(StringRef Key) const {
int Bucket = FindKey(Key);
if (Bucket == -1) return end();
return const_iterator(TheTable+Bucket);
@@ -296,18 +296,18 @@ public:
/// lookup - Return the entry for the specified key, or a default
/// constructed value if no such entry exists.
- ValueTy lookup(const StringRef &Key) const {
+ ValueTy lookup(StringRef Key) const {
const_iterator it = find(Key);
if (it != end())
return it->second;
return ValueTy();
}
- ValueTy& operator[](const StringRef &Key) {
+ ValueTy& operator[](StringRef Key) {
return GetOrCreateValue(Key).getValue();
}
- size_type count(const StringRef &Key) const {
+ size_type count(StringRef Key) const {
return find(Key) == end() ? 0 : 1;
}
@@ -350,7 +350,7 @@ public:
/// exists, return it. Otherwise, default construct a value, insert it, and
/// return.
template <typename InitTy>
- StringMapEntry<ValueTy> &GetOrCreateValue(const StringRef &Key,
+ StringMapEntry<ValueTy> &GetOrCreateValue(StringRef Key,
InitTy Val) {
unsigned BucketNo = LookupBucketFor(Key);
ItemBucket &Bucket = TheTable[BucketNo];
@@ -373,7 +373,7 @@ public:
return *NewItem;
}
- StringMapEntry<ValueTy> &GetOrCreateValue(const StringRef &Key) {
+ StringMapEntry<ValueTy> &GetOrCreateValue(StringRef Key) {
return GetOrCreateValue(Key, ValueTy());
}
@@ -401,7 +401,7 @@ public:
V.Destroy(Allocator);
}
- bool erase(const StringRef &Key) {
+ bool erase(StringRef Key) {
iterator I = find(Key);
if (I == end()) return false;
erase(I);
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 2fa5c66..ed651bf 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -16,6 +16,8 @@
#include <string>
namespace llvm {
+ template<typename T>
+ class SmallVectorImpl;
/// StringRef - Represent a constant reference to a string, i.e. a character
/// array and a length, which need not be null terminated.
@@ -46,7 +48,7 @@ namespace llvm {
/// Construct a string ref from a cstring.
/*implicit*/ StringRef(const char *Str)
- : Data(Str) { if (Str) Length = ::strlen(Str); else Length = 0; }
+ : Data(Str), Length(::strlen(Str)) {}
/// Construct a string ref from a pointer and length.
/*implicit*/ StringRef(const char *data, size_t length)
@@ -54,7 +56,7 @@ namespace llvm {
/// Construct a string ref from an std::string.
/*implicit*/ StringRef(const std::string &Str)
- : Data(Str.c_str()), Length(Str.length()) {}
+ : Data(Str.data()), Length(Str.length()) {}
/// @}
/// @name Iterators
@@ -92,14 +94,19 @@ namespace llvm {
/// equals - Check for string equality, this is more efficient than
/// compare() when the relative ordering of inequal strings isn't needed.
- bool equals(const StringRef &RHS) const {
+ bool equals(StringRef RHS) const {
return (Length == RHS.Length &&
memcmp(Data, RHS.Data, RHS.Length) == 0);
}
+ /// equals_lower - Check for string equality, ignoring case.
+ bool equals_lower(StringRef RHS) const {
+ return Length == RHS.Length && compare_lower(RHS) == 0;
+ }
+
/// compare - Compare two strings; the result is -1, 0, or 1 if this string
/// is lexicographically less than, equal to, or greater than the \arg RHS.
- int compare(const StringRef &RHS) const {
+ int compare(StringRef RHS) const {
// Check the prefix for a mismatch.
if (int Res = memcmp(Data, RHS.Data, std::min(Length, RHS.Length)))
return Res < 0 ? -1 : 1;
@@ -110,6 +117,9 @@ namespace llvm {
return Length < RHS.Length ? -1 : 1;
}
+ /// compare_lower - Compare two strings, ignoring case.
+ int compare_lower(StringRef RHS) const;
+
/// str - Get the contents as an std::string.
std::string str() const { return std::string(Data, Length); }
@@ -135,12 +145,12 @@ namespace llvm {
/// @{
/// startswith - Check if this string starts with the given \arg Prefix.
- bool startswith(const StringRef &Prefix) const {
+ bool startswith(StringRef Prefix) const {
return substr(0, Prefix.Length).equals(Prefix);
}
/// endswith - Check if this string ends with the given \arg Suffix.
- bool endswith(const StringRef &Suffix) const {
+ bool endswith(StringRef Suffix) const {
return slice(size() - Suffix.Length, size()).equals(Suffix);
}
@@ -152,8 +162,8 @@ namespace llvm {
///
/// \return - The index of the first occurence of \arg C, or npos if not
/// found.
- size_t find(char C) const {
- for (size_t i = 0, e = Length; i != e; ++i)
+ size_t find(char C, size_t From = 0) const {
+ for (size_t i = std::min(From, Length), e = Length; i != e; ++i)
if (Data[i] == C)
return i;
return npos;
@@ -163,7 +173,7 @@ namespace llvm {
///
/// \return - The index of the first occurence of \arg Str, or npos if not
/// found.
- size_t find(const StringRef &Str) const;
+ size_t find(StringRef Str, size_t From = 0) const;
/// rfind - Search for the last character \arg C in the string.
///
@@ -184,19 +194,27 @@ namespace llvm {
///
/// \return - The index of the last occurence of \arg Str, or npos if not
/// found.
- size_t rfind(const StringRef &Str) const;
+ size_t rfind(StringRef Str) const;
+
+ /// find_first_of - Find the first character in the string that is \arg C,
+ /// or npos if not found. Same as find.
+    size_type find_first_of(char C, size_t From = 0) const {
+      return find(C, From);
+    }
- /// find_first_of - Find the first instance of the specified character or
- /// return npos if not in string. Same as find.
- size_type find_first_of(char C) const { return find(C); }
+ /// find_first_of - Find the first character in the string that is in \arg
+ /// Chars, or npos if not found.
+ ///
+ /// Note: O(size() * Chars.size())
+ size_type find_first_of(StringRef Chars, size_t From = 0) const;
- /// find_first_of - Find the first character from the string 'Chars' in the
- /// current string or return npos if not in string.
- size_type find_first_of(StringRef Chars) const;
+ /// find_first_not_of - Find the first character in the string that is not
+    /// \arg C, or npos if not found.
+ size_type find_first_not_of(char C, size_t From = 0) const;
/// find_first_not_of - Find the first character in the string that is not
- /// in the string 'Chars' or return npos if all are in string. Same as find.
- size_type find_first_not_of(StringRef Chars) const;
+ /// in the string \arg Chars, or npos if not found.
+ ///
+ /// Note: O(size() * Chars.size())
+ size_type find_first_not_of(StringRef Chars, size_t From = 0) const;
/// @}
/// @name Helpful Algorithms
@@ -213,7 +231,7 @@ namespace llvm {
/// count - Return the number of non-overlapped occurrences of \arg Str in
/// the string.
- size_t count(const StringRef &Str) const;
+ size_t count(StringRef Str) const;
/// getAsInteger - Parse the current string as an integer of the specified
/// radix. If Radix is specified as zero, this does radix autosensing using
@@ -281,6 +299,42 @@ namespace llvm {
return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
}
+    /// split - Split into two substrings around the first occurrence of a
+ /// separator string.
+ ///
+ /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
+ /// such that (*this == LHS + Separator + RHS) is true and RHS is
+ /// maximal. If \arg Separator is not in the string, then the result is a
+ /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
+ ///
+ /// \param Separator - The string to split on.
+ /// \return - The split substrings.
+ std::pair<StringRef, StringRef> split(StringRef Separator) const {
+ size_t Idx = find(Separator);
+ if (Idx == npos)
+ return std::make_pair(*this, StringRef());
+ return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
+ }
+
+    /// split - Split into substrings around the occurrences of a separator
+ /// string.
+ ///
+ /// Each substring is stored in \arg A. If \arg MaxSplit is >= 0, at most
+ /// \arg MaxSplit splits are done and consequently <= \arg MaxSplit
+ /// elements are added to A.
+    /// If \arg KeepEmpty is false, empty strings are not added to \arg A. They
+    /// still count when considering \arg MaxSplit.
+    /// A useful invariant is that
+    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true.
+ ///
+ /// \param A - Where to put the substrings.
+ /// \param Separator - The string to split on.
+ /// \param MaxSplit - The maximum number of times the string is split.
+    /// \param KeepEmpty - True if empty substrings should be added.
+ void split(SmallVectorImpl<StringRef> &A,
+ StringRef Separator, int MaxSplit = -1,
+ bool KeepEmpty = true) const;
+
/// rsplit - Split into two substrings around the last occurence of a
/// separator character.
///
@@ -304,27 +358,27 @@ namespace llvm {
/// @name StringRef Comparison Operators
/// @{
- inline bool operator==(const StringRef &LHS, const StringRef &RHS) {
+ inline bool operator==(StringRef LHS, StringRef RHS) {
return LHS.equals(RHS);
}
- inline bool operator!=(const StringRef &LHS, const StringRef &RHS) {
+ inline bool operator!=(StringRef LHS, StringRef RHS) {
return !(LHS == RHS);
}
- inline bool operator<(const StringRef &LHS, const StringRef &RHS) {
+ inline bool operator<(StringRef LHS, StringRef RHS) {
return LHS.compare(RHS) == -1;
}
- inline bool operator<=(const StringRef &LHS, const StringRef &RHS) {
+ inline bool operator<=(StringRef LHS, StringRef RHS) {
return LHS.compare(RHS) != 1;
}
- inline bool operator>(const StringRef &LHS, const StringRef &RHS) {
+ inline bool operator>(StringRef LHS, StringRef RHS) {
return LHS.compare(RHS) == 1;
}
- inline bool operator>=(const StringRef &LHS, const StringRef &RHS) {
+ inline bool operator>=(StringRef LHS, StringRef RHS) {
return LHS.compare(RHS) != -1;
}
diff --git a/include/llvm/ADT/StringSwitch.h b/include/llvm/ADT/StringSwitch.h
index 48a52de..6562d57 100644
--- a/include/llvm/ADT/StringSwitch.h
+++ b/include/llvm/ADT/StringSwitch.h
@@ -65,6 +65,33 @@ public:
return *this;
}
+ template<unsigned N0, unsigned N1>
+ StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
+ const T& Value) {
+ return Case(S0, Value).Case(S1, Value);
+ }
+
+ template<unsigned N0, unsigned N1, unsigned N2>
+ StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
+ const char (&S2)[N2], const T& Value) {
+ return Case(S0, Value).Case(S1, Value).Case(S2, Value);
+ }
+
+ template<unsigned N0, unsigned N1, unsigned N2, unsigned N3>
+ StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
+ const char (&S2)[N2], const char (&S3)[N3],
+ const T& Value) {
+ return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value);
+ }
+
+ template<unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4>
+ StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
+ const char (&S2)[N2], const char (&S3)[N3],
+ const char (&S4)[N4], const T& Value) {
+ return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value)
+ .Case(S4, Value);
+ }
+
T Default(const T& Value) {
if (ResultKnown)
return Result;
diff --git a/include/llvm/ADT/Trie.h b/include/llvm/ADT/Trie.h
index cf92862..b415990 100644
--- a/include/llvm/ADT/Trie.h
+++ b/include/llvm/ADT/Trie.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/Support/DOTGraphTraits.h"
+#include <cassert>
#include <vector>
namespace llvm {
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 7fb0014..a9e3e53 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -94,6 +94,7 @@ public:
MinGW64,
NetBSD,
OpenBSD,
+ Psp,
Solaris,
Win32,
Haiku
@@ -160,6 +161,8 @@ public:
/// @name Direct Component Access
/// @{
+ const std::string &str() const { return Data; }
+
const std::string &getTriple() const { return Data; }
/// getArchName - Get the architecture (first) component of the
@@ -218,23 +221,27 @@ public:
/// setArchName - Set the architecture (first) component of the
/// triple by name.
- void setArchName(const StringRef &Str);
+ void setArchName(StringRef Str);
/// setVendorName - Set the vendor (second) component of the triple
/// by name.
- void setVendorName(const StringRef &Str);
+ void setVendorName(StringRef Str);
/// setOSName - Set the operating system (third) component of the
/// triple by name.
- void setOSName(const StringRef &Str);
+ void setOSName(StringRef Str);
/// setEnvironmentName - Set the optional environment (fourth)
/// component of the triple by name.
- void setEnvironmentName(const StringRef &Str);
+ void setEnvironmentName(StringRef Str);
/// setOSAndEnvironmentName - Set the operating system and optional
/// environment components with a single string.
- void setOSAndEnvironmentName(const StringRef &Str);
+ void setOSAndEnvironmentName(StringRef Str);
+
+ /// getArchNameForAssembler - Get an architecture name that is understood by the
+ /// target assembler.
+ const char *getArchNameForAssembler();
/// @}
/// @name Static helpers for IDs.
@@ -265,12 +272,12 @@ public:
/// getArchTypeForLLVMName - The canonical type for the given LLVM
/// architecture name (e.g., "x86").
- static ArchType getArchTypeForLLVMName(const StringRef &Str);
+ static ArchType getArchTypeForLLVMName(StringRef Str);
/// getArchTypeForDarwinArchName - Get the architecture type for a "Darwin"
/// architecture name, for example as accepted by "gcc -arch" (see also
/// arch(3)).
- static ArchType getArchTypeForDarwinArchName(const StringRef &Str);
+ static ArchType getArchTypeForDarwinArchName(StringRef Str);
/// @}
};
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index 435f8ea..440d182 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -71,6 +71,18 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
if (const BranchInst *BI = dyn_cast<BranchInst>(Node->getTerminator()))
if (BI->isConditional())
return (I == succ_begin(Node)) ? "T" : "F";
+
+ // Label source of switch edges with the associated value.
+ if (const SwitchInst *SI = dyn_cast<SwitchInst>(Node->getTerminator())) {
+ unsigned SuccNo = I.getSuccessorIndex();
+
+ if (SuccNo == 0) return "def";
+
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << SI->getCaseValue(SuccNo)->getValue();
+ return OS.str();
+ }
return "";
}
};
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index 78a16da..06951c7 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -26,20 +26,18 @@ namespace llvm {
class TargetData;
class Function;
class Type;
- class LLVMContext;
/// ConstantFoldInstruction - Attempt to constant fold the specified
/// instruction. If successful, the constant result is returned, if not, null
/// is returned. Note that this function can only fail when attempting to fold
/// instructions like loads and stores, which have no constant expression form.
///
-Constant *ConstantFoldInstruction(Instruction *I, LLVMContext &Context,
- const TargetData *TD = 0);
+Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0);
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData. If successful, the constant result is
/// result is returned, if not, null is returned.
-Constant *ConstantFoldConstantExpression(ConstantExpr *CE, LLVMContext &Context,
+Constant *ConstantFoldConstantExpression(ConstantExpr *CE,
const TargetData *TD = 0);
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -49,8 +47,7 @@ Constant *ConstantFoldConstantExpression(ConstantExpr *CE, LLVMContext &Context,
/// form.
///
Constant *ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
- Constant*const * Ops, unsigned NumOps,
- LLVMContext &Context,
+ Constant *const *Ops, unsigned NumOps,
const TargetData *TD = 0);
/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
@@ -58,8 +55,7 @@ Constant *ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
/// returns a constant expression of the specified operands.
///
Constant *ConstantFoldCompareInstOperands(unsigned Predicate,
- Constant*const * Ops, unsigned NumOps,
- LLVMContext &Context,
+ Constant *LHS, Constant *RHS,
const TargetData *TD = 0);
/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
@@ -79,7 +75,7 @@ bool canConstantFoldCallTo(const Function *F);
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
Constant *
-ConstantFoldCall(Function *F, Constant* const* Operands, unsigned NumOperands);
+ConstantFoldCall(Function *F, Constant *const *Operands, unsigned NumOperands);
}
#endif
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index cfe3632..3c40d65 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -26,8 +26,6 @@
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ValueHandle.h"
-#define ATTACH_DEBUG_INFO_TO_AN_INSN 1
-
namespace llvm {
class BasicBlock;
class Constant;
@@ -46,9 +44,11 @@ namespace llvm {
class Instruction;
class LLVMContext;
+  /// DIDescriptor - A thin wrapper around MDNode to access encoded debug
+  /// info. This should not be stored in a container, because the underlying
+  /// MDNode may change in certain situations.
class DIDescriptor {
protected:
- TrackingVH<MDNode> DbgNode;
+ MDNode *DbgNode;
/// DIDescriptor constructor. If the specified node is non-null, check
/// to make sure that the tag in the descriptor matches 'RequiredTag'. If
@@ -468,15 +468,8 @@ namespace llvm {
Module &M;
LLVMContext& VMContext;
- // Cached values for uniquing and faster lookups.
const Type *EmptyStructPtr; // "{}*".
- Function *StopPointFn; // llvm.dbg.stoppoint
- Function *FuncStartFn; // llvm.dbg.func.start
- Function *RegionStartFn; // llvm.dbg.region.start
- Function *RegionEndFn; // llvm.dbg.region.end
Function *DeclareFn; // llvm.dbg.declare
- StringMap<Constant*> StringCache;
- DenseMap<Constant*, DIDescriptor> SimpleConstantCache;
DIFactory(const DIFactory &); // DO NOT IMPLEMENT
void operator=(const DIFactory&); // DO NOT IMPLEMENT
@@ -496,26 +489,26 @@ namespace llvm {
/// CreateCompileUnit - Create a new descriptor for the specified compile
/// unit.
DICompileUnit CreateCompileUnit(unsigned LangID,
- StringRef Filenae,
- StringRef Directory,
- StringRef Producer,
+ const char * Filename,
+ const char * Directory,
+ const char * Producer,
bool isMain = false,
bool isOptimized = false,
const char *Flags = "",
unsigned RunTimeVer = 0);
/// CreateEnumerator - Create a single enumerator value.
- DIEnumerator CreateEnumerator(StringRef Name, uint64_t Val);
+ DIEnumerator CreateEnumerator(const char * Name, uint64_t Val);
/// CreateBasicType - Create a basic type like int, float, etc.
- DIBasicType CreateBasicType(DIDescriptor Context, StringRef Name,
+ DIBasicType CreateBasicType(DIDescriptor Context, const char * Name,
DICompileUnit CompileUnit, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
unsigned Encoding);
/// CreateBasicType - Create a basic type like int, float, etc.
- DIBasicType CreateBasicTypeEx(DIDescriptor Context, StringRef Name,
+ DIBasicType CreateBasicTypeEx(DIDescriptor Context, const char * Name,
DICompileUnit CompileUnit, unsigned LineNumber,
Constant *SizeInBits, Constant *AlignInBits,
Constant *OffsetInBits, unsigned Flags,
@@ -524,7 +517,7 @@ namespace llvm {
/// CreateDerivedType - Create a derived type like const qualified type,
/// pointer, typedef, etc.
DIDerivedType CreateDerivedType(unsigned Tag, DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
@@ -534,7 +527,7 @@ namespace llvm {
/// CreateDerivedType - Create a derived type like const qualified type,
/// pointer, typedef, etc.
DIDerivedType CreateDerivedTypeEx(unsigned Tag, DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits, Constant *AlignInBits,
@@ -543,7 +536,7 @@ namespace llvm {
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType CreateCompositeType(unsigned Tag, DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -555,7 +548,7 @@ namespace llvm {
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType CreateCompositeTypeEx(unsigned Tag, DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits,
@@ -567,25 +560,25 @@ namespace llvm {
/// CreateSubprogram - Create a new descriptor for the specified subprogram.
/// See comments in DISubprogram for descriptions of these fields.
- DISubprogram CreateSubprogram(DIDescriptor Context, StringRef Name,
- StringRef DisplayName,
- StringRef LinkageName,
+ DISubprogram CreateSubprogram(DIDescriptor Context, const char * Name,
+ const char * DisplayName,
+ const char * LinkageName,
DICompileUnit CompileUnit, unsigned LineNo,
DIType Type, bool isLocalToUnit,
bool isDefinition);
/// CreateGlobalVariable - Create a new descriptor for the specified global.
DIGlobalVariable
- CreateGlobalVariable(DIDescriptor Context, StringRef Name,
- StringRef DisplayName,
- StringRef LinkageName,
+ CreateGlobalVariable(DIDescriptor Context, const char * Name,
+ const char * DisplayName,
+ const char * LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type, bool isLocalToUnit,
bool isDefinition, llvm::GlobalVariable *GV);
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable CreateVariable(unsigned Tag, DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit, unsigned LineNo,
DIType Type);
@@ -605,30 +598,13 @@ namespace llvm {
DILocation CreateLocation(unsigned LineNo, unsigned ColumnNo,
DIScope S, DILocation OrigLoc);
- /// InsertStopPoint - Create a new llvm.dbg.stoppoint intrinsic invocation,
- /// inserting it at the end of the specified basic block.
- void InsertStopPoint(DICompileUnit CU, unsigned LineNo, unsigned ColNo,
- BasicBlock *BB);
-
- /// InsertSubprogramStart - Create a new llvm.dbg.func.start intrinsic to
- /// mark the start of the specified subprogram.
- void InsertSubprogramStart(DISubprogram SP, BasicBlock *BB);
-
- /// InsertRegionStart - Insert a new llvm.dbg.region.start intrinsic call to
- /// mark the start of a region for the specified scoping descriptor.
- void InsertRegionStart(DIDescriptor D, BasicBlock *BB);
-
- /// InsertRegionEnd - Insert a new llvm.dbg.region.end intrinsic call to
- /// mark the end of a region for the specified scoping descriptor.
- void InsertRegionEnd(DIDescriptor D, BasicBlock *BB);
-
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
- void InsertDeclare(llvm::Value *Storage, DIVariable D,
- BasicBlock *InsertAtEnd);
+ Instruction *InsertDeclare(llvm::Value *Storage, DIVariable D,
+ BasicBlock *InsertAtEnd);
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
- void InsertDeclare(llvm::Value *Storage, DIVariable D,
- Instruction *InsertBefore);
+ Instruction *InsertDeclare(llvm::Value *Storage, DIVariable D,
+ Instruction *InsertBefore);
private:
Constant *GetTagConstant(unsigned TAG);
@@ -693,12 +669,6 @@ bool getLocationInfo(const Value *V, std::string &DisplayName,
DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI,
DebugLocTracker &DebugLocInfo);
- /// isInlinedFnStart - Return true if FSI is starting an inlined function.
- bool isInlinedFnStart(DbgFuncStartInst &FSI, const Function *CurrentFn);
-
- /// isInlinedFnEnd - Return true if REI is ending an inlined function.
- bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn);
- /// DebugInfoFinder - This object collects DebugInfo from a module.
class DebugInfoFinder {
public:
@@ -716,21 +686,12 @@ bool getLocationInfo(const Value *V, std::string &DisplayName,
/// processSubprogram - Process DISubprogram.
void processSubprogram(DISubprogram SP);
- /// processStopPoint - Process DbgStopPointInst.
- void processStopPoint(DbgStopPointInst *SPI);
-
- /// processFuncStart - Process DbgFuncStartInst.
- void processFuncStart(DbgFuncStartInst *FSI);
-
- /// processRegionStart - Process DbgRegionStart.
- void processRegionStart(DbgRegionStartInst *DRS);
-
- /// processRegionEnd - Process DbgRegionEnd.
- void processRegionEnd(DbgRegionEndInst *DRE);
-
/// processDeclare - Process DbgDeclareInst.
void processDeclare(DbgDeclareInst *DDI);
+ /// processLocation - Process DILocation.
+ void processLocation(DILocation Loc);
+
/// addCompileUnit - Add compile unit into CUs.
bool addCompileUnit(DICompileUnit CU);
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 17aaf95..2e149d5 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -310,7 +310,6 @@ public:
if (DomTreeNodes.size() != OtherDomTreeNodes.size())
return true;
- SmallPtrSet<const NodeT *,4> MyBBs;
for (typename DomTreeNodeMapType::const_iterator
I = this->DomTreeNodes.begin(),
E = this->DomTreeNodes.end(); I != E; ++I) {
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 948c675..22fbb35 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -161,6 +161,10 @@ public:
void addUser(const SCEV *Offset, Instruction *User, Value *Operand) {
Users.push_back(new IVStrideUse(this, Offset, User, Operand));
}
+
+ void removeUser(IVStrideUse *User) {
+ Users.erase(User);
+ }
};
class IVUsers : public LoopPass {
@@ -201,6 +205,9 @@ public:
/// return true. Otherwise, return false.
bool AddUsersIfInteresting(Instruction *I);
+ void AddUser(const SCEV *Stride, const SCEV *Offset,
+ Instruction *User, Value *Operand);
+
/// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace of the given IVStrideUse.
const SCEV *getReplacementExpr(const IVStrideUse &U) const;
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
new file mode 100644
index 0000000..aa5c0f5
--- /dev/null
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -0,0 +1,74 @@
+//===-- InstructionSimplify.h - Fold instructions into simpler forms ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares routines for folding instructions into simpler forms that
+// do not require creating new instructions. For example, this does constant
+// folding, and can handle identities like (X&0)->0.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
+#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
+
+namespace llvm {
+ class Instruction;
+ class Value;
+ class TargetData;
+
+ /// SimplifyAndInst - Given operands for an And, see if we can
+ /// fold the result. If not, this returns null.
+ Value *SimplifyAndInst(Value *LHS, Value *RHS,
+ const TargetData *TD = 0);
+
+ /// SimplifyOrInst - Given operands for an Or, see if we can
+ /// fold the result. If not, this returns null.
+ Value *SimplifyOrInst(Value *LHS, Value *RHS,
+ const TargetData *TD = 0);
+
+ /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
+ /// fold the result. If not, this returns null.
+ Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD = 0);
+
+ /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
+ /// fold the result. If not, this returns null.
+ Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD = 0);
+
+
+ //=== Helper functions for higher up the class hierarchy.
+
+
+ /// SimplifyCmpInst - Given operands for a CmpInst, see if we can
+ /// fold the result. If not, this returns null.
+ Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD = 0);
+
+ /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
+ /// fold the result. If not, this returns null.
+ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD = 0);
+
+ /// SimplifyInstruction - See if we can compute a simplified version of this
+ /// instruction. If not, this returns null.
+ Value *SimplifyInstruction(Instruction *I, const TargetData *TD = 0);
+
+
+ /// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
+ /// delete the From instruction. In addition to a basic RAUW, this does a
+ /// recursive simplification of the updated instructions. This catches
+ /// things where one simplification exposes other opportunities. This only
+ /// simplifies and deletes scalar operations, it does not change the CFG.
+ ///
+ void ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
+ const TargetData *TD = 0);
+} // end namespace llvm
+
+#endif
+
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
new file mode 100644
index 0000000..566788d
--- /dev/null
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -0,0 +1,73 @@
+//===- LazyValueInfo.h - Value constraint analysis --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LAZYVALUEINFO_H
+#define LLVM_ANALYSIS_LAZYVALUEINFO_H
+
+#include "llvm/Pass.h"
+
+namespace llvm {
+ class Constant;
+ class TargetData;
+ class Value;
+
+/// LazyValueInfo - This pass computes, caches, and vends lazy value constraint
+/// information.
+class LazyValueInfo : public FunctionPass {
+ class TargetData *TD;
+ void *PImpl;
+ LazyValueInfo(const LazyValueInfo&); // DO NOT IMPLEMENT.
+ void operator=(const LazyValueInfo&); // DO NOT IMPLEMENT.
+public:
+ static char ID;
+ LazyValueInfo() : FunctionPass(&ID), PImpl(0) {}
+ ~LazyValueInfo() { assert(PImpl == 0 && "releaseMemory not called"); }
+
+ /// Tristate - This is used to return true/false/dunno results.
+ enum Tristate {
+ Unknown = -1, False = 0, True = 1
+ };
+
+
+ // Public query interface.
+
+ /// getPredicateOnEdge - Determine whether the specified value comparison
+ /// with a constant is known to be true or false on the specified CFG edge.
+ /// Pred is a CmpInst predicate.
+ Tristate getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+ BasicBlock *FromBB, BasicBlock *ToBB);
+
+
+ /// getConstant - Determine whether the specified value is known to be a
+ /// constant at the end of the specified block. Return null if not.
+ Constant *getConstant(Value *V, BasicBlock *BB);
+
+ /// getConstantOnEdge - Determine whether the specified value is known to be a
+ /// constant on the specified edge. Return null if not.
+ Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB);
+
+
+ // Implementation boilerplate.
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void releaseMemory();
+ virtual bool runOnFunction(Function &F);
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/include/llvm/Analysis/LiveValues.h b/include/llvm/Analysis/LiveValues.h
index 31b00d7..b92cb78 100644
--- a/include/llvm/Analysis/LiveValues.h
+++ b/include/llvm/Analysis/LiveValues.h
@@ -94,10 +94,6 @@ public:
bool isKilledInBlock(const Value *V, const BasicBlock *BB);
};
-/// createLiveValuesPass - This creates an instance of the LiveValues pass.
-///
-FunctionPass *createLiveValuesPass();
-
-}
+} // end namespace llvm
#endif
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index bc87adb..6504bdc 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -114,8 +114,8 @@ public:
block_iterator block_begin() const { return Blocks.begin(); }
block_iterator block_end() const { return Blocks.end(); }
- /// isLoopExiting - True if terminator in the block can branch to another block
- /// that is outside of the current loop.
+ /// isLoopExiting - True if terminator in the block can branch to another
+ /// block that is outside of the current loop.
///
bool isLoopExiting(const BlockT *BB) const {
typedef GraphTraits<BlockT*> BlockTraits;
@@ -572,6 +572,10 @@ public:
/// normal form.
bool isLoopSimplifyForm() const;
+ /// hasDedicatedExits - Return true if no exit block for the loop
+ /// has a predecessor that is outside the loop.
+ bool hasDedicatedExits() const;
+
/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
/// These are the blocks _outside of the current loop_ which are branched to.
/// This assumes that loop is in canonical form.
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index fde7dc6..f6fa0c8 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -17,7 +17,6 @@
namespace llvm {
class CallInst;
-class LLVMContext;
class PointerType;
class TargetData;
class Type;
@@ -29,54 +28,52 @@ class Value;
/// isMalloc - Returns true if the value is either a malloc call or a bitcast of
/// the result of a malloc call
-bool isMalloc(const Value* I);
+bool isMalloc(const Value *I);
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
-const CallInst* extractMallocCall(const Value* I);
-CallInst* extractMallocCall(Value* I);
+const CallInst *extractMallocCall(const Value *I);
+CallInst *extractMallocCall(Value *I);
/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
/// instruction is a bitcast of the result of a malloc call.
-const CallInst* extractMallocCallFromBitCast(const Value* I);
-CallInst* extractMallocCallFromBitCast(Value* I);
+const CallInst *extractMallocCallFromBitCast(const Value *I);
+CallInst *extractMallocCallFromBitCast(Value *I);
/// isArrayMalloc - Returns the corresponding CallInst if the instruction
/// is a call to malloc whose array size can be determined and the array size
/// is not constant 1. Otherwise, return NULL.
-CallInst* isArrayMalloc(Value* I, LLVMContext &Context, const TargetData* TD);
-const CallInst* isArrayMalloc(const Value* I, LLVMContext &Context,
- const TargetData* TD);
+const CallInst *isArrayMalloc(const Value *I, const TargetData *TD);
/// getMallocType - Returns the PointerType resulting from the malloc call.
/// The PointerType depends on the number of bitcast uses of the malloc call:
/// 0: PointerType is the malloc calls' return type.
/// 1: PointerType is the bitcast's result type.
/// >1: Unique PointerType cannot be determined, return NULL.
-const PointerType* getMallocType(const CallInst* CI);
+const PointerType *getMallocType(const CallInst *CI);
/// getMallocAllocatedType - Returns the Type allocated by malloc call.
/// The Type depends on the number of bitcast uses of the malloc call:
/// 0: PointerType is the malloc calls' return type.
/// 1: PointerType is the bitcast's result type.
/// >1: Unique PointerType cannot be determined, return NULL.
-const Type* getMallocAllocatedType(const CallInst* CI);
+const Type *getMallocAllocatedType(const CallInst *CI);
/// getMallocArraySize - Returns the array size of a malloc call. If the
/// argument passed to malloc is a multiple of the size of the malloced type,
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
/// determined.
-Value* getMallocArraySize(CallInst* CI, LLVMContext &Context,
- const TargetData* TD);
+Value *getMallocArraySize(CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt = false);
//===----------------------------------------------------------------------===//
// free Call Utility Functions.
//
/// isFreeCall - Returns true if the the value is a call to the builtin free()
-bool isFreeCall(const Value* I);
+bool isFreeCall(const Value *I);
} // End llvm namespace
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index 66ab3ea..b222321 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -139,6 +139,12 @@ namespace llvm {
// createLiveValuesPass - This creates an instance of the LiveValues pass.
//
FunctionPass *createLiveValuesPass();
+
+ //===--------------------------------------------------------------------===//
+ //
+ /// createLazyValueInfoPass - This creates an instance of the LazyValueInfo
+ /// pass.
+ FunctionPass *createLazyValueInfoPass();
//===--------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 915227d..bbdd043 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -38,8 +38,7 @@ namespace llvm {
friend struct SCEVVisitor<SCEVExpander, Value*>;
public:
explicit SCEVExpander(ScalarEvolution &se)
- : SE(se), Builder(se.getContext(),
- TargetFolder(se.TD, se.getContext())) {}
+ : SE(se), Builder(se.getContext(), TargetFolder(se.TD)) {}
/// clear - Erase the contents of the InsertedExpressions map so that users
/// trying to expand the same expression into multiple BasicBlocks or
diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h
index 820e1bd1..677d41d 100644
--- a/include/llvm/Analysis/SparsePropagation.h
+++ b/include/llvm/Analysis/SparsePropagation.h
@@ -153,7 +153,7 @@ public:
/// value. If an value is not in the map, it is returned as untracked,
/// unlike the getOrInitValueState method.
LatticeVal getLatticeState(Value *V) const {
- DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
+ DenseMap<Value*, LatticeVal>::const_iterator I = ValueState.find(V);
return I != ValueState.end() ? I->second : LatticeFunc->getUntrackedVal();
}
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index f233608..038d442 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -63,6 +63,15 @@ namespace llvm {
unsigned ComputeNumSignBits(Value *Op, const TargetData *TD = 0,
unsigned Depth = 0);
+  /// ComputeMultiple - This function computes the integer multiple of Base
+  /// that equals V. If successful, it returns true and stores the multiple in
+  /// Multiple; if unsuccessful, it returns false. Also, if V can be
+  /// simplified to an integer, then the simplified V is returned in Val. Look
+  /// through sext only if LookThroughSExt=true.
+ bool ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+ bool LookThroughSExt = false,
+ unsigned Depth = 0);
+
/// CannotBeNegativeZero - Return true if we can prove that the specified FP
/// value is never equal to -0.0.
///
diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h
index ba4caeb..80d8702 100644
--- a/include/llvm/BasicBlock.h
+++ b/include/llvm/BasicBlock.h
@@ -247,7 +247,7 @@ private:
/// almost never 2, and inconceivably 3 or more.
void AdjustBlockAddressRefCount(int Amt) {
SubclassData += Amt;
- assert((int)(char)SubclassData >= 0 && "Refcount wrap-around");
+ assert((int)(signed char)SubclassData >= 0 && "Refcount wrap-around");
}
};
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index e48a190..2b1b85e 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -294,7 +294,7 @@ private:
/// known to exist at the end of the the record.
template<typename uintty>
void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
- const StringRef &Blob) {
+ StringRef Blob) {
const char *BlobData = Blob.data();
unsigned BlobLen = (unsigned) Blob.size();
unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
@@ -422,7 +422,7 @@ public:
/// of the record.
template<typename uintty>
void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
- const StringRef &Blob) {
+ StringRef Blob) {
EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob);
}
template<typename uintty>
@@ -435,7 +435,7 @@ public:
/// that end with an array.
template<typename uintty>
void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
- const StringRef &Array) {
+ StringRef Array) {
EmitRecordWithAbbrevImpl(Abbrev, Vals, Array);
}
template<typename uintty>
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index d051f84..109ff74 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -86,6 +86,14 @@ namespace llvm {
DwarfWriter *DW;
public:
+ /// Flags to specify different kinds of comments to output in
+ /// assembly code. These flags carry semantic information not
+ /// otherwise easily derivable from the IR text.
+ ///
+ enum CommentFlag {
+ ReloadReuse = 0x1
+ };
+
/// Output stream on which we're printing assembly code.
///
formatted_raw_ostream &O;
@@ -172,11 +180,11 @@ namespace llvm {
/// EmitStartOfAsmFile - This virtual method can be overridden by targets
/// that want to emit something at the start of their file.
- virtual void EmitStartOfAsmFile(Module &M) {}
+ virtual void EmitStartOfAsmFile(Module &) {}
/// EmitEndOfAsmFile - This virtual method can be overridden by targets that
/// want to emit something at the end of their file.
- virtual void EmitEndOfAsmFile(Module &M) {}
+ virtual void EmitEndOfAsmFile(Module &) {}
/// doFinalization - Shut down the asmprinter. If you override this in your
/// pass, you must make sure to call it explicitly.
@@ -373,10 +381,10 @@ namespace llvm {
/// printImplicitDef - This method prints the specified machine instruction
/// that is an implicit def.
- virtual void printImplicitDef(const MachineInstr *MI) const;
+ void printImplicitDef(const MachineInstr *MI) const;
/// printKill - This method prints the specified kill machine instruction.
- virtual void printKill(const MachineInstr *MI) const;
+ void printKill(const MachineInstr *MI) const;
/// printPICJumpTableSetLabel - This method prints a set label for the
/// specified MachineBasicBlock for a jumptable entry.
diff --git a/include/llvm/CodeGen/BinaryObject.h b/include/llvm/CodeGen/BinaryObject.h
index 9e2ef18..3ade7c9 100644
--- a/include/llvm/CodeGen/BinaryObject.h
+++ b/include/llvm/CodeGen/BinaryObject.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CODEGEN_BINARYOBJECT_H
#define LLVM_CODEGEN_BINARYOBJECT_H
+#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/System/DataTypes.h"
#include <string>
@@ -22,7 +23,6 @@
namespace llvm {
-class MachineRelocation;
typedef std::vector<uint8_t> BinaryData;
class BinaryObject {
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index 5e730fc..45a2757 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -183,6 +183,13 @@ public:
void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn);
+ /// CheckReturn - Analyze the return values of a function, returning
+ /// true if the return can be performed without sret-demotion, and
+ /// false otherwise.
+ bool CheckReturn(const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ CCAssignFn Fn);
+
/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
/// incorporating info about the passed values into this state.
void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
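
A hedged sketch of how a target's return lowering might use the new CheckReturn to decide whether sret demotion is needed; the wrapper and RetCC_MyTarget are hypothetical stand-ins, only CheckReturn itself is from this patch:

    bool canReturnDirectly(CCState &CCInfo,
                           const SmallVectorImpl<EVT> &OutTys,
                           const SmallVectorImpl<ISD::ArgFlagsTy> &Flags) {
      // RetCC_MyTarget: a stand-in for a target's CCAssignFn.
      return CCInfo.CheckReturn(OutTys, Flags, RetCC_MyTarget);
    }
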
diff --git a/include/llvm/CodeGen/DAGISelHeader.h b/include/llvm/CodeGen/DAGISelHeader.h
index b2acbc1..624f18a 100644
--- a/include/llvm/CodeGen/DAGISelHeader.h
+++ b/include/llvm/CodeGen/DAGISelHeader.h
@@ -64,22 +64,22 @@ public:
/// ReplaceUses - replace all uses of the old node F with the use
/// of the new node T.
-void ReplaceUses(SDValue F, SDValue T) DISABLE_INLINE {
+DISABLE_INLINE void ReplaceUses(SDValue F, SDValue T) {
ISelUpdater ISU(ISelPosition);
CurDAG->ReplaceAllUsesOfValueWith(F, T, &ISU);
}
/// ReplaceUses - replace all uses of the old nodes F with the use
/// of the new nodes T.
-void ReplaceUses(const SDValue *F, const SDValue *T,
- unsigned Num) DISABLE_INLINE {
+DISABLE_INLINE void ReplaceUses(const SDValue *F, const SDValue *T,
+ unsigned Num) {
ISelUpdater ISU(ISelPosition);
CurDAG->ReplaceAllUsesOfValuesWith(F, T, Num, &ISU);
}
/// ReplaceUses - replace all uses of the old node F with the use
/// of the new node T.
-void ReplaceUses(SDNode *F, SDNode *T) DISABLE_INLINE {
+DISABLE_INLINE void ReplaceUses(SDNode *F, SDNode *T) {
ISelUpdater ISU(ISelPosition);
CurDAG->ReplaceAllUsesWith(F, T, &ISU);
}
diff --git a/include/llvm/CodeGen/DwarfWriter.h b/include/llvm/CodeGen/DwarfWriter.h
index e7a2f66..460c3c7 100644
--- a/include/llvm/CodeGen/DwarfWriter.h
+++ b/include/llvm/CodeGen/DwarfWriter.h
@@ -87,31 +87,15 @@ public:
/// the source line list.
unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
- /// RecordRegionStart - Indicate the start of a region.
- unsigned RecordRegionStart(MDNode *N);
-
- /// RecordRegionEnd - Indicate the end of a region.
- unsigned RecordRegionEnd(MDNode *N);
-
/// getRecordSourceLineCount - Count source lines.
unsigned getRecordSourceLineCount();
- /// RecordVariable - Indicate the declaration of a local variable.
- ///
- void RecordVariable(MDNode *N, unsigned FrameIndex);
-
/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
/// be emitted.
bool ShouldEmitDwarfDebug() const;
- //// RecordInlinedFnStart - Indicate the start of a inlined function.
- unsigned RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU,
- unsigned Line, unsigned Col);
-
- /// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
- unsigned RecordInlinedFnEnd(DISubprogram SP);
- void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L);
- void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L);
+ void BeginScope(const MachineInstr *MI, unsigned Label);
+ void EndScope(const MachineInstr *MI);
};
} // end llvm namespace
diff --git a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index 1673c89..7d1b1fe 100644
--- a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -16,6 +16,7 @@
#define LLVM_CODEGEN_LINKALLASMWRITERCOMPONENTS_H
#include "llvm/CodeGen/GCs.h"
+#include <cstdlib>
namespace {
struct ForceAsmWriterLinking {
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index efb4a03..cf768c3 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -189,20 +189,8 @@ namespace llvm {
return indexes_->getMBBFromIndex(index);
}
- bool hasGapBeforeInstr(SlotIndex index) {
- return indexes_->hasGapBeforeInstr(index);
- }
-
- bool hasGapAfterInstr(SlotIndex index) {
- return indexes_->hasGapAfterInstr(index);
- }
-
- SlotIndex findGapBeforeInstr(SlotIndex index, bool furthest = false) {
- return indexes_->findGapBeforeInstr(index, furthest);
- }
-
- void InsertMachineInstrInMaps(MachineInstr *MI, SlotIndex Index) {
- indexes_->insertMachineInstrInMaps(MI, Index);
+ SlotIndex InsertMachineInstrInMaps(MachineInstr *MI) {
+ return indexes_->insertMachineInstrInMaps(MI);
}
void RemoveMachineInstrFromMaps(MachineInstr *MI) {
@@ -219,7 +207,7 @@ namespace llvm {
}
void renumber() {
- indexes_->renumber();
+ indexes_->renumberIndexes();
}
BumpPtrAllocator& getVNInfoAllocator() { return VNInfoAllocator; }
@@ -290,9 +278,10 @@ namespace llvm {
/// computeIntervals - Compute live intervals.
void computeIntervals();
- bool isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt,
- SmallVector<MachineInstr*,16> &IdentCopies,
- SmallVector<MachineInstr*,16> &OtherCopies);
+ bool isSafeAndProfitableToCoalesce(LiveInterval &DstInt,
+ LiveInterval &SrcInt,
+ SmallVector<MachineInstr*,16> &IdentCopies,
+ SmallVector<MachineInstr*,16> &OtherCopies);
void performEarlyCoalescing();
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index 172fb75..b2be569 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -103,7 +103,10 @@ public:
Kills.erase(I);
return true;
}
-
+
+ /// findKill - Find a kill instruction in MBB. Return NULL if none is found.
+ MachineInstr *findKill(const MachineBasicBlock *MBB) const;
+
void dump() const;
};
@@ -263,6 +266,12 @@ public:
void HandleVirtRegDef(unsigned reg, MachineInstr *MI);
void HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
MachineInstr *MI);
+
+ /// addNewBlock - Add a new basic block BB as an empty successor to
+ /// DomBB. All variables that are live out of DomBB will be marked as passing
+ /// live through BB. This method assumes that the machine code is still in SSA
+ /// form.
+ void addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB);
};
} // End llvm namespace
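
A sketch of the intended use of the two additions: findKill locates the kill of a vreg within one block, and addNewBlock keeps liveness consistent when a pass creates a block while the code is still in SSA form (Reg, MBB, NewBB, and DomBB are assumed to exist):

    LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
    if (MachineInstr *Kill = VI.findKill(MBB))
      (void)Kill;  // the instruction that last reads Reg in MBB
    LV->addNewBlock(NewBB, DomBB);  // NewBB inherits DomBB's live-outs
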
diff --git a/include/llvm/CodeGen/MachORelocation.h b/include/llvm/CodeGen/MachORelocation.h
index d4027cc..27306c6 100644
--- a/include/llvm/CodeGen/MachORelocation.h
+++ b/include/llvm/CodeGen/MachORelocation.h
@@ -15,6 +15,8 @@
#ifndef LLVM_CODEGEN_MACHO_RELOCATION_H
#define LLVM_CODEGEN_MACHO_RELOCATION_H
+#include "llvm/System/DataTypes.h"
+
namespace llvm {
/// MachORelocation - This struct contains information about each relocation
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 585ee14..bb50b5d 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -225,7 +225,13 @@ public:
/// potential fall-throughs at the end of the block.
void moveBefore(MachineBasicBlock *NewAfter);
void moveAfter(MachineBasicBlock *NewBefore);
-
+
+ /// updateTerminator - Update the terminator instructions in block to account
+ /// for changes to the layout. If the block previously used a fallthrough,
+ /// it may now need a branch, and if it previously used branching it may now
+ /// be able to use a fallthrough.
+ void updateTerminator();
+
// Machine-CFG mutators
/// addSuccessor - Add succ as a successor of this MachineBasicBlock.
@@ -246,7 +252,7 @@ public:
/// transferSuccessors - Transfers all the successors from MBB to this
/// machine basic block (i.e., copies all the successors fromMBB and
- /// remove all the successors fromBB).
+ /// remove all the successors from fromMBB).
void transferSuccessors(MachineBasicBlock *fromMBB);
/// isSuccessor - Return true if the specified MBB is a successor of this
@@ -352,6 +358,8 @@ private: // Methods used to maintain doubly linked list of blocks...
raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB);
+void WriteAsOperand(raw_ostream &, const MachineBasicBlock*, bool t);
+
//===--------------------------------------------------------------------===//
// GraphTraits specializations for machine basic block graphs (machine-CFGs)
//===--------------------------------------------------------------------===//
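
A minimal sketch of the new updateTerminator contract, assuming a pass has just reordered blocks:

    MBB->moveAfter(Other);      // layout changed; old fallthroughs are stale
    Other->updateTerminator();  // Other may now fall through to MBB
    MBB->updateTerminator();    // MBB may need an explicit branch instead
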
diff --git a/include/llvm/CodeGen/MachineCodeInfo.h b/include/llvm/CodeGen/MachineCodeInfo.h
index 024e602..a75c02a 100644
--- a/include/llvm/CodeGen/MachineCodeInfo.h
+++ b/include/llvm/CodeGen/MachineCodeInfo.h
@@ -17,6 +17,8 @@
#ifndef EE_MACHINE_CODE_INFO_H
#define EE_MACHINE_CODE_INFO_H
+#include "llvm/System/DataTypes.h"
+
namespace llvm {
class MachineCodeInfo {
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index e56776b..086528a 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -23,8 +23,6 @@
namespace llvm {
-inline void WriteAsOperand(raw_ostream &, const MachineBasicBlock*, bool t) { }
-
template<>
inline void DominatorTreeBase<MachineBasicBlock>::addRoot(MachineBasicBlock* MBB) {
this->Roots.push_back(MBB);
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 07c1eca..bed82af 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -15,9 +15,11 @@
#define LLVM_CODEGEN_MACHINEFRAMEINFO_H
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/System/DataTypes.h"
#include <cassert>
+#include <limits>
#include <vector>
namespace llvm {
@@ -106,8 +108,8 @@ class MachineFrameInfo {
// cannot alias any other memory objects.
bool isSpillSlot;
- StackObject(uint64_t Sz, unsigned Al, int64_t SP = 0, bool IM = false,
- bool isSS = false)
+ StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
+ bool isSS)
: SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
isSpillSlot(isSS) {}
};
@@ -182,6 +184,10 @@ class MachineFrameInfo {
/// CSIValid - Has CSInfo been set yet?
bool CSIValid;
+ /// SpillObjects - A vector indicating which frame indices refer to
+ /// spill slots.
+ SmallVector<bool, 8> SpillObjects;
+
/// MMI - This field is set (via setMachineModuleInfo) by a module info
/// consumer (ex. DwarfWriter) to indicate that frame layout information
/// should be acquired. Typically, it's the responsibility of the target's
@@ -192,6 +198,7 @@ class MachineFrameInfo {
/// TargetFrameInfo - Target information about frame layout.
///
const TargetFrameInfo &TFI;
+
public:
explicit MachineFrameInfo(const TargetFrameInfo &tfi) : TFI(tfi) {
StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0;
@@ -341,7 +348,7 @@ public:
/// index with a negative value.
///
int CreateFixedObject(uint64_t Size, int64_t SPOffset,
- bool Immutable = true);
+ bool Immutable, bool isSS);
/// isFixedObjectIndex - Returns true if the specified index corresponds to a
@@ -374,13 +381,25 @@ public:
return Objects[ObjectIdx+NumFixedObjects].Size == ~0ULL;
}
- /// CreateStackObject - Create a new statically sized stack object, returning
- /// a nonnegative identifier to represent it.
+ /// CreateStackObject - Create a new statically sized stack object,
+ /// returning a nonnegative identifier to represent it.
///
- int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS = false) {
+ int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS) {
assert(Size != 0 && "Cannot allocate zero size stack objects!");
Objects.push_back(StackObject(Size, Alignment, 0, false, isSS));
- return (int)Objects.size()-NumFixedObjects-1;
+ int Index = (int)Objects.size()-NumFixedObjects-1;
+ assert(Index >= 0 && "Bad frame index!");
+ return Index;
+ }
+
+ /// CreateSpillStackObject - Create a new statically sized stack
+ /// object that represents a spill slot, returning a nonnegative
+ /// identifier to represent it.
+ ///
+ int CreateSpillStackObject(uint64_t Size, unsigned Alignment) {
+ CreateStackObject(Size, Alignment, true);
+ int Index = (int)Objects.size()-NumFixedObjects-1;
+ return Index;
}
/// RemoveStackObject - Remove or mark dead a statically sized stack object.
@@ -397,10 +416,10 @@ public:
///
int CreateVariableSizedObject() {
HasVarSizedObjects = true;
- Objects.push_back(StackObject(0, 1));
+ Objects.push_back(StackObject(0, 1, 0, false, false));
return (int)Objects.size()-NumFixedObjects-1;
}
-
+
/// getCalleeSavedInfo - Returns a reference to the callee saved info vector for the
/// current function.
const std::vector<CalleeSavedInfo> &getCalleeSavedInfo() const {
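
A sketch of allocating a spill slot with the new helper; the TargetRegisterClass accessors are assumed from the same-era API:

    // One slot sized and aligned for the register class being spilled.
    int FI = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
    // Ordinary locals keep using CreateStackObject, now with isSS explicit:
    int LocalFI = MFI->CreateStackObject(Size, Align, /*isSS=*/false);
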
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 40260ea..d2f5224 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -23,7 +23,6 @@
#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Recycler.h"
-#include <map>
namespace llvm {
@@ -115,6 +114,9 @@ class MachineFunction {
// The alignment of the function.
unsigned Alignment;
+ MachineFunction(const MachineFunction &); // intentionally unimplemented
+ void operator=(const MachineFunction&); // intentionally unimplemented
+
public:
MachineFunction(Function *Fn, const TargetMachine &TM);
~MachineFunction();
@@ -229,6 +231,10 @@ public:
///
void dump() const;
+ /// verify - Run the current MachineFunction through the machine code
+ /// verifier, useful for debugger use.
+ void verify() const;
+
// Provide accessors for the MachineBasicBlock list...
typedef BasicBlockListType::iterator iterator;
typedef BasicBlockListType::const_iterator const_iterator;
diff --git a/include/llvm/CodeGen/MachineFunctionAnalysis.h b/include/llvm/CodeGen/MachineFunctionAnalysis.h
index d020a7b..aa4cc91 100644
--- a/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -31,7 +31,7 @@ private:
public:
static char ID;
- explicit MachineFunctionAnalysis(TargetMachine &tm,
+ explicit MachineFunctionAnalysis(const TargetMachine &tm,
CodeGenOpt::Level OL = CodeGenOpt::Default);
~MachineFunctionAnalysis();
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index de22710..c620449 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Target/TargetInstrDesc.h"
#include "llvm/Support/DebugLoc.h"
@@ -45,6 +46,13 @@ private:
unsigned short NumImplicitOps; // Number of implicit operands (which
// are determined at construction time).
+ unsigned short AsmPrinterFlags; // Various bits of information used by
+ // the AsmPrinter to emit helpful
+ // comments. This is *not* semantic
+ // information. Do not use this for
+ // anything other than to convey comment
+ // information to AsmPrinter.
+
std::vector<MachineOperand> Operands; // the operands
mmo_iterator MemRefs; // information on memory references
mmo_iterator MemRefsEnd;
@@ -107,6 +115,22 @@ public:
const MachineBasicBlock* getParent() const { return Parent; }
MachineBasicBlock* getParent() { return Parent; }
+ /// getAsmPrinterFlags - Return the asm printer flags bitvector.
+ ///
+ unsigned short getAsmPrinterFlags() const { return AsmPrinterFlags; }
+
+ /// getAsmPrinterFlag - Return whether an AsmPrinter flag is set.
+ ///
+ bool getAsmPrinterFlag(AsmPrinter::CommentFlag Flag) const {
+ return AsmPrinterFlags & Flag;
+ }
+
+ /// setAsmPrinterFlag - Set a flag for the AsmPrinter.
+ ///
+ void setAsmPrinterFlag(unsigned short Flag) {
+ AsmPrinterFlags |= Flag;
+ }
+
/// getDebugLoc - Returns the debug location id of this MachineInstr.
///
DebugLoc getDebugLoc() const { return debugLoc; }
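
A sketch of the flag round-trip, using the ReloadReuse flag declared in AsmPrinter.h earlier in this patch; the spiller call site and the exact comment text are illustrative:

    // Spiller side: tag an instruction whose reload was reused.
    MI->setAsmPrinterFlag((unsigned short)AsmPrinter::ReloadReuse);

    // AsmPrinter side: purely cosmetic, feeds an assembly comment only.
    if (MI->getAsmPrinterFlag(AsmPrinter::ReloadReuse))
      O << " ; Reload Reuse";  // never use this bit for codegen decisions
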
diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h
index 3ff2f2e..57c65c8 100644
--- a/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -69,6 +69,11 @@ public:
/// the jump tables to branch to New instead.
bool ReplaceMBBInJumpTables(MachineBasicBlock *Old, MachineBasicBlock *New);
+ /// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+ /// the jump table to branch to New instead.
+ bool ReplaceMBBInJumpTable(unsigned Idx, MachineBasicBlock *Old,
+ MachineBasicBlock *New);
+
/// getEntrySize - Returns the size of an individual field in a jump table.
///
unsigned getEntrySize() const { return EntrySize; }
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index b7e267d..5dee199 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -16,6 +16,8 @@
#ifndef LLVM_CODEGEN_MACHINEMEMOPERAND_H
#define LLVM_CODEGEN_MACHINEMEMOPERAND_H
+#include "llvm/System/DataTypes.h"
+
namespace llvm {
class Value;
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index f2b027b..47616ce 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -44,8 +44,6 @@
#include "llvm/Pass.h"
#include "llvm/Metadata.h"
-#define ATTACH_DEBUG_INFO_TO_AN_INSN 1
-
namespace llvm {
//===----------------------------------------------------------------------===//
@@ -150,7 +148,8 @@ class MachineModuleInfo : public ImmutablePass {
public:
static char ID; // Pass identification, replacement for typeid
- typedef SmallVector< std::pair<TrackingVH<MDNode>, unsigned>, 4 >
+ typedef std::pair<unsigned, TrackingVH<MDNode> > UnsignedAndMDNodePair;
+ typedef SmallVector< std::pair<TrackingVH<MDNode>, UnsignedAndMDNodePair>, 4>
VariableDbgInfoMapTy;
VariableDbgInfoMapTy VariableDbgInfo;
@@ -336,8 +335,8 @@ public:
/// setVariableDbgInfo - Collect information used to emit debugging information
/// of a variable.
- void setVariableDbgInfo(MDNode *N, unsigned S) {
- VariableDbgInfo.push_back(std::make_pair(N, S));
+ void setVariableDbgInfo(MDNode *N, unsigned Slot, MDNode *Scope) {
+ VariableDbgInfo.push_back(std::make_pair(N, std::make_pair(Slot, Scope)));
}
VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfo; }
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 18e6020..c55cb32 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -243,6 +243,12 @@ public:
return true;
return false;
}
+ bool isLiveOut(unsigned Reg) const {
+ for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
+ if (*I == Reg)
+ return true;
+ return false;
+ }
private:
void HandleVRegListReallocation();
diff --git a/include/llvm/CodeGen/MachineRelocation.h b/include/llvm/CodeGen/MachineRelocation.h
index 6ea8f07..1c15fab 100644
--- a/include/llvm/CodeGen/MachineRelocation.h
+++ b/include/llvm/CodeGen/MachineRelocation.h
@@ -65,7 +65,7 @@ class MachineRelocation {
unsigned TargetReloType : 6; // The target relocation ID
AddressType AddrType : 4; // The field of Target to use
- bool NeedStub : 1; // True if this relocation requires a stub
+ bool MayNeedFarStub : 1; // True if this relocation may require a far-stub
bool GOTRelative : 1; // Should this relocation be relative to the GOT?
bool TargetResolve : 1; // True if target should resolve the address
@@ -81,7 +81,7 @@ public:
///
static MachineRelocation getGV(uintptr_t offset, unsigned RelocationType,
GlobalValue *GV, intptr_t cst = 0,
- bool NeedStub = 0,
+ bool MayNeedFarStub = 0,
bool GOTrelative = 0) {
assert((RelocationType & ~63) == 0 && "Relocation type too large!");
MachineRelocation Result;
@@ -89,7 +89,7 @@ public:
Result.ConstantVal = cst;
Result.TargetReloType = RelocationType;
Result.AddrType = isGV;
- Result.NeedStub = NeedStub;
+ Result.MayNeedFarStub = MayNeedFarStub;
Result.GOTRelative = GOTrelative;
Result.TargetResolve = false;
Result.Target.GV = GV;
@@ -101,7 +101,7 @@ public:
static MachineRelocation getIndirectSymbol(uintptr_t offset,
unsigned RelocationType,
GlobalValue *GV, intptr_t cst = 0,
- bool NeedStub = 0,
+ bool MayNeedFarStub = 0,
bool GOTrelative = 0) {
assert((RelocationType & ~63) == 0 && "Relocation type too large!");
MachineRelocation Result;
@@ -109,7 +109,7 @@ public:
Result.ConstantVal = cst;
Result.TargetReloType = RelocationType;
Result.AddrType = isIndirectSym;
- Result.NeedStub = NeedStub;
+ Result.MayNeedFarStub = MayNeedFarStub;
Result.GOTRelative = GOTrelative;
Result.TargetResolve = false;
Result.Target.GV = GV;
@@ -126,7 +126,7 @@ public:
Result.ConstantVal = cst;
Result.TargetReloType = RelocationType;
Result.AddrType = isBB;
- Result.NeedStub = false;
+ Result.MayNeedFarStub = false;
Result.GOTRelative = false;
Result.TargetResolve = false;
Result.Target.MBB = MBB;
@@ -145,7 +145,7 @@ public:
Result.ConstantVal = cst;
Result.TargetReloType = RelocationType;
Result.AddrType = isExtSym;
- Result.NeedStub = true;
+ Result.MayNeedFarStub = true;
Result.GOTRelative = GOTrelative;
Result.TargetResolve = false;
Result.Target.ExtSym = ES;
@@ -164,7 +164,7 @@ public:
Result.ConstantVal = cst;
Result.TargetReloType = RelocationType;
Result.AddrType = isConstPool;
- Result.NeedStub = false;
+ Result.MayNeedFarStub = false;
Result.GOTRelative = false;
Result.TargetResolve = letTargetResolve;
Result.Target.Index = CPI;
@@ -183,7 +183,7 @@ public:
Result.ConstantVal = cst;
Result.TargetReloType = RelocationType;
Result.AddrType = isJumpTable;
- Result.NeedStub = false;
+ Result.MayNeedFarStub = false;
Result.GOTRelative = false;
Result.TargetResolve = letTargetResolve;
Result.Target.Index = JTI;
@@ -258,12 +258,14 @@ public:
return GOTRelative;
}
- /// doesntNeedStub - This function returns true if the JIT for this target
- /// target is capable of directly handling the relocated GlobalValue reference
- /// without using either a stub function or issuing an extra load to get the
- /// GV address.
- bool doesntNeedStub() const {
- return !NeedStub;
+ /// mayNeedFarStub - This function returns true if the JIT for this target may
+ /// need either a stub function or an indirect global-variable load to handle
+ /// the relocated GlobalValue reference. For example, the x86-64 call
+ /// instruction can only call functions within +/-2GB of the call site.
+ /// Anything farther away needs a longer mov+call sequence, which can't just
+ /// be written on top of the existing call.
+ bool mayNeedFarStub() const {
+ return MayNeedFarStub;
}
/// letTargetResolve - Return true if the target JITInfo is usually
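
The +/-2GB figure comes from the 32-bit signed displacement of the x86-64 call instruction; a standalone illustration of the reachability test a JIT might perform (not code from this patch):

    #include <stdint.h>
    // True if a 'call rel32' at PC can reach Target directly.
    bool fitsInRel32(uint64_t PC, uint64_t Target) {
      int64_t Disp = (int64_t)(Target - (PC + 5)); // 5-byte call encoding
      return Disp >= -(1LL << 31) && Disp < (1LL << 31);
    }
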
diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h
index 26392f5..bace631 100644
--- a/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/include/llvm/CodeGen/PseudoSourceValue.h
@@ -32,7 +32,7 @@ namespace llvm {
virtual void printCustom(raw_ostream &O) const;
public:
- PseudoSourceValue();
+ explicit PseudoSourceValue(enum ValueTy Subclass = PseudoSourceValueVal);
/// isConstant - Test whether the memory pointed to by this
/// PseudoSourceValue has a constant value.
@@ -52,7 +52,8 @@ namespace llvm {
///
static inline bool classof(const PseudoSourceValue *) { return true; }
static inline bool classof(const Value *V) {
- return V->getValueID() == PseudoSourceValueVal;
+ return V->getValueID() == PseudoSourceValueVal ||
+ V->getValueID() == FixedStackPseudoSourceValueVal;
}
/// A pseudo source value referencing a fixed stack frame entry,
@@ -76,6 +77,36 @@ namespace llvm {
/// constant, this doesn't need to identify a specific jump table.
static const PseudoSourceValue *getJumpTable();
};
+
+ /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue
+ /// for holding FixedStack values, which must include a frame
+ /// index.
+ class FixedStackPseudoSourceValue : public PseudoSourceValue {
+ const int FI;
+ public:
+ explicit FixedStackPseudoSourceValue(int fi) :
+ PseudoSourceValue(FixedStackPseudoSourceValueVal), FI(fi) {}
+
+ /// classof - Methods for support type inquiry through isa, cast, and
+ /// dyn_cast:
+ ///
+ static inline bool classof(const FixedStackPseudoSourceValue *) {
+ return true;
+ }
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == FixedStackPseudoSourceValueVal;
+ }
+
+ virtual bool isConstant(const MachineFrameInfo *MFI) const;
+
+ virtual bool isAliased(const MachineFrameInfo *MFI) const;
+
+ virtual bool mayAlias(const MachineFrameInfo *) const;
+
+ virtual void printCustom(raw_ostream &OS) const;
+
+ int getFrameIndex() const { return FI; }
+ };
} // End llvm namespace
#endif
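
The classof overloads above exist so the usual isa/dyn_cast machinery works on the new subclass; a sketch of the expected query pattern in an aliasing-style client:

    void inspect(const PseudoSourceValue *PSV) {
      if (const FixedStackPseudoSourceValue *FS =
              dyn_cast<FixedStackPseudoSourceValue>(PSV)) {
        int FI = FS->getFrameIndex(); // which fixed slot this memory op touches
        (void)FI; // e.g. compare against another operand's frame index
      }
    }
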
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 7a40f02..c404ab6 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -41,22 +41,27 @@ namespace RTLIB {
SRA_I32,
SRA_I64,
SRA_I128,
+ MUL_I8,
MUL_I16,
MUL_I32,
MUL_I64,
MUL_I128,
+ SDIV_I8,
SDIV_I16,
SDIV_I32,
SDIV_I64,
SDIV_I128,
+ UDIV_I8,
UDIV_I16,
UDIV_I32,
UDIV_I64,
UDIV_I128,
+ SREM_I8,
SREM_I16,
SREM_I32,
SREM_I64,
SREM_I128,
+ UREM_I8,
UREM_I16,
UREM_I32,
UREM_I64,
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index f960851..d4d40b1 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1953,10 +1953,10 @@ public:
/// that value are zero, and the corresponding bits in the SplatUndef mask
/// are set. The SplatBitSize value is set to the splat element size in
/// bits. HasAnyUndefs is set to true if any bits in the vector are
- /// undefined.
+ /// undefined. isBigEndian describes the endianness of the target.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
unsigned &SplatBitSize, bool &HasAnyUndefs,
- unsigned MinSplatBits = 0);
+ unsigned MinSplatBits = 0, bool isBigEndian = false);
static inline bool classof(const BuildVectorSDNode *) { return true; }
static inline bool classof(const SDNode *N) {
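
A caller-side sketch of the widened isConstantSplat signature; the TargetData endianness query is assumed from the same-era API:

    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                            HasAnyUndefs, /*MinSplatBits=*/32,
                            TD->isBigEndian()))
      ;  // SplatValue now holds the element pattern, endian-corrected
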
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index a179725..65d85fc 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -39,9 +39,6 @@ namespace llvm {
class IndexListEntry {
private:
- static std::auto_ptr<IndexListEntry> emptyKeyEntry,
- tombstoneKeyEntry;
- typedef enum { EMPTY_KEY, TOMBSTONE_KEY } ReservedEntryType;
static const unsigned EMPTY_KEY_INDEX = ~0U & ~3U,
TOMBSTONE_KEY_INDEX = ~0U & ~7U;
@@ -49,6 +46,10 @@ namespace llvm {
MachineInstr *mi;
unsigned index;
+ protected:
+
+ typedef enum { EMPTY_KEY, TOMBSTONE_KEY } ReservedEntryType;
+
// This constructor is only to be used by getEmptyKeyEntry
// & getTombstoneKeyEntry. It sets index to the given
// value and mi to zero.
@@ -58,6 +59,8 @@ namespace llvm {
case TOMBSTONE_KEY: index = TOMBSTONE_KEY_INDEX; break;
default: assert(false && "Invalid value for constructor.");
}
+ next = this;
+ prev = this;
}
public:
@@ -70,36 +73,45 @@ namespace llvm {
}
MachineInstr* getInstr() const { return mi; }
- void setInstr(MachineInstr *mi) { this->mi = mi; }
+ void setInstr(MachineInstr *mi) {
+ assert(index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX &&
+ "Attempt to modify reserved index.");
+ this->mi = mi;
+ }
unsigned getIndex() const { return index; }
- void setIndex(unsigned index) { this->index = index; }
+ void setIndex(unsigned index) {
+ assert(index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX &&
+ "Attempt to set index to invalid value.");
+ assert(this->index != EMPTY_KEY_INDEX &&
+ this->index != TOMBSTONE_KEY_INDEX &&
+ "Attempt to reset reserved index value.");
+ this->index = index;
+ }
IndexListEntry* getNext() { return next; }
const IndexListEntry* getNext() const { return next; }
- void setNext(IndexListEntry *next) { this->next = next; }
+ void setNext(IndexListEntry *next) {
+ assert(index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX &&
+ "Attempt to modify reserved index.");
+ this->next = next;
+ }
IndexListEntry* getPrev() { return prev; }
const IndexListEntry* getPrev() const { return prev; }
- void setPrev(IndexListEntry *prev) { this->prev = prev; }
+ void setPrev(IndexListEntry *prev) {
+ assert(index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX &&
+ "Attempt to modify reserved index.");
+ this->prev = prev;
+ }
// This function returns the index list entry that is to be used for empty
// SlotIndex keys.
- static IndexListEntry* getEmptyKeyEntry() {
- if (emptyKeyEntry.get() == 0) {
- emptyKeyEntry.reset(new IndexListEntry(EMPTY_KEY));
- }
- return emptyKeyEntry.get();
- }
+ static IndexListEntry* getEmptyKeyEntry();
// This function returns the index list entry that is to be used for
// tombstone SlotIndex keys.
- static IndexListEntry* getTombstoneKeyEntry() {
- if (tombstoneKeyEntry.get() == 0) {
- tombstoneKeyEntry.reset(new IndexListEntry(TOMBSTONE_KEY));
- }
- return tombstoneKeyEntry.get();
- }
+ static IndexListEntry* getTombstoneKeyEntry();
};
// Specialize PointerLikeTypeTraits for IndexListEntry.
@@ -118,7 +130,7 @@ namespace llvm {
/// SlotIndex - An opaque wrapper around machine indexes.
class SlotIndex {
friend class SlotIndexes;
- friend class DenseMapInfo<SlotIndex>;
+ friend struct DenseMapInfo<SlotIndex>;
private:
static const unsigned PHI_BIT = 1 << 2;
@@ -475,7 +487,7 @@ namespace llvm {
void dump() const;
/// Renumber the index list, providing space for new instructions.
- void renumber();
+ void renumberIndexes();
/// Returns the zero index for this analysis.
SlotIndex getZeroIndex() {
@@ -635,99 +647,89 @@ namespace llvm {
return 0;
}
- /// Returns true if there is a gap in the numbering before the given index.
- bool hasGapBeforeInstr(SlotIndex index) {
- index = index.getBaseIndex();
- SlotIndex prevIndex = index.getPrevIndex();
-
- if (prevIndex == getZeroIndex())
- return false;
-
- if (getInstructionFromIndex(prevIndex) == 0)
- return true;
-
- if (prevIndex.distance(index) >= 2 * SlotIndex::NUM)
- return true;
+ /// Insert the given machine instruction into the mapping. Returns the
+ /// assigned index.
+ SlotIndex insertMachineInstrInMaps(MachineInstr *mi,
+ bool *deferredRenumber = 0) {
+ assert(mi2iMap.find(mi) == mi2iMap.end() && "Instr already indexed.");
- return false;
- }
-
- /// Returns true if there is a gap in the numbering after the given index.
- bool hasGapAfterInstr(SlotIndex index) const {
- // Not implemented yet.
- assert(false &&
- "SlotIndexes::hasGapAfterInstr(SlotIndex) not implemented yet.");
- return false;
- }
+ MachineBasicBlock *mbb = mi->getParent();
- /// findGapBeforeInstr - Find an empty instruction slot before the
- /// specified index. If "Furthest" is true, find one that's furthest
- /// away from the index (but before any index that's occupied).
- // FIXME: This whole method should go away in future. It should
- // always be possible to insert code between existing indices.
- SlotIndex findGapBeforeInstr(SlotIndex index, bool furthest = false) {
- if (index == getZeroIndex())
- return getInvalidIndex();
+ assert(mbb != 0 && "Instr must be added to function.");
- index = index.getBaseIndex();
- SlotIndex prevIndex = index.getPrevIndex();
+ MBB2IdxMap::iterator mbbRangeItr = mbb2IdxMap.find(mbb);
- if (prevIndex == getZeroIndex())
- return getInvalidIndex();
+ assert(mbbRangeItr != mbb2IdxMap.end() &&
+ "Instruction's parent MBB has not been added to SlotIndexes.");
- // Try to reuse existing index objects with null-instrs.
- if (getInstructionFromIndex(prevIndex) == 0) {
- if (furthest) {
- while (getInstructionFromIndex(prevIndex) == 0 &&
- prevIndex != getZeroIndex()) {
- prevIndex = prevIndex.getPrevIndex();
- }
+ MachineBasicBlock::iterator miItr(mi);
+ bool needRenumber = false;
+ IndexListEntry *newEntry;
- prevIndex = prevIndex.getNextIndex();
- }
-
- assert(getInstructionFromIndex(prevIndex) == 0 && "Index list is broken.");
-
- return prevIndex;
+ IndexListEntry *prevEntry;
+ if (miItr == mbb->begin()) {
+ // If mi is at the mbb beginning, get the prev index from the mbb.
+ prevEntry = &mbbRangeItr->second.first.entry();
+ } else {
+ // Otherwise get it from the previous instr.
+ MachineBasicBlock::iterator pItr(prior(miItr));
+ prevEntry = &getInstructionIndex(pItr).entry();
}
- int dist = prevIndex.distance(index);
+ // Get next entry from previous entry.
+ IndexListEntry *nextEntry = prevEntry->getNext();
- // Double check that the spacing between this instruction and
- // the last is sane.
- assert(dist >= SlotIndex::NUM &&
- "Distance between indexes too small.");
+ // Get a number for the new instr, or 0 if there's no room currently.
+ // In the latter case we'll force a renumber later.
+ unsigned dist = nextEntry->getIndex() - prevEntry->getIndex();
+ unsigned newNumber = dist > SlotIndex::NUM ?
+ prevEntry->getIndex() + ((dist >> 1) & ~3U) : 0;
- // If there's no gap return an invalid index.
- if (dist < 2*SlotIndex::NUM) {
- return getInvalidIndex();
+ if (newNumber == 0) {
+ needRenumber = true;
}
- // Otherwise insert new index entries into the list using the
- // gap in the numbering.
- IndexListEntry *newEntry =
- createEntry(0, prevIndex.entry().getIndex() + SlotIndex::NUM);
+ // Insert a new list entry for mi.
+ newEntry = createEntry(mi, newNumber);
+ insert(nextEntry, newEntry);
+
+ SlotIndex newIndex(newEntry, SlotIndex::LOAD);
+ mi2iMap.insert(std::make_pair(mi, newIndex));
+
+ if (miItr == mbb->end()) {
+ // If this is the last instr in the MBB then we need to fix up the bb
+ // range:
+ mbbRangeItr->second.second = SlotIndex(newEntry, SlotIndex::STORE);
+ }
- insert(&index.entry(), newEntry);
+ // Renumber if we need to.
+ if (needRenumber) {
+ if (deferredRenumber == 0)
+ renumberIndexes();
+ else
+ *deferredRenumber = true;
+ }
- // And return a pointer to the entry at the start of the gap.
- return index.getPrevIndex();
+ return newIndex;
}
- /// Insert the given machine instruction into the mapping at the given
- /// index.
- void insertMachineInstrInMaps(MachineInstr *mi, SlotIndex index) {
- index = index.getBaseIndex();
- IndexListEntry *miEntry = &index.entry();
- assert(miEntry->getInstr() == 0 && "Index already in use.");
- miEntry->setInstr(mi);
+ /// Add all instructions in the vector to the index list. This method will
+ /// defer renumbering until all instrs have been added, and should be
+ /// preferred when adding multiple instrs.
+ void insertMachineInstrsInMaps(SmallVectorImpl<MachineInstr*> &mis) {
+ bool renumber = false;
- assert(mi2iMap.find(mi) == mi2iMap.end() &&
- "MachineInstr already has an index.");
+ for (SmallVectorImpl<MachineInstr*>::iterator
+ miItr = mis.begin(), miEnd = mis.end();
+ miItr != miEnd; ++miItr) {
+ insertMachineInstrInMaps(*miItr, &renumber);
+ }
- mi2iMap.insert(std::make_pair(mi, index));
+ if (renumber)
+ renumberIndexes();
}
+
/// Remove the given machine instruction from the mapping.
void removeMachineInstrFromMaps(MachineInstr *mi) {
// remove index -> MachineInstr and
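
A sketch of the batch form added above, which defers the renumber until all instructions are mapped; the instructions are assumed to already be linked into their blocks:

    SmallVector<MachineInstr*, 8> NewMIs;
    // ... create instructions and insert them into their MBBs ...
    Indexes->insertMachineInstrsInMaps(NewMIs);  // at most one renumber
    SlotIndex First = Indexes->getInstructionIndex(NewMIs[0]);
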
diff --git a/include/llvm/CompilerDriver/BuiltinOptions.h b/include/llvm/CompilerDriver/BuiltinOptions.h
index fe44c30..0c1bbe2 100644
--- a/include/llvm/CompilerDriver/BuiltinOptions.h
+++ b/include/llvm/CompilerDriver/BuiltinOptions.h
@@ -25,10 +25,11 @@ extern llvm::cl::opt<std::string> OutputFilename;
extern llvm::cl::opt<std::string> TempDirname;
extern llvm::cl::list<std::string> Languages;
extern llvm::cl::opt<bool> DryRun;
+extern llvm::cl::opt<bool> Time;
extern llvm::cl::opt<bool> VerboseMode;
extern llvm::cl::opt<bool> CheckGraph;
-extern llvm::cl::opt<bool> WriteGraph;
extern llvm::cl::opt<bool> ViewGraph;
+extern llvm::cl::opt<bool> WriteGraph;
extern llvm::cl::opt<SaveTempsEnum::Values> SaveTemps;
#endif // LLVM_INCLUDE_COMPILER_DRIVER_BUILTIN_OPTIONS_H
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index fa5d316..1f48ae9 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -9,6 +9,21 @@
/* Define if CBE is enabled for printf %a output */
#undef ENABLE_CBE_PRINTF_A
+/* Directories clang will search for headers */
+#define C_INCLUDE_DIRS "${C_INCLUDE_DIRS}"
+
+/* Directory clang will search for libstdc++ headers */
+#define CXX_INCLUDE_ROOT "${CXX_INCLUDE_ROOT}"
+
+/* Architecture of libstdc++ headers */
+#define CXX_INCLUDE_ARCH "${CXX_INCLUDE_ARCH}"
+
+/* 32 bit multilib directory */
+#define CXX_INCLUDE_32BIT_DIR "${CXX_INCLUDE_32BIT_DIR}"
+
+/* 64 bit multilib directory */
+#define CXX_INCLUDE_64BIT_DIR "${CXX_INCLUDE_64BIT_DIR}"
+
/* Define if position independent code is enabled */
#cmakedefine ENABLE_PIC ${ENABLE_PIC}
@@ -48,6 +63,9 @@
/* Define to 1 if you have the `ceilf' function. */
#cmakedefine HAVE_CEILF ${HAVE_CEILF}
+/* Define if the circo program is available */
+#cmakedefine HAVE_CIRCO ${HAVE_CIRCO}
+
/* Define to 1 if you have the `closedir' function. */
#undef HAVE_CLOSEDIR
@@ -77,10 +95,10 @@
#cmakedefine HAVE_DL_H ${HAVE_DL_H}
/* Define if the dot program is available */
-#undef HAVE_DOT
+#cmakedefine HAVE_DOT ${HAVE_DOT}
/* Define if the dotty program is available */
-#undef HAVE_DOTTY
+#cmakedefine HAVE_DOTTY ${HAVE_DOTTY}
/* Define if you have the _dyld_func_lookup function. */
#undef HAVE_DYLD
@@ -97,6 +115,9 @@
/* Define to 1 if you have the <fcntl.h> header file. */
#cmakedefine HAVE_FCNTL_H ${HAVE_FCNTL_H}
+/* Define if the fdp program is available */
+#cmakedefine HAVE_FDP ${HAVE_FDP}
+
/* Set to 1 if the finite function is found in <ieeefp.h> */
#cmakedefine HAVE_FINITE_IN_IEEEFP_H ${HAVE_FINITE_IN_IEEEFP_H}
@@ -137,7 +158,7 @@
#undef HAVE_GRAPHVIZ
/* Define if the gv program is available */
-#undef HAVE_GV
+#cmakedefine HAVE_GV ${HAVE_GV}
/* Define to 1 if you have the `index' function. */
#undef HAVE_INDEX
@@ -247,7 +268,10 @@
#cmakedefine HAVE_NDIR_H ${HAVE_NDIR_H}
/* Define to 1 if you have the `nearbyintf' function. */
-#undef HAVE_NEARBYINTF
+#cmakedefine HAVE_NEARBYINTF ${HAVE_NEARBYINTF}
+
+/* Define if the neato program is available */
+#cmakedefine HAVE_NEATO ${HAVE_NEATO}
/* Define to 1 if you have the `opendir' function. */
#undef HAVE_OPENDIR
@@ -410,6 +434,9 @@
/* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
#cmakedefine HAVE_SYS_WAIT_H ${HAVE_SYS_WAIT_H}
+/* Define if the twopi program is available */
+#cmakedefine HAVE_TWOPI ${HAVE_TWOPI}
+
/* Define to 1 if the system has the type `uint64_t'. */
#undef HAVE_UINT64_T
@@ -467,17 +494,29 @@
/* Added by Kevin -- Maximum path length */
#cmakedefine MAXPATHLEN ${MAXPATHLEN}
+/* Define to path to circo program if found or 'echo circo' otherwise */
+#cmakedefine LLVM_PATH_CIRCO "${LLVM_PATH_CIRCO}"
+
/* Define to path to dot program if found or 'echo dot' otherwise */
-#undef LLVM_PATH_DOT
+#cmakedefine LLVM_PATH_DOT "${LLVM_PATH_DOT}"
/* Define to path to dotty program if found or 'echo dotty' otherwise */
-#undef LLVM_PATH_DOTTY
+#cmakedefine LLVM_PATH_DOTTY "${LLVM_PATH_DOTTY}"
+
+/* Define to path to fdp program if found or 'echo fdp' otherwise */
+#cmakedefine LLVM_PATH_FDP "${LLVM_PATH_FDP}"
/* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */
#undef LLVM_PATH_GRAPHVIZ
/* Define to path to gv program if found or 'echo gv' otherwise */
-#undef LLVM_PATH_GV
+#cmakedefine LLVM_PATH_GV "${LLVM_PATH_GV}"
+
+/* Define to path to neato program if found or 'echo neato' otherwise */
+#cmakedefine LLVM_PATH_NEATO "${LLVM_PATH_NEATO}"
+
+/* Define to path to twopi program if found or 'echo twopi' otherwise */
+#cmakedefine LLVM_PATH_TWOPI "${LLVM_PATH_TWOPI}"
/* Installation prefix directory */
#undef LLVM_PREFIX
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index 5257df9..8051f55 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -8,9 +8,24 @@
*/
#undef CRAY_STACKSEG_END
+/* 32 bit multilib directory. */
+#undef CXX_INCLUDE_32BIT_DIR
+
+/* 64 bit multilib directory. */
+#undef CXX_INCLUDE_64BIT_DIR
+
+/* Architecture of the libstdc++ headers. */
+#undef CXX_INCLUDE_ARCH
+
+/* Directory with the libstdc++ headers. */
+#undef CXX_INCLUDE_ROOT
+
/* Define to 1 if using `alloca.c'. */
#undef C_ALLOCA
+/* Directories clang will search for headers */
+#undef C_INCLUDE_DIRS
+
/* Define if CBE is enabled for printf %a output */
#undef ENABLE_CBE_PRINTF_A
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 99928d9..caa13f6 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -86,7 +86,7 @@ public:
/// Return a ConstantInt constructed from the string strStart with the given
/// radix.
- static ConstantInt *get(const IntegerType *Ty, const StringRef &Str,
+ static ConstantInt *get(const IntegerType *Ty, StringRef Str,
uint8_t radix);
/// If Ty is a vector type, return a Constant with a splat of the given
@@ -255,7 +255,7 @@ public:
/// only be used for simple constant values like 2.0/1.0 etc, that are
/// known-valid both as host double and as the target format.
static Constant *get(const Type* Ty, double V);
- static Constant *get(const Type* Ty, const StringRef &Str);
+ static Constant *get(const Type* Ty, StringRef Str);
static ConstantFP *get(LLVMContext &Context, const APFloat &V);
static ConstantFP *getNegativeZero(const Type* Ty);
static ConstantFP *getInfinity(const Type *Ty, bool Negative = false);
@@ -353,7 +353,7 @@ public:
/// of the array by one (you've been warned). However, in some situations
/// this is not desired so if AddNull==false then the string is copied without
/// null termination.
- static Constant *get(LLVMContext &Context, const StringRef &Initializer,
+ static Constant *get(LLVMContext &Context, StringRef Initializer,
bool AddNull = true);
/// Transparently provide more efficient getOperand methods.
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 4b828e46..d2c547d 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -91,7 +91,6 @@ class ExecutionEngine {
bool CompilingLazily;
bool GVCompilationDisabled;
bool SymbolSearchingDisabled;
- bool DlsymStubsEnabled;
friend class EngineBuilder; // To allow access to JITCtor and InterpCtor.
@@ -114,7 +113,8 @@ protected:
std::string *ErrorStr,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
- bool GVsWithCode);
+ bool GVsWithCode,
+ CodeModel::Model CMM);
static ExecutionEngine *(*InterpCtor)(ModuleProvider *MP,
std::string *ErrorStr);
@@ -174,7 +174,9 @@ public:
JITMemoryManager *JMM = 0,
CodeGenOpt::Level OptLevel =
CodeGenOpt::Default,
- bool GVsWithCode = true);
+ bool GVsWithCode = true,
+ CodeModel::Model CMM =
+ CodeModel::Default);
/// addModuleProvider - Add a ModuleProvider to the list of modules that we
/// can JIT from. Note that this takes ownership of the ModuleProvider: when
@@ -369,15 +371,7 @@ public:
bool isSymbolSearchingDisabled() const {
return SymbolSearchingDisabled;
}
-
- /// EnableDlsymStubs -
- void EnableDlsymStubs(bool Enabled = true) {
- DlsymStubsEnabled = Enabled;
- }
- bool areDlsymStubsEnabled() const {
- return DlsymStubsEnabled;
- }
-
+
/// InstallLazyFunctionCreator - If an unknown function is needed, the
/// specified function pointer is invoked to create it. If it returns null,
/// the JIT will abort.
@@ -434,6 +428,7 @@ class EngineBuilder {
CodeGenOpt::Level OptLevel;
JITMemoryManager *JMM;
bool AllocateGVsWithCode;
+ CodeModel::Model CMModel;
/// InitEngine - Does the common initialization of default options.
///
@@ -443,6 +438,7 @@ class EngineBuilder {
OptLevel = CodeGenOpt::Default;
JMM = NULL;
AllocateGVsWithCode = false;
+ CMModel = CodeModel::Default;
}
public:
@@ -487,6 +483,13 @@ class EngineBuilder {
return *this;
}
+ /// setCodeModel - Set the CodeModel that the ExecutionEngine target
+ /// data is using. Defaults to the target-specific default, "CodeModel::Default".
+ EngineBuilder &setCodeModel(CodeModel::Model M) {
+ CMModel = M;
+ return *this;
+ }
+
/// setAllocateGVsWithCode - Sets whether global values should be allocated
/// into the same buffer as code. For most applications this should be set
/// to false. Allocating globals with code breaks freeMachineCodeForFunction
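
A sketch of the new knob in a JIT setup; EngineKind and the ModuleProvider constructor argument follow the same-era EngineBuilder API and are assumed here:

    ExecutionEngine *EE = EngineBuilder(MP)
                              .setEngineKind(EngineKind::JIT)
                              .setCodeModel(CodeModel::Large) // e.g. >2GB code
                              .create();
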
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 130612e..fd51920 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -71,17 +71,6 @@ public:
/// return a pointer to its base.
virtual uint8_t *getGOTBase() const = 0;
- /// SetDlsymTable - If the JIT must be able to relocate stubs after they have
- /// been emitted, potentially because they are being copied to a process
- /// where external symbols live at different addresses than in the JITing
- /// process, allocate a table with sufficient information to do so.
- virtual void SetDlsymTable(void *ptr) = 0;
-
- /// getDlsymTable - If this is managing a table of entries so that stubs to
- /// external symbols can be later relocated, this method should return a
- /// pointer to it.
- virtual void *getDlsymTable() const = 0;
-
/// NeedsExactSize - If the memory manager needs to know the size of the
/// objects to be emitted
bool NeedsExactSize() const {
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index 088c999..64be545 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -23,6 +23,7 @@
#include "llvm/BasicBlock.h"
#include "llvm/Argument.h"
#include "llvm/Attributes.h"
+#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -148,7 +149,7 @@ public:
/// The particular intrinsic functions which correspond to this value are
/// defined in llvm/Intrinsics.h.
///
- unsigned getIntrinsicID() const;
+ unsigned getIntrinsicID() const ATTRIBUTE_READONLY;
bool isIntrinsic() const { return getIntrinsicID() != 0; }
/// getCallingConv()/setCallingConv(CC) - These method get and set the
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
index 7b0de34..b8d219c 100644
--- a/include/llvm/GlobalValue.h
+++ b/include/llvm/GlobalValue.h
@@ -90,7 +90,7 @@ public:
bool hasSection() const { return !Section.empty(); }
const std::string &getSection() const { return Section; }
- void setSection(const StringRef &S) { Section = S; }
+ void setSection(StringRef S) { Section = S; }
/// If the usage is empty (except transitively dead constants), then this
/// global value can be safely deleted since the destructor will
diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h
index 56b2b9d..68bd1b3 100644
--- a/include/llvm/GlobalVariable.h
+++ b/include/llvm/GlobalVariable.h
@@ -28,7 +28,6 @@ namespace llvm {
class Module;
class Constant;
-class LLVMContext;
template<typename ValueSubClass, typename ItemParentClass>
class SymbolTableListTraits;
@@ -50,8 +49,7 @@ public:
}
/// GlobalVariable ctor - If a parent module is specified, the global is
/// automatically inserted into the end of the specified modules global list.
- GlobalVariable(LLVMContext &Context, const Type *Ty, bool isConstant,
- LinkageTypes Linkage,
+ GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes Linkage,
Constant *Initializer = 0, const Twine &Name = "",
bool ThreadLocal = false, unsigned AddressSpace = 0);
/// GlobalVariable ctor - This creates a global and inserts it before the
@@ -101,18 +99,10 @@ public:
assert(hasInitializer() && "GV doesn't have initializer!");
return static_cast<Constant*>(Op<0>().get());
}
- inline void setInitializer(Constant *CPV) {
- if (CPV == 0) {
- if (hasInitializer()) {
- Op<0>().set(0);
- NumOperands = 0;
- }
- } else {
- if (!hasInitializer())
- NumOperands = 1;
- Op<0>().set(CPV);
- }
- }
+ /// setInitializer - Sets the initializer for this global variable, removing
+ /// any existing initializer if InitVal==NULL. If this GV has type T*, the
+ /// initializer must have type T.
+ void setInitializer(Constant *InitVal);
/// If the value is a global constant, its value is immutable throughout the
/// runtime execution of the program. Assigning a value into the constant
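
A sketch of the contract documented above; Int32Ty and GV are assumed, and the initializer's type must match the global's pointee type:

    GV->setInitializer(0);  // removes any existing initializer
    GV->setInitializer(ConstantInt::get(Int32Ty, 42)); // GV must have type i32*
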
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index d54870e..482e53e 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -33,16 +33,16 @@ class InlineAsm : public Value {
bool HasSideEffects;
bool IsAlignStack;
- InlineAsm(const FunctionType *Ty, const StringRef &AsmString,
- const StringRef &Constraints, bool hasSideEffects,
+ InlineAsm(const FunctionType *Ty, StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
bool isAlignStack = false);
virtual ~InlineAsm();
public:
/// InlineAsm::get - Return the specified uniqued inline asm string.
///
- static InlineAsm *get(const FunctionType *Ty, const StringRef &AsmString,
- const StringRef &Constraints, bool hasSideEffects,
+ static InlineAsm *get(const FunctionType *Ty, StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
bool isAlignStack = false);
bool hasSideEffects() const { return HasSideEffects; }
@@ -65,7 +65,7 @@ public:
/// the specified constraint string is legal for the type. This returns true
/// if legal, false if not.
///
- static bool Verify(const FunctionType *Ty, const StringRef &Constraints);
+ static bool Verify(const FunctionType *Ty, StringRef Constraints);
// Constraint String Parsing
enum ConstraintPrefix {
@@ -110,7 +110,7 @@ public:
/// Parse - Analyze the specified string (e.g. "=*&{eax}") and fill in the
/// fields in this structure. If the constraint string is not understood,
/// return true, otherwise return false.
- bool Parse(const StringRef &Str,
+ bool Parse(StringRef Str,
std::vector<InlineAsm::ConstraintInfo> &ConstraintsSoFar);
};
@@ -118,7 +118,7 @@ public:
/// constraints and their prefixes. If this returns an empty vector, and if
/// the constraint string itself isn't empty, there was an error parsing.
static std::vector<ConstraintInfo>
- ParseConstraints(const StringRef &ConstraintString);
+ ParseConstraints(StringRef ConstraintString);
/// ParseConstraints - Parse the constraints of this inlineasm object,
/// returning them the same way that ParseConstraints(str) does.
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index df7eb43..bc89969 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -214,6 +214,27 @@ public:
return BO;
}
+ /// CreateNUWAdd - Create an Add operator with the NUW flag set.
+ ///
+ static BinaryOperator *CreateNUWAdd(Value *V1, Value *V2,
+ const Twine &Name = "") {
+ BinaryOperator *BO = CreateAdd(V1, V2, Name);
+ BO->setHasNoUnsignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNUWAdd(Value *V1, Value *V2,
+ const Twine &Name, BasicBlock *BB) {
+ BinaryOperator *BO = CreateAdd(V1, V2, Name, BB);
+ BO->setHasNoUnsignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNUWAdd(Value *V1, Value *V2,
+ const Twine &Name, Instruction *I) {
+ BinaryOperator *BO = CreateAdd(V1, V2, Name, I);
+ BO->setHasNoUnsignedWrap(true);
+ return BO;
+ }
+
/// CreateNSWSub - Create a Sub operator with the NSW flag set.
///
static BinaryOperator *CreateNSWSub(Value *V1, Value *V2,
@@ -235,6 +256,27 @@ public:
return BO;
}
+ /// CreateNUWSub - Create a Sub operator with the NUW flag set.
+ ///
+ static BinaryOperator *CreateNUWSub(Value *V1, Value *V2,
+ const Twine &Name = "") {
+ BinaryOperator *BO = CreateSub(V1, V2, Name);
+ BO->setHasNoUnsignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNUWSub(Value *V1, Value *V2,
+ const Twine &Name, BasicBlock *BB) {
+ BinaryOperator *BO = CreateSub(V1, V2, Name, BB);
+ BO->setHasNoUnsignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNUWSub(Value *V1, Value *V2,
+ const Twine &Name, Instruction *I) {
+ BinaryOperator *BO = CreateSub(V1, V2, Name, I);
+ BO->setHasNoUnsignedWrap(true);
+ return BO;
+ }
+
/// CreateExactSDiv - Create an SDiv operator with the exact flag set.
///
static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2,
@@ -658,7 +700,7 @@ public:
/// @brief Create a CmpInst
static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1,
Value *S2, const Twine &Name, BasicBlock *InsertAtEnd);
-
+
/// @brief Get the opcode casted to the right type
OtherOps getOpcode() const {
return static_cast<OtherOps>(Instruction::getOpcode());
@@ -670,6 +712,18 @@ public:
/// @brief Set the predicate for this instruction to the specified value.
void setPredicate(Predicate P) { SubclassData = P; }
+ static bool isFPPredicate(Predicate P) {
+ return P >= FIRST_FCMP_PREDICATE && P <= LAST_FCMP_PREDICATE;
+ }
+
+ static bool isIntPredicate(Predicate P) {
+ return P >= FIRST_ICMP_PREDICATE && P <= LAST_ICMP_PREDICATE;
+ }
+
+ bool isFPPredicate() const { return isFPPredicate(getPredicate()); }
+ bool isIntPredicate() const { return isIntPredicate(getPredicate()); }
+
+
/// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
/// OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
/// @returns the inverse predicate for the instruction's current predicate.
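
A sketch combining the two additions to InstrTypes.h, the NUW factory methods and the static predicate classifiers; the operands and insertion point are hypothetical:

    // An add that is known not to wrap in the unsigned sense.
    BinaryOperator *Sum =
        BinaryOperator::CreateNUWAdd(A, B, "sum", InsertBefore);

    // Classify a predicate without an instruction in hand.
    if (CmpInst::isIntPredicate(Pred))
      ;  // integer comparison (ICMP_*)
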
diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h
index 6a8f376..1e1dca2 100644
--- a/include/llvm/IntrinsicInst.h
+++ b/include/llvm/IntrinsicInst.h
@@ -320,6 +320,28 @@ namespace llvm {
}
};
+ /// MemoryUseIntrinsic - This is the common base class for the memory use
+ /// marker intrinsics.
+ ///
+ struct MemoryUseIntrinsic : public IntrinsicInst {
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const MemoryUseIntrinsic *) { return true; }
+ static inline bool classof(const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ return true;
+ default: return false;
+ }
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
}
#endif
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 5be032b..794f4bf 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -484,13 +484,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_packsswb_128 : GCCBuiltin<"__builtin_ia32_packsswb128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_packssdw_128 : GCCBuiltin<"__builtin_ia32_packssdw128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_movmsk_pd : GCCBuiltin<"__builtin_ia32_movmskpd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index bcb98c1..4aba210 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -82,6 +82,7 @@ namespace {
(void) llvm::createInternalizePass(false);
(void) llvm::createLCSSAPass();
(void) llvm::createLICMPass();
+ (void) llvm::createLazyValueInfoPass();
(void) llvm::createLiveValuesPass();
(void) llvm::createLoopDependenceAnalysisPass();
(void) llvm::createLoopExtractorPass();
@@ -119,7 +120,6 @@ namespace {
(void) llvm::createTailDuplicationPass();
(void) llvm::createJumpThreadingPass();
(void) llvm::createUnifyFunctionExitNodesPass();
- (void) llvm::createCondPropagationPass();
(void) llvm::createNullProfilerRSPass();
(void) llvm::createRSProfilingPass();
(void) llvm::createInstCountPass();
diff --git a/include/llvm/Linker.h b/include/llvm/Linker.h
index 1e1da86..a68a2e0 100644
--- a/include/llvm/Linker.h
+++ b/include/llvm/Linker.h
@@ -65,8 +65,8 @@ class Linker {
/// Construct the Linker with an empty module which will be given the
/// name \p progname. \p progname will also be used for error messages.
/// @brief Construct with empty module
- Linker(const StringRef &progname, ///< name of tool running linker
- const StringRef &modulename, ///< name of linker's end-result module
+ Linker(StringRef progname, ///< name of tool running linker
+ StringRef modulename, ///< name of linker's end-result module
LLVMContext &C, ///< Context for global info
unsigned Flags = 0 ///< ControlFlags (one or more |'d together)
);
@@ -74,7 +74,7 @@ class Linker {
/// Construct the Linker with a previously defined module, \p aModule. Use
/// \p progname for the name of the program in error messages.
/// @brief Construct with existing module
- Linker(const StringRef& progname, Module* aModule, unsigned Flags = 0);
+ Linker(StringRef progname, Module* aModule, unsigned Flags = 0);
/// Destruct the Linker.
/// @brief Destructor
@@ -214,8 +214,8 @@ class Linker {
/// @returns true if an error occurs, false otherwise
/// @brief Link one library into the module
bool LinkInLibrary (
- const StringRef &Library, ///< The library to link in
- bool& is_native ///< Indicates if lib a native library
+ StringRef Library, ///< The library to link in
+ bool& is_native ///< Indicates if lib is a native library
);
/// This function links one bitcode archive, \p Filename, into the module.
@@ -267,7 +267,7 @@ class Linker {
/// will be empty (i.e. sys::Path::isEmpty() will return true).
/// @returns A sys::Path to the found library
/// @brief Find a library from its short name.
- sys::Path FindLib(const StringRef &Filename);
+ sys::Path FindLib(StringRef Filename);
/// @}
/// @name Implementation
@@ -277,9 +277,9 @@ class Linker {
/// Module it contains (wrapped in an auto_ptr), or 0 if an error occurs.
std::auto_ptr<Module> LoadObject(const sys::Path& FN);
- bool warning(const StringRef &message);
- bool error(const StringRef &message);
- void verbose(const StringRef &message);
+ bool warning(StringRef message);
+ bool error(StringRef message);
+ void verbose(StringRef message);
/// @}
/// @name Data
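This file, like most of the headers below, switches StringRef parameters from const StringRef& to plain StringRef. A StringRef is just a (pointer, length) pair, so the by-value copy is as cheap as the reference while avoiding an indirection, and callers can still pass a std::string, a string literal, or another StringRef. A sketch of the preferred signature:

    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    // By-value StringRef: two words copied, no temporary bound to a reference.
    static size_t countDots(StringRef S) {
      size_t N = 0;
      for (size_t i = 0, e = S.size(); i != e; ++i)
        if (S[i] == '.')
          ++N;
      return N;
    }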
diff --git a/include/llvm/MC/MCAsmLexer.h b/include/llvm/MC/MCAsmLexer.h
index e369e30..da471d2 100644
--- a/include/llvm/MC/MCAsmLexer.h
+++ b/include/llvm/MC/MCAsmLexer.h
@@ -56,7 +56,7 @@ struct AsmToken {
public:
AsmToken() {}
- AsmToken(TokenKind _Kind, const StringRef &_Str, int64_t _IntVal = 0)
+ AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
: Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
TokenKind getKind() const { return Kind; }
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index fa20f45..95c6bd4 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -49,7 +49,7 @@ namespace llvm {
/// CreateSymbol - Create a new symbol with the specified @param Name.
///
/// @param Name - The symbol name, which must be unique across all symbols.
- MCSymbol *CreateSymbol(const StringRef &Name);
+ MCSymbol *CreateSymbol(StringRef Name);
/// GetOrCreateSymbol - Lookup the symbol inside with the specified
/// @param Name. If it exists, return it. If not, create a forward
@@ -58,7 +58,7 @@ namespace llvm {
/// @param Name - The symbol name, which must be unique across all symbols.
/// @param IsTemporary - Whether this symbol is an assembler temporary,
/// which should not survive into the symbol table for the translation unit.
- MCSymbol *GetOrCreateSymbol(const StringRef &Name);
+ MCSymbol *GetOrCreateSymbol(StringRef Name);
MCSymbol *GetOrCreateSymbol(const Twine &Name);
/// CreateTemporarySymbol - Create a new temporary symbol with the specified
@@ -67,10 +67,10 @@ namespace llvm {
/// @param Name - The symbol name, for debugging purposes only, temporary
/// symbols do not survive assembly. If non-empty, the name must be unique
/// across all symbols.
- MCSymbol *CreateTemporarySymbol(const StringRef &Name = "");
+ MCSymbol *CreateTemporarySymbol(StringRef Name = "");
/// LookupSymbol - Get the symbol for @param Name, or null.
- MCSymbol *LookupSymbol(const StringRef &Name) const;
+ MCSymbol *LookupSymbol(StringRef Name) const;
/// @}
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 4318628..13d40ec 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -120,7 +120,7 @@ public:
/// @{
static const MCSymbolRefExpr *Create(const MCSymbol *Symbol, MCContext &Ctx);
- static const MCSymbolRefExpr *Create(const StringRef &Name, MCContext &Ctx);
+ static const MCSymbolRefExpr *Create(StringRef Name, MCContext &Ctx);
/// @}
/// @name Accessors
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 9e07186..ceb6d27 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -51,13 +51,13 @@ namespace llvm {
/// of a syntactic one.
bool IsDirective;
- MCSectionCOFF(const StringRef &name, bool isDirective, SectionKind K)
+ MCSectionCOFF(StringRef name, bool isDirective, SectionKind K)
: MCSection(K), Name(name), IsDirective(isDirective) {
}
public:
- static MCSectionCOFF *Create(const StringRef &Name, bool IsDirective,
- SectionKind K, MCContext &Ctx);
+ static MCSectionCOFF *Create(StringRef Name, bool IsDirective,
+ SectionKind K, MCContext &Ctx);
const std::string &getName() const { return Name; }
bool isDirective() const { return IsDirective; }
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 57fa903..4ec745f 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -35,13 +35,13 @@ class MCSectionELF : public MCSection {
bool IsExplicit;
protected:
- MCSectionELF(const StringRef &Section, unsigned type, unsigned flags,
+ MCSectionELF(StringRef Section, unsigned type, unsigned flags,
SectionKind K, bool isExplicit)
: MCSection(K), SectionName(Section.str()), Type(type), Flags(flags),
IsExplicit(isExplicit) {}
public:
- static MCSectionELF *Create(const StringRef &Section, unsigned Type,
+ static MCSectionELF *Create(StringRef Section, unsigned Type,
unsigned Flags, SectionKind K, bool isExplicit,
MCContext &Ctx);
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 251c88f..6156819 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -33,7 +33,7 @@ class MCSectionMachO : public MCSection {
/// size of stubs, for example.
unsigned Reserved2;
- MCSectionMachO(const StringRef &Segment, const StringRef &Section,
+ MCSectionMachO(StringRef Segment, StringRef Section,
unsigned TAA, unsigned reserved2, SectionKind K)
: MCSection(K), TypeAndAttributes(TAA), Reserved2(reserved2) {
assert(Segment.size() <= 16 && Section.size() <= 16 &&
@@ -52,8 +52,8 @@ class MCSectionMachO : public MCSection {
}
public:
- static MCSectionMachO *Create(const StringRef &Segment,
- const StringRef &Section,
+ static MCSectionMachO *Create(StringRef Segment,
+ StringRef Section,
unsigned TypeAndAttributes,
unsigned Reserved2,
SectionKind K, MCContext &Ctx);
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 4d72f32..5febed7 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -155,7 +155,7 @@ namespace llvm {
///
/// This is used to implement assembler directives such as .byte, .ascii,
/// etc.
- virtual void EmitBytes(const StringRef &Data) = 0;
+ virtual void EmitBytes(StringRef Data) = 0;
/// EmitValue - Emit the expression @param Value into the output as a native
/// integer of the given @param Size bytes.
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index c6efe72..cfe04d8 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -56,7 +56,7 @@ namespace llvm {
private: // MCContext creates and uniques these.
friend class MCContext;
- MCSymbol(const StringRef &_Name, bool _IsTemporary)
+ MCSymbol(StringRef _Name, bool _IsTemporary)
: Name(_Name), Section(0), Value(0), IsTemporary(_IsTemporary) {}
MCSymbol(const MCSymbol&); // DO NOT IMPLEMENT
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index ed87d42..1d18eba 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -60,6 +60,7 @@ protected:
public:
static MDString *get(LLVMContext &Context, StringRef Str);
+ static MDString *get(LLVMContext &Context, const char *Str);
StringRef getString() const { return Str; }
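The new const char* overload presumably lets call sites with plain C strings bind without an intermediate StringRef; typical use is unchanged (assuming an LLVMContext &Ctx in scope):

    MDString *S = MDString::get(Ctx, "dbg");
    StringRef Name = S->getString();   // "dbg"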
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index 70eba68..04dfb35 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -154,7 +154,7 @@ private:
public:
/// The Module constructor. Note that there is no default constructor. You
/// must provide a name for the module upon construction.
- explicit Module(const StringRef &ModuleID, LLVMContext& C);
+ explicit Module(StringRef ModuleID, LLVMContext& C);
/// The module destructor. This will dropAllReferences.
~Module();
@@ -196,20 +196,20 @@ public:
public:
/// Set the module identifier.
- void setModuleIdentifier(const StringRef &ID) { ModuleID = ID; }
+ void setModuleIdentifier(StringRef ID) { ModuleID = ID; }
/// Set the data layout
- void setDataLayout(const StringRef &DL) { DataLayout = DL; }
+ void setDataLayout(StringRef DL) { DataLayout = DL; }
/// Set the target triple.
- void setTargetTriple(const StringRef &T) { TargetTriple = T; }
+ void setTargetTriple(StringRef T) { TargetTriple = T; }
/// Set the module-scope inline assembly blocks.
- void setModuleInlineAsm(const StringRef &Asm) { GlobalScopeAsm = Asm; }
+ void setModuleInlineAsm(StringRef Asm) { GlobalScopeAsm = Asm; }
/// Append to the module-scope inline assembly blocks, automatically
/// appending a newline to the end.
- void appendModuleInlineAsm(const StringRef &Asm) {
+ void appendModuleInlineAsm(StringRef Asm) {
GlobalScopeAsm += Asm;
GlobalScopeAsm += '\n';
}
@@ -221,7 +221,7 @@ public:
/// getNamedValue - Return the first global value in the module with
/// the specified name, of arbitrary type. This method returns null
/// if a global with the specified name is not found.
- GlobalValue *getNamedValue(const StringRef &Name) const;
+ GlobalValue *getNamedValue(StringRef Name) const;
/// @}
/// @name Function Accessors
@@ -236,10 +236,10 @@ public:
/// the existing function.
/// 4. Finally, the function exists but has the wrong prototype: return the
/// function with a constantexpr cast to the right prototype.
- Constant *getOrInsertFunction(const StringRef &Name, const FunctionType *T,
+ Constant *getOrInsertFunction(StringRef Name, const FunctionType *T,
AttrListPtr AttributeList);
- Constant *getOrInsertFunction(const StringRef &Name, const FunctionType *T);
+ Constant *getOrInsertFunction(StringRef Name, const FunctionType *T);
/// getOrInsertFunction - Look up the specified function in the module symbol
/// table. If it does not exist, add a prototype for the function and return
@@ -248,21 +248,21 @@ public:
/// named function has a different type. This version of the method takes a
/// null terminated list of function arguments, which makes it easier for
/// clients to use.
- Constant *getOrInsertFunction(const StringRef &Name,
+ Constant *getOrInsertFunction(StringRef Name,
AttrListPtr AttributeList,
const Type *RetTy, ...) END_WITH_NULL;
/// getOrInsertFunction - Same as above, but without the attributes.
- Constant *getOrInsertFunction(const StringRef &Name, const Type *RetTy, ...)
+ Constant *getOrInsertFunction(StringRef Name, const Type *RetTy, ...)
END_WITH_NULL;
- Constant *getOrInsertTargetIntrinsic(const StringRef &Name,
+ Constant *getOrInsertTargetIntrinsic(StringRef Name,
const FunctionType *Ty,
AttrListPtr AttributeList);
/// getFunction - Look up the specified function in the module symbol table.
/// If it does not exist, return null.
- Function *getFunction(const StringRef &Name) const;
+ Function *getFunction(StringRef Name) const;
/// @}
/// @name Global Variable Accessors
@@ -272,13 +272,13 @@ public:
/// symbol table. If it does not exist, return null. If AllowInternal is set
/// to true, this function will return types that have InternalLinkage. By
/// default, these types are not returned.
- GlobalVariable *getGlobalVariable(const StringRef &Name,
+ GlobalVariable *getGlobalVariable(StringRef Name,
bool AllowInternal = false) const;
/// getNamedGlobal - Return the first global variable in the module with the
/// specified name, of arbitrary type. This method returns null if a global
/// with the specified name is not found.
- GlobalVariable *getNamedGlobal(const StringRef &Name) const {
+ GlobalVariable *getNamedGlobal(StringRef Name) const {
return getGlobalVariable(Name, true);
}
@@ -289,7 +289,7 @@ public:
/// with a constantexpr cast to the right type.
/// 3. Finally, if the existing global is the correct declaration, return
/// the existing global.
- Constant *getOrInsertGlobal(const StringRef &Name, const Type *Ty);
+ Constant *getOrInsertGlobal(StringRef Name, const Type *Ty);
/// @}
/// @name Global Alias Accessors
@@ -298,7 +298,7 @@ public:
/// getNamedAlias - Return the first global alias in the module with the
/// specified name, of arbitrary type. This method returns null if a global
/// with the specified name is not found.
- GlobalAlias *getNamedAlias(const StringRef &Name) const;
+ GlobalAlias *getNamedAlias(StringRef Name) const;
/// @}
/// @name Named Metadata Accessors
@@ -307,12 +307,12 @@ public:
/// getNamedMetadata - Return the first NamedMDNode in the module with the
/// specified name. This method returns null if a NamedMDNode with the
/// specified name is not found.
- NamedMDNode *getNamedMetadata(const StringRef &Name) const;
+ NamedMDNode *getNamedMetadata(StringRef Name) const;
/// getOrInsertNamedMetadata - Return the first named MDNode in the module
/// with the specified name. This method returns a new NamedMDNode if a
/// NamedMDNode with the specified name is not found.
- NamedMDNode *getOrInsertNamedMetadata(const StringRef &Name);
+ NamedMDNode *getOrInsertNamedMetadata(StringRef Name);
/// @}
/// @name Type Accessors
@@ -321,7 +321,7 @@ public:
/// addTypeName - Insert an entry in the symbol table mapping Str to Type. If
/// there is already an entry for this name, true is returned and the symbol
/// table is not modified.
- bool addTypeName(const StringRef &Name, const Type *Ty);
+ bool addTypeName(StringRef Name, const Type *Ty);
/// getTypeName - If there is at least one entry in the symbol table for the
/// specified type, return it.
@@ -329,7 +329,7 @@ public:
/// getTypeByName - Return the type with the specified name in this module, or
/// null if there is none by that name.
- const Type *getTypeByName(const StringRef &Name) const;
+ const Type *getTypeByName(StringRef Name) const;
/// @}
/// @name Direct access to the globals list, functions list, and symbol table
@@ -415,9 +415,9 @@ public:
/// @brief Returns the number of items in the list of libraries.
inline size_t lib_size() const { return LibraryList.size(); }
/// @brief Add a library to the list of dependent libraries
- void addLibrary(const StringRef &Lib);
+ void addLibrary(StringRef Lib);
/// @brief Remove a library from the list of dependent libraries
- void removeLibrary(const StringRef &Lib);
+ void removeLibrary(StringRef Lib);
/// @brief Get all the libraries
inline const LibraryListType& getLibraries() const { return LibraryList; }
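For the variadic getOrInsertFunction overloads, END_WITH_NULL requires the argument-type list to end with a null sentinel. A sketch declaring a puts-style prototype (assuming a Module *M and an LLVMContext &Ctx):

    #include "llvm/Module.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    Constant *declarePuts(Module *M, LLVMContext &Ctx) {
      return M->getOrInsertFunction("puts",
                                    Type::getInt32Ty(Ctx),
                                    PointerType::getUnqual(Type::getInt8Ty(Ctx)),
                                    (Type *)0);  // sentinel required by END_WITH_NULL
    }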
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 6bef2e7..909ccde 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -167,7 +167,7 @@ public:
// lookupPassInfo - Return the pass info object for the pass with the given
// argument string, or null if it is not known.
- static const PassInfo *lookupPassInfo(const StringRef &Arg);
+ static const PassInfo *lookupPassInfo(StringRef Arg);
/// getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to
/// get analysis information that might be around, for example to update it.
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index 690d080..5864fad 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -22,6 +22,7 @@
#include <vector>
#include "llvm/Pass.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm {
@@ -95,7 +96,7 @@ public:
// This can be useful when a pass is trivially preserved, but may not be
// linked in. Be careful about spelling!
//
- AnalysisUsage &addPreserved(const StringRef &Arg) {
+ AnalysisUsage &addPreserved(StringRef Arg) {
const PassInfo *PI = Pass::lookupPassInfo(Arg);
// If the pass exists, preserve it. Otherwise silently do nothing.
if (PI) Preserved.push_back(PI);
diff --git a/include/llvm/PassManagers.h b/include/llvm/PassManagers.h
index 5a8f555..dffc24a 100644
--- a/include/llvm/PassManagers.h
+++ b/include/llvm/PassManagers.h
@@ -284,11 +284,11 @@ public:
void removeNotPreservedAnalysis(Pass *P);
/// Remove dead passes used by P.
- void removeDeadPasses(Pass *P, const StringRef &Msg,
+ void removeDeadPasses(Pass *P, StringRef Msg,
enum PassDebuggingString);
/// Remove P.
- void freePass(Pass *P, const StringRef &Msg,
+ void freePass(Pass *P, StringRef Msg,
enum PassDebuggingString);
/// Add pass P into the PassVector. Update
@@ -344,7 +344,7 @@ public:
void dumpLastUses(Pass *P, unsigned Offset) const;
void dumpPassArguments() const;
void dumpPassInfo(Pass *P, enum PassDebuggingString S1,
- enum PassDebuggingString S2, const StringRef &Msg);
+ enum PassDebuggingString S2, StringRef Msg);
void dumpRequiredSet(const Pass *P) const;
void dumpPreservedSet(const Pass *P) const;
@@ -388,8 +388,8 @@ protected:
bool isPassDebuggingExecutionsOrMore() const;
private:
- void dumpAnalysisUsage(const StringRef &Msg, const Pass *P,
- const AnalysisUsage::VectorType &Set) const;
+ void dumpAnalysisUsage(StringRef Msg, const Pass *P,
+ const AnalysisUsage::VectorType &Set) const;
// Set of available Analyses. This information is used while scheduling
// passes. If a pass requires an analysis which is not available then
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index ca32f75..2e65fdd 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -495,7 +495,8 @@ public:
//--------------------------------------------------
// basic_parser - Super class of parsers to provide boilerplate code
//
-struct basic_parser_impl { // non-template implementation of basic_parser<t>
+class basic_parser_impl { // non-template implementation of basic_parser<t>
+public:
virtual ~basic_parser_impl() {}
enum ValueExpected getValueExpectedFlagDefault() const {
@@ -525,7 +526,8 @@ struct basic_parser_impl { // non-template implementation of basic_parser<t>
// a typedef for the provided data type.
//
template<class DataType>
-struct basic_parser : public basic_parser_impl {
+class basic_parser : public basic_parser_impl {
+public:
typedef DataType parser_data_type;
};
@@ -779,6 +781,8 @@ public:
DataType &getValue() { check(); return *Location; }
const DataType &getValue() const { check(); return *Location; }
+
+ operator DataType() const { return this->getValue(); }
};
@@ -814,6 +818,8 @@ public:
DataType &getValue() { return Value; }
DataType getValue() const { return Value; }
+ operator DataType() const { return getValue(); }
+
// If the datatype is a pointer, support -> on it.
DataType operator->() const { return Value; }
};
@@ -863,8 +869,6 @@ public:
ParserClass &getParser() { return Parser; }
- operator DataType() const { return this->getValue(); }
-
template<class T>
DataType &operator=(const T &Val) {
this->setValue(Val);
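Moving operator DataType() from cl::opt down into the opt_storage specializations means the implicit read also works when an option uses external storage via cl::location, not only internal storage. Typical use is unchanged; a sketch (the option name is hypothetical):

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<unsigned>
    Threshold("threshold", llvm::cl::init(8),
              llvm::cl::desc("hypothetical tuning knob"));

    unsigned getThreshold() {
      return Threshold;   // implicit conversion supplied by opt_storage
    }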
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 342a97d..da31f98 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -29,6 +29,18 @@
#define ATTRIBUTE_USED
#endif
+#ifdef __GNUC__ // aka 'ATTRIBUTE_CONST' but following LLVM Conventions.
+#define ATTRIBUTE_READNONE __attribute__((__const__))
+#else
+#define ATTRIBUTE_READNONE
+#endif
+
+#ifdef __GNUC__ // aka 'ATTRIBUTE_PURE' but following LLVM Conventions.
+#define ATTRIBUTE_READONLY __attribute__((__pure__))
+#else
+#define ATTRIBUTE_READONLY
+#endif
+
#if (__GNUC__ >= 4)
#define BUILTIN_EXPECT(EXPR, VALUE) __builtin_expect((EXPR), (VALUE))
#else
@@ -52,12 +64,16 @@
// method "not for inlining".
#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
#define DISABLE_INLINE __attribute__((noinline))
+#elif defined(_MSC_VER)
+#define DISABLE_INLINE __declspec(noinline)
#else
#define DISABLE_INLINE
#endif
#ifdef __GNUC__
#define NORETURN __attribute__((noreturn))
+#elif defined(_MSC_VER)
+#define NORETURN __declspec(noreturn)
#else
#define NORETURN
#endif
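ATTRIBUTE_READNONE and ATTRIBUTE_READONLY wrap GCC's __attribute__((const)) and __attribute__((pure)) under LLVM's readnone/readonly vocabulary, and the new _MSC_VER branches add __declspec equivalents for noinline and noreturn. Since __declspec must precede the declaration (GCC accepts either position), macros like NORETURN are best written in prefix position, which is exactly the reshuffle in ErrorHandling.h below. A sketch:

    #include "llvm/Support/Compiler.h"

    // Result depends only on the argument; the compiler may freely CSE calls.
    ATTRIBUTE_READNONE unsigned popcount8(unsigned char X);

    // Never returns; prefix placement satisfies both GCC and MSVC.
    NORETURN void fatalError(const char *Msg);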
diff --git a/include/llvm/Support/ConstantFolder.h b/include/llvm/Support/ConstantFolder.h
index b4589af..b73cea0 100644
--- a/include/llvm/Support/ConstantFolder.h
+++ b/include/llvm/Support/ConstantFolder.h
@@ -18,7 +18,6 @@
#define LLVM_SUPPORT_CONSTANTFOLDER_H
#include "llvm/Constants.h"
-#include "llvm/Instruction.h"
#include "llvm/InstrTypes.h"
namespace llvm {
diff --git a/include/llvm/Support/ConstantRange.h b/include/llvm/Support/ConstantRange.h
index 3846d46..6342c6f 100644
--- a/include/llvm/Support/ConstantRange.h
+++ b/include/llvm/Support/ConstantRange.h
@@ -187,6 +187,14 @@ public:
/// truncated to the specified type.
ConstantRange truncate(uint32_t BitWidth) const;
+ /// zextOrTrunc - make this range have the bit width given by \p BitWidth. The
+ /// value is zero extended, truncated, or left alone to make it that width.
+ ConstantRange zextOrTrunc(uint32_t BitWidth) const;
+
+ /// sextOrTrunc - make this range have the bit width given by \p BitWidth. The
+ /// value is sign extended, truncated, or left alone to make it that width.
+ ConstantRange sextOrTrunc(uint32_t BitWidth) const;
+
/// add - Return a new range representing the possible values resulting
/// from an addition of a value in this range and a value in Other.
ConstantRange add(const ConstantRange &Other) const;
@@ -209,6 +217,18 @@ public:
/// TODO: This isn't fully implemented yet.
ConstantRange udiv(const ConstantRange &Other) const;
+ /// shl - Return a new range representing the possible values resulting
+ /// from a left shift of a value in this range by the Amount value.
+ ConstantRange shl(const ConstantRange &Amount) const;
+
+ /// ashr - Return a new range representing the possible values resulting from
+ /// an arithmetic right shift of a value in this range by the Amount value.
+ ConstantRange ashr(const ConstantRange &Amount) const;
+
+ /// lshr - Return a new range representing the possible values resulting
+ /// from a logical right shift of a value in this range by the Amount value.
+ ConstantRange lshr(const ConstantRange &Amount) const;
+
/// print - Print out the bounds to a stream...
///
void print(raw_ostream &OS) const;
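A sketch of the new ConstantRange operations (ranges are half-open, [Lower, Upper)):

    #include "llvm/Support/ConstantRange.h"
    using namespace llvm;

    void example() {
      ConstantRange R(APInt(8, 1), APInt(8, 10));  // the 8-bit range [1, 10)
      ConstantRange W = R.zextOrTrunc(16);         // same values, now 16 bits wide
      ConstantRange L = R.lshr(ConstantRange(APInt(8, 1)));  // >> 1, logically
      (void)W; (void)L;
    }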
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index 67bccf0..6067795 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -60,15 +60,15 @@ namespace llvm {
/// standard error, followed by a newline.
/// After the error handler is called this function will call exit(1), it
/// does not return.
- void llvm_report_error(const char *reason) NORETURN;
- void llvm_report_error(const std::string &reason) NORETURN;
- void llvm_report_error(const Twine &reason) NORETURN;
+ NORETURN void llvm_report_error(const char *reason);
+ NORETURN void llvm_report_error(const std::string &reason);
+ NORETURN void llvm_report_error(const Twine &reason);
/// This function calls abort(), and prints the optional message to stderr.
/// Use the llvm_unreachable macro (that adds location info), instead of
/// calling this function directly.
- void llvm_unreachable_internal(const char *msg=0, const char *file=0,
- unsigned line=0) NORETURN;
+ NORETURN void llvm_unreachable_internal(const char *msg=0,
+ const char *file=0, unsigned line=0);
}
/// Prints the message and location info to stderr in !NDEBUG builds.
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index 4652e8f..2db2477 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -151,6 +151,15 @@ public:
Context.getMetadata().addMD(MDKind, CurDbgLocation, I);
}
+ /// SetDebugLocation - Set location information for the given instruction.
+ void SetDebugLocation(Instruction *I, MDNode *Loc) {
+ if (MDKind == 0)
+ MDKind = Context.getMetadata().getMDKind("dbg");
+ if (MDKind == 0)
+ MDKind = Context.getMetadata().registerMDKind("dbg");
+ Context.getMetadata().addMD(MDKind, Loc, I);
+ }
+
/// Insert - Insert and return the specified instruction.
template<typename InstTy>
InstTy *Insert(InstTy *I, const Twine &Name = "") const {
@@ -700,6 +709,11 @@ public:
return Folder.CreateIntCast(VC, DestTy, isSigned);
return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name);
}
+private:
+ // Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a compile time
+ // error, instead of converting the string to bool for the isSigned parameter.
+ Value *CreateIntCast(Value *, const Type *, const char *); // DO NOT IMPLEMENT
+public:
Value *CreateFPCast(Value *V, const Type *DestTy, const Twine &Name = "") {
if (V->getType() == DestTy)
return V;
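The private, unimplemented CreateIntCast overload is a compile-time trap: without it, a string literal passed where isSigned belongs would silently convert to bool true. A sketch (assuming an IRBuilder<> Builder, a Value *V, and a const Type *DestTy in scope):

    // Builder.CreateIntCast(V, DestTy, "tmp");  // now rejected at compile time
    Value *C = Builder.CreateIntCast(V, DestTy, /*isSigned=*/true, "tmp");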
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index f9a4d6d..95d0d3d 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -57,7 +57,7 @@ public:
/// MemoryBuffer if successful, otherwise returning null. If FileSize is
/// specified, this means that the client knows that the file exists and that
/// it has the specified size.
- static MemoryBuffer *getFile(const char *Filename,
+ static MemoryBuffer *getFile(StringRef Filename,
std::string *ErrStr = 0,
int64_t FileSize = -1);
@@ -84,29 +84,18 @@ public:
/// memory allocated by this method. The memory is owned by the MemoryBuffer
/// object.
static MemoryBuffer *getNewUninitMemBuffer(size_t Size,
- const char *BufferName = "");
+ StringRef BufferName = "");
- /// getSTDIN - Read all of stdin into a file buffer, and return it. This
- /// returns null if stdin is empty.
+ /// getSTDIN - Read all of stdin into a file buffer, and return it.
static MemoryBuffer *getSTDIN();
/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
/// if the Filename is "-". If an error occurs, this returns null and fills
- /// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN)
- /// returns an empty buffer.
- static MemoryBuffer *getFileOrSTDIN(const char *Filename,
- std::string *ErrStr = 0,
- int64_t FileSize = -1);
-
- /// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
- /// if the Filename is "-". If an error occurs, this returns null and fills
/// in *ErrStr with a reason.
- static MemoryBuffer *getFileOrSTDIN(const std::string &FN,
+ static MemoryBuffer *getFileOrSTDIN(StringRef Filename,
std::string *ErrStr = 0,
- int64_t FileSize = -1) {
- return getFileOrSTDIN(FN.c_str(), ErrStr, FileSize);
- }
+ int64_t FileSize = -1);
};
} // end namespace llvm
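With the consolidation, one entry point covers both files and stdin; a sketch:

    #include "llvm/Support/MemoryBuffer.h"
    #include <string>
    using namespace llvm;

    MemoryBuffer *openInput(StringRef Filename) {
      std::string Err;
      // "-" reads stdin; anything else is opened as a file.
      MemoryBuffer *Buf = MemoryBuffer::getFileOrSTDIN(Filename, &Err);
      // On failure Buf is null and Err describes the reason.
      return Buf;
    }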
diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h
index 1a6d06b..18be1ad 100644
--- a/include/llvm/Support/StandardPasses.h
+++ b/include/llvm/Support/StandardPasses.h
@@ -125,8 +125,6 @@ namespace llvm {
PM->add(createCFGSimplificationPass()); // Merge & remove BBs
PM->add(createInstructionCombiningPass()); // Combine silly seq's
- // FIXME: CondProp breaks critical edges, which is slow.
- PM->add(createCondPropagationPass()); // Propagate conditionals
PM->add(createTailCallEliminationPass()); // Eliminate tail calls
PM->add(createCFGSimplificationPass()); // Merge & remove BBs
PM->add(createReassociatePass()); // Reassociate expressions
@@ -146,7 +144,7 @@ namespace llvm {
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
PM->add(createInstructionCombiningPass());
- PM->add(createCondPropagationPass()); // Propagate conditionals
+ PM->add(createJumpThreadingPass()); // Thread jumps
PM->add(createDeadStoreEliminationPass()); // Delete dead stores
PM->add(createAggressiveDCEPass()); // Delete dead instructions
PM->add(createCFGSimplificationPass()); // Merge & remove BBs
diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h
index e9cf585..afed853 100644
--- a/include/llvm/Support/TargetFolder.h
+++ b/include/llvm/Support/TargetFolder.h
@@ -20,31 +20,27 @@
#define LLVM_SUPPORT_TARGETFOLDER_H
#include "llvm/Constants.h"
-#include "llvm/Instruction.h"
#include "llvm/InstrTypes.h"
#include "llvm/Analysis/ConstantFolding.h"
namespace llvm {
class TargetData;
-class LLVMContext;
/// TargetFolder - Create constants with target dependent folding.
class TargetFolder {
const TargetData *TD;
- LLVMContext &Context;
/// Fold - Fold the constant using target specific information.
Constant *Fold(Constant *C) const {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *CF = ConstantFoldConstantExpression(CE, Context, TD))
+ if (Constant *CF = ConstantFoldConstantExpression(CE, TD))
return CF;
return C;
}
public:
- explicit TargetFolder(const TargetData *TheTD, LLVMContext &C) :
- TD(TheTD), Context(C) {}
+ explicit TargetFolder(const TargetData *TheTD) : TD(TheTD) {}
//===--------------------------------------------------------------------===//
// Binary Operators
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index 66d6aaa..a78e81f 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -169,7 +169,7 @@ public:
return *this;
}
- raw_ostream &operator<<(const StringRef &Str) {
+ raw_ostream &operator<<(StringRef Str) {
// Inline fast path, particularly for strings with a known length.
size_t Size = Str.size();
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index cfaae4b..ce916b5 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -96,6 +96,12 @@ template <typename T> struct remove_pointer<T*volatile> { typedef T type; };
template <typename T> struct remove_pointer<T*const volatile> {
typedef T type; };
+template <bool, typename T, typename F>
+struct conditional { typedef T type; };
+
+template <typename T, typename F>
+struct conditional<false, T, F> { typedef F type; };
+
}
#endif
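conditional is a compile-time type selector in the spirit of C++11's std::conditional; a sketch:

    #include "llvm/Support/type_traits.h"
    #include "llvm/System/DataTypes.h"

    // Pick an unsigned type wide enough to hold a pointer on this host.
    typedef llvm::conditional<sizeof(void *) == 8,
                              uint64_t, uint32_t>::type UIntPtr;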
diff --git a/include/llvm/System/Host.h b/include/llvm/System/Host.h
index 3c6aa9d..6de1a4a 100644
--- a/include/llvm/System/Host.h
+++ b/include/llvm/System/Host.h
@@ -41,6 +41,12 @@ namespace sys {
/// CPU_TYPE-VENDOR-KERNEL-OPERATING_SYSTEM
std::string getHostTriple();
+ /// getHostCPUName - Get the LLVM name for the host CPU. The particular format
+ /// of the name is target dependent, and suitable for passing as -mcpu to the
+ /// target which matches the host.
+ ///
+ /// \return - The host CPU name, or empty if the CPU could not be determined.
+ std::string getHostCPUName();
}
}
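A sketch of the new query:

    #include "llvm/System/Host.h"
    #include <string>

    // Suitable as an -mcpu value when targeting the host; empty on failure.
    std::string HostCPU = llvm::sys::getHostCPUName();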
diff --git a/include/llvm/System/TimeValue.h b/include/llvm/System/TimeValue.h
index 168e2a7..b82647f 100644
--- a/include/llvm/System/TimeValue.h
+++ b/include/llvm/System/TimeValue.h
@@ -65,8 +65,8 @@ namespace sys {
/// @name Types
/// @{
public:
- typedef int64_t SecondsType; ///< Type used for representing seconds.
- typedef int32_t NanoSecondsType; ///< Type used for representing nanoseconds.
+ typedef int64_t SecondsType; ///< Type used for representing seconds.
+ typedef int32_t NanoSecondsType;///< Type used for representing nanoseconds.
enum TimeConversions {
NANOSECONDS_PER_SECOND = 1000000000, ///< One Billion
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index edb8582..6f1e066 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -413,6 +413,7 @@ def DBG_LABEL : Instruction {
let AsmString = "";
let Namespace = "TargetInstrInfo";
let hasCtrlDep = 1;
+ let isNotDuplicable = 1;
}
def EH_LABEL : Instruction {
let OutOperandList = (ops);
@@ -420,6 +421,7 @@ def EH_LABEL : Instruction {
let AsmString = "";
let Namespace = "TargetInstrInfo";
let hasCtrlDep = 1;
+ let isNotDuplicable = 1;
}
def GC_LABEL : Instruction {
let OutOperandList = (ops);
@@ -427,6 +429,7 @@ def GC_LABEL : Instruction {
let AsmString = "";
let Namespace = "TargetInstrInfo";
let hasCtrlDep = 1;
+ let isNotDuplicable = 1;
}
def KILL : Instruction {
let OutOperandList = (ops);
diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h
index af85f73..e1d052e 100644
--- a/include/llvm/Target/TargetData.h
+++ b/include/llvm/Target/TargetData.h
@@ -21,10 +21,7 @@
#define LLVM_TARGET_TARGETDATA_H
#include "llvm/Pass.h"
-#include "llvm/System/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/SmallVector.h"
-#include <string>
namespace llvm {
@@ -33,6 +30,7 @@ class Type;
class IntegerType;
class StructType;
class StructLayout;
+class StructLayoutMap;
class GlobalVariable;
class LLVMContext;
@@ -73,26 +71,22 @@ private:
unsigned char PointerABIAlign; ///< Pointer ABI alignment
unsigned char PointerPrefAlign; ///< Pointer preferred alignment
- //! Where the primitive type alignment data is stored.
- /*!
- @sa init().
- @note Could support multiple size pointer alignments, e.g., 32-bit pointers
- vs. 64-bit pointers by extending TargetAlignment, but for now, we don't.
- */
+ SmallVector<unsigned char, 8> LegalIntWidths; ///< Legal Integers.
+
+ /// Alignments - Where the primitive type alignment data is stored.
+ ///
+ /// @sa init().
+ /// @note Could support multiple size pointer alignments, e.g., 32-bit
+ /// pointers vs. 64-bit pointers by extending TargetAlignment, but for now,
+ /// we don't.
SmallVector<TargetAlignElem, 16> Alignments;
- //! Alignment iterator shorthand
- typedef SmallVector<TargetAlignElem, 16>::iterator align_iterator;
- //! Constant alignment iterator shorthand
- typedef SmallVector<TargetAlignElem, 16>::const_iterator align_const_iterator;
- //! Invalid alignment.
- /*!
- This member is a signal that a requested alignment type and bit width were
- not found in the SmallVector.
- */
+
+ /// InvalidAlignmentElem - This member is a signal that a requested alignment
+ /// type and bit width were not found in the SmallVector.
static const TargetAlignElem InvalidAlignmentElem;
- // Opaque pointer for the StructType -> StructLayout map.
- mutable void* LayoutMap;
+ // The StructType -> StructLayout map.
+ mutable StructLayoutMap *LayoutMap;
//! Set/initialize target alignments
void setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
@@ -106,8 +100,8 @@ private:
///
/// Predicate that tests a TargetAlignElem reference returned by get() against
/// InvalidAlignmentElem.
- inline bool validAlignment(const TargetAlignElem &align) const {
- return (&align != &InvalidAlignmentElem);
+ bool validAlignment(const TargetAlignElem &align) const {
+ return &align != &InvalidAlignmentElem;
}
public:
@@ -115,13 +109,10 @@ public:
///
/// @note This has to exist, because this is a pass, but it should never be
/// used.
- TargetData() : ImmutablePass(&ID) {
- llvm_report_error("Bad TargetData ctor used. "
- "Tool did not specify a TargetData to use?");
- }
-
+ TargetData();
+
/// Constructs a TargetData from a specification string. See init().
- explicit TargetData(const std::string &TargetDescription)
+ explicit TargetData(StringRef TargetDescription)
: ImmutablePass(&ID) {
init(TargetDescription);
}
@@ -135,6 +126,7 @@ public:
PointerMemSize(TD.PointerMemSize),
PointerABIAlign(TD.PointerABIAlign),
PointerPrefAlign(TD.PointerPrefAlign),
+ LegalIntWidths(TD.LegalIntWidths),
Alignments(TD.Alignments),
LayoutMap(0)
{ }
@@ -142,16 +134,35 @@ public:
~TargetData(); // Not virtual, do not subclass this class
//! Parse a target data layout string and initialize TargetData alignments.
- void init(const std::string &TargetDescription);
+ void init(StringRef TargetDescription);
/// Target endianness...
- bool isLittleEndian() const { return LittleEndian; }
- bool isBigEndian() const { return !LittleEndian; }
+ bool isLittleEndian() const { return LittleEndian; }
+ bool isBigEndian() const { return !LittleEndian; }
/// getStringRepresentation - Return the string representation of the
/// TargetData. This representation is in the same format accepted by the
/// string constructor above.
std::string getStringRepresentation() const;
+
+ /// isLegalInteger - This function returns true if the specified type is
+ /// known to be a native integer type supported by the CPU. For example,
+ /// i64 is not native on most 32-bit CPUs and i37 is not native on any known
+ /// one. This returns false if the integer width is not legal.
+ ///
+ /// The width is specified in bits.
+ ///
+ bool isLegalInteger(unsigned Width) const {
+ for (unsigned i = 0, e = LegalIntWidths.size(); i != e; ++i)
+ if (LegalIntWidths[i] == Width)
+ return true;
+ return false;
+ }
+
+ bool isIllegalInteger(unsigned Width) const {
+ return !isLegalInteger(Width);
+ }
+
/// Target pointer alignment
unsigned char getPointerABIAlignment() const { return PointerABIAlign; }
/// Return target's alignment for stack-based pointers
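LegalIntWidths is populated from the 'n' clause of the target description string, which lists the natively supported integer widths. A sketch (the layout string below is a hypothetical 64-bit target):

    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    void example() {
      TargetData TD("e-p:64:64:64-n8:16:32:64");
      bool A = TD.isLegalInteger(64);    // true: i64 is native here
      bool B = TD.isIllegalInteger(37);  // true: i37 is not
      (void)A; (void)B;
    }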
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 1d42c32..43fd54e 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -171,6 +171,25 @@ public:
int &FrameIndex) const {
return 0;
}
+
+ /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ return 0;
+ }
+
+ /// hasLoadFromStackSlot - If the specified machine instruction has
+ /// a load from a stack slot, return true along with the FrameIndex
+ /// of the loaded stack slot. If not, return false. Unlike
+ /// isLoadFromStackSlot, this returns true for any instruction that
+ /// loads from the stack. This is just a hint, as some cases may be
+ /// missed.
+ virtual bool hasLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ return false;
+ }
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
@@ -182,12 +201,32 @@ public:
return 0;
}
+ /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ virtual unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ return 0;
+ }
+
+ /// hasStoreToStackSlot - If the specified machine instruction has a
+ /// store to a stack slot, return true along with the FrameIndex of
+ /// the stored stack slot. If not, return false. Unlike
+ /// isStoreToStackSlot, this returns true for any instruction that
+ /// stores to the stack. This is just a hint, as some cases may be
+ /// missed.
+ virtual bool hasStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ return false;
+ }
+
/// reMaterialize - Re-issue the specified 'original' instruction at the
/// specific location targeting a new destination register.
virtual void reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const = 0;
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const = 0;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -226,6 +265,14 @@ public:
virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const = 0;
+ /// isIdentical - Return true if two instructions are identical. This differs
+ /// from MachineInstr::isIdenticalTo() in that it does not require the
+ /// virtual destination registers to be the same. This is used by MachineLICM
+ /// and other MI passes to perform CSE.
+ virtual bool isIdentical(const MachineInstr *MI,
+ const MachineInstr *Other,
+ const MachineRegisterInfo *MRI) const = 0;
+
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
@@ -489,6 +536,13 @@ public:
/// length.
virtual unsigned getInlineAsmLength(const char *Str,
const MCAsmInfo &MAI) const;
+
+ /// TailDuplicationLimit - Returns the limit on the number of instructions
+ /// in basic block MBB beyond which it will not be tail-duplicated.
+ virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB,
+ unsigned DefaultLimit) const {
+ return DefaultLimit;
+ }
};
/// TargetInstrInfoImpl - This is the default implementation of
@@ -509,7 +563,12 @@ public:
virtual void reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubReg,
- const MachineInstr *Orig) const;
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
+ virtual bool isIdentical(const MachineInstr *MI,
+ const MachineInstr *Other,
+ const MachineRegisterInfo *MRI) const;
+
virtual unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
};
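A sketch of how a pass might combine the exact queries with the new heuristic ones (TII and MI assumed in scope):

    int FI;
    if (unsigned Reg = TII->isLoadFromStackSlot(MI, FI)) {
      // Definitely a direct reload of frame index FI into Reg.
      (void)Reg;
    } else if (TII->hasLoadFromStackSlot(MI, FI)) {
      // Some load from FI is buried inside MI; treat it only as a hint.
    }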
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 8bc39d0..ca51102 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -978,7 +978,7 @@ protected:
/// not work with the specified type and indicate what to do about it.
void setLoadExtAction(unsigned ExtType, MVT VT,
LegalizeAction Action) {
- assert((unsigned)VT.SimpleTy < sizeof(LoadExtActions[0])*4 &&
+ assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
ExtType < array_lengthof(LoadExtActions) &&
"Table isn't big enough!");
LoadExtActions[ExtType] &= ~(uint64_t(3UL) << VT.SimpleTy*2);
@@ -990,7 +990,7 @@ protected:
void setTruncStoreAction(MVT ValVT, MVT MemVT,
LegalizeAction Action) {
assert((unsigned)ValVT.SimpleTy < array_lengthof(TruncStoreActions) &&
- (unsigned)MemVT.SimpleTy < sizeof(TruncStoreActions[0])*4 &&
+ (unsigned)MemVT.SimpleTy < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
TruncStoreActions[ValVT.SimpleTy] &= ~(uint64_t(3UL) << MemVT.SimpleTy*2);
TruncStoreActions[ValVT.SimpleTy] |= (uint64_t)Action << MemVT.SimpleTy*2;
@@ -1025,7 +1025,7 @@ protected:
void setConvertAction(MVT FromVT, MVT ToVT,
LegalizeAction Action) {
assert((unsigned)FromVT.SimpleTy < array_lengthof(ConvertActions) &&
- (unsigned)ToVT.SimpleTy < sizeof(ConvertActions[0])*4 &&
+ (unsigned)ToVT.SimpleTy < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
ConvertActions[FromVT.SimpleTy] &= ~(uint64_t(3UL) << ToVT.SimpleTy*2);
ConvertActions[FromVT.SimpleTy] |= (uint64_t)Action << ToVT.SimpleTy*2;
@@ -1035,7 +1035,7 @@ protected:
/// supported on the target and indicate what to do about it.
void setCondCodeAction(ISD::CondCode CC, MVT VT,
LegalizeAction Action) {
- assert((unsigned)VT.SimpleTy < sizeof(CondCodeActions[0])*4 &&
+ assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
(unsigned)CC < array_lengthof(CondCodeActions) &&
"Table isn't big enough!");
CondCodeActions[(unsigned)CC] &= ~(uint64_t(3UL) << VT.SimpleTy*2);
@@ -1167,6 +1167,18 @@ public:
return SDValue(); // this is here to silence compiler errors
}
+ /// CanLowerReturn - This hook should be implemented to check whether the
+ /// return values described by the Outs array can fit into the return
+ /// registers. If false is returned, an sret-demotion is performed.
+ ///
+ virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG)
+ {
+ // Return true by default to get preexisting behavior.
+ return true;
+ }
/// LowerReturn - This hook must be implemented to lower outgoing
/// return values, described by the Outs array, into the specified
/// DAG. The implementation should return the resulting token chain
@@ -1502,6 +1514,14 @@ public:
return false;
}
+ /// isLegalICmpImmediate - Return true if the specified immediate is a legal
+ /// icmp immediate, that is, the target has icmp instructions which can compare
+ /// a register against the immediate without having to materialize the
+ /// immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const {
+ return true;
+ }
+
//===--------------------------------------------------------------------===//
// Div utility functions
//
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 821e537..9a64191 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -15,6 +15,7 @@
#ifndef LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
#define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/SectionKind.h"
namespace llvm {
@@ -26,7 +27,6 @@ namespace llvm {
class MCSectionMachO;
class MCContext;
class GlobalValue;
- class StringRef;
class TargetMachine;
class TargetLoweringObjectFile {
@@ -288,14 +288,14 @@ public:
/// getMachOSection - Return the MCSection for the specified mach-o section.
/// This requires the operands to be valid.
- const MCSectionMachO *getMachOSection(const StringRef &Segment,
- const StringRef &Section,
+ const MCSectionMachO *getMachOSection(StringRef Segment,
+ StringRef Section,
unsigned TypeAndAttributes,
SectionKind K) const {
return getMachOSection(Segment, Section, TypeAndAttributes, 0, K);
}
- const MCSectionMachO *getMachOSection(const StringRef &Segment,
- const StringRef &Section,
+ const MCSectionMachO *getMachOSection(StringRef Segment,
+ StringRef Section,
unsigned TypeAndAttributes,
unsigned Reserved2,
SectionKind K) const;
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index b7e8af9..cd6fd28 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -275,6 +275,7 @@ private:
regclass_iterator RegClassBegin, RegClassEnd; // List of regclasses
int CallFrameSetupOpcode, CallFrameDestroyOpcode;
+
protected:
TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
regclass_iterator RegClassBegin,
@@ -463,6 +464,11 @@ public:
/// exist.
virtual unsigned getSubReg(unsigned RegNo, unsigned Index) const = 0;
+ /// getSubRegIndex - For a given register pair, return the sub-register index
+ /// if the second register is a sub-register of the first. Return zero
+ /// otherwise.
+ virtual unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const = 0;
+
/// getMatchingSuperReg - Return a super-register of the specified register
/// Reg so its sub-register of index SubIdx is Reg.
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
@@ -684,7 +690,7 @@ public:
/// getFrameRegister - This method should return the register used as a base
/// for values allocated in the current stack frame.
- virtual unsigned getFrameRegister(MachineFunction &MF) const = 0;
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const = 0;
/// getFrameIndexOffset - Returns the displacement from the frame register to
/// the stack frame of the specified index.
diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h
index 395526f..167e1d1 100644
--- a/include/llvm/Target/TargetRegistry.h
+++ b/include/llvm/Target/TargetRegistry.h
@@ -51,7 +51,7 @@ namespace llvm {
typedef unsigned (*TripleMatchQualityFnTy)(const std::string &TT);
typedef const MCAsmInfo *(*AsmInfoCtorFnTy)(const Target &T,
- const StringRef &TT);
+ StringRef TT);
typedef TargetMachine *(*TargetMachineCtorTy)(const Target &T,
const std::string &TT,
const std::string &Features);
@@ -163,7 +163,7 @@ namespace llvm {
/// feature set; it should always be provided. Generally this should be
/// either the target triple from the module, or the target triple of the
/// host if that does not exist.
- const MCAsmInfo *createAsmInfo(const StringRef &Triple) const {
+ const MCAsmInfo *createAsmInfo(StringRef Triple) const {
if (!AsmInfoCtorFn)
return 0;
return AsmInfoCtorFn(*this, Triple);
@@ -461,7 +461,7 @@ namespace llvm {
TargetRegistry::RegisterAsmInfo(T, &Allocator);
}
private:
- static const MCAsmInfo *Allocator(const Target &T, const StringRef &TT) {
+ static const MCAsmInfo *Allocator(const Target &T, StringRef TT) {
return new MCAsmInfoImpl(T, TT);
}
diff --git a/include/llvm/Target/TargetSubtarget.h b/include/llvm/Target/TargetSubtarget.h
index fd107e0..22b09ba 100644
--- a/include/llvm/Target/TargetSubtarget.h
+++ b/include/llvm/Target/TargetSubtarget.h
@@ -20,6 +20,8 @@ namespace llvm {
class SDep;
class SUnit;
+class TargetRegisterClass;
+template <typename T> class SmallVectorImpl;
//===----------------------------------------------------------------------===//
///
@@ -36,6 +38,7 @@ public:
// AntiDepBreakMode - Type of anti-dependence breaking that should
// be performed before post-RA scheduling.
typedef enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode;
+ typedef SmallVectorImpl<TargetRegisterClass*> RegClassVector;
virtual ~TargetSubtarget();
@@ -47,13 +50,12 @@ public:
// enablePostRAScheduler - If the target can benefit from post-regalloc
// scheduling and the specified optimization level meets the requirement
- // return true to enable post-register-allocation scheduling.
+ // return true to enable post-register-allocation scheduling. In
+ // CriticalPathRCs return any register classes that should only be broken
+ // if on the critical path.
virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- AntiDepBreakMode& mode) const {
- mode = ANTIDEP_NONE;
- return false;
- }
-
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
// adjustSchedDependency - Perform target specific adjustments to
// the latency of a schedule dependency.
virtual void adjustSchedDependency(SUnit *def, SUnit *use,
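A sketch of the widened hook (ST and OptLevel assumed in scope):

    TargetSubtarget::AntiDepBreakMode Mode;
    SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
    if (ST.enablePostRAScheduler(OptLevel, Mode, CriticalPathRCs)) {
      // Run the post-RA scheduler; break anti-dependences per Mode, and for
      // any classes in CriticalPathRCs only along the critical path.
    }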
diff --git a/include/llvm/Transforms/RSProfiling.h b/include/llvm/Transforms/RSProfiling.h
index 98ec396..02439e8 100644
--- a/include/llvm/Transforms/RSProfiling.h
+++ b/include/llvm/Transforms/RSProfiling.h
@@ -15,7 +15,11 @@
#ifndef LLVM_TRANSFORMS_RSPROFILING_H
#define LLVM_TRANSFORMS_RSPROFILING_H
+#include "llvm/Pass.h"
+
namespace llvm {
+ class Value;
+
//===--------------------------------------------------------------------===//
/// RSProfilers - The basic Random Sampling Profiler Interface Any profiler
/// that implements this interface can be transformed by the random sampling
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 523a8f4..7159f86 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -171,14 +171,6 @@ FunctionPass *createReassociatePass();
//===----------------------------------------------------------------------===//
//
-// CondPropagationPass - This pass propagates information about conditional
-// expressions through the program, allowing it to eliminate conditional
-// branches in some cases.
-//
-FunctionPass *createCondPropagationPass();
-
-//===----------------------------------------------------------------------===//
-//
// TailDuplication - Eliminate unconditional branches through controlled code
// duplication, creating simpler CFG structures.
//
@@ -271,7 +263,7 @@ extern const PassInfo *const LCSSAID;
// GVN - This pass performs global value numbering and redundant load
// elimination contemporaneously.
//
-FunctionPass *createGVNPass(bool NoPRE = false);
+FunctionPass *createGVNPass(bool NoPRE = false, bool NoLoads = false);
//===----------------------------------------------------------------------===//
//
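A sketch of the new flag (PM assumed in scope):

    // Value numbering with load elimination switched off.
    PM.add(llvm::createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true));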
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 5b15b5b..e9099f8 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -24,6 +24,7 @@ namespace llvm {
class Module;
class Function;
+class Instruction;
class Pass;
class LPPassManager;
class BasicBlock;
@@ -154,7 +155,8 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = 0,
- const TargetData *TD = 0);
+ const TargetData *TD = 0,
+ Instruction *TheCall = 0);
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller. This returns false if it is not possible to inline
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 419029f..292af1d 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -78,6 +78,21 @@ void RecursivelyDeleteDeadPHINode(PHINode *PN);
// Control Flow Graph Restructuring.
//
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// .. and delete the predecessor corresponding to the '1', this will attempt to
+/// recursively fold the 'and' to 0.
+void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ TargetData *TD = 0);
+
+
/// MergeBasicBlockIntoOnlyPred - BB is a block with one predecessor and its
/// predecessor is known to have one successor (BB!). Eliminate the edge
/// between them, moving the instructions in the predecessor into BB. This
@@ -85,7 +100,14 @@ void RecursivelyDeleteDeadPHINode(PHINode *PN);
///
void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, Pass *P = 0);
-
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potential debug intrinsics and the branch. If possible, eliminate BB by
+/// rewriting all the predecessors to branch to the successor block and return
+/// true. If we can't transform, return false.
+bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB);
+
/// SimplifyCFG - This function is used to do simplification of a CFG. For
/// example, it adjusts branches to branches to eliminate the extra hop, it
/// eliminates unreachable basic blocks, and does other "peephole" optimization
diff --git a/include/llvm/Transforms/Utils/SSI.h b/include/llvm/Transforms/Utils/SSI.h
index ff5bb7b..198fc82 100644
--- a/include/llvm/Transforms/Utils/SSI.h
+++ b/include/llvm/Transforms/Utils/SSI.h
@@ -22,6 +22,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_SSI_H
#define LLVM_TRANSFORMS_UTILS_SSI_H
+#include "llvm/InstrTypes.h"
#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
diff --git a/include/llvm/TypeSymbolTable.h b/include/llvm/TypeSymbolTable.h
index d84196f..26b1dbf 100644
--- a/include/llvm/TypeSymbolTable.h
+++ b/include/llvm/TypeSymbolTable.h
@@ -58,26 +58,26 @@ public:
/// incrementing an integer and appending it to the name, if necessary
/// @returns the unique name
/// @brief Get a unique name for a type
- std::string getUniqueName(const StringRef &BaseName) const;
+ std::string getUniqueName(StringRef BaseName) const;
/// This method finds the type with the given \p name in the type map
/// and returns it.
/// @returns null if the name is not found, otherwise the Type
/// associated with the \p name.
/// @brief Lookup a type by name.
- Type *lookup(const StringRef &name) const;
+ Type *lookup(StringRef name) const;
/// Lookup the type associated with name.
/// @returns end() if the name is not found, or an iterator at the entry for
/// Type.
- iterator find(const StringRef &Name) {
+ iterator find(StringRef Name) {
return tmap.find(Name);
}
/// Lookup the type associated with name.
/// @returns end() if the name is not found, or an iterator at the entry for
/// Type.
- const_iterator find(const StringRef &Name) const {
+ const_iterator find(StringRef Name) const {
return tmap.find(Name);
}
@@ -119,7 +119,7 @@ public:
/// a many-to-one mapping between names and types. This method allows a type
/// with an existing entry in the symbol table to get a new name.
/// @brief Insert a type under a new name.
- void insert(const StringRef &Name, const Type *Typ);
+ void insert(StringRef Name, const Type *Typ);
/// Remove a type at the specified position in the symbol table.
/// @returns the removed Type.
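
Dropping the const-reference wrappers is safe because StringRef is just a (pointer, length) pair; passing it by value is as cheap as by reference and call sites are unchanged. A small illustration, assuming an existing symbol table instance (the helper and the "mytype" name are made up):

    #include "llvm/TypeSymbolTable.h"
    #include <string>
    using namespace llvm;

    static Type *findNamedType(const TypeSymbolTable &TST,
                               const std::string &S) {
      // Both a std::string and a literal convert implicitly to StringRef,
      // which is then copied by value into lookup().
      if (Type *T = TST.lookup(S))
        return T;
      return TST.lookup("mytype");   // "mytype" is an example name
    }
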
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index b485524..0960346 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -224,8 +224,12 @@ public:
NamedMDNodeVal, // This is an instance of NamedMDNode
InlineAsmVal, // This is an instance of InlineAsm
PseudoSourceValueVal, // This is an instance of PseudoSourceValue
+ FixedStackPseudoSourceValueVal, // This is an instance of
+ // FixedStackPseudoSourceValue
InstructionVal, // This is an instance of Instruction
-
+ // Enum values starting at InstructionVal are used for Instructions;
+ // don't add new values here!
+
// Markers:
ConstantFirstVal = FunctionVal,
ConstantLastVal = ConstantPointerNullVal
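
The "don't add new values here" warning exists because Instruction subclasses derive their value ID from the marker: an instruction's ID is InstructionVal plus its opcode. A sketch of the kind of ordering test that depends on this invariant (the helper name is illustrative):

    #include "llvm/Value.h"

    // Anything whose ID is at or above InstructionVal is an Instruction;
    // inserting enum values after the marker would break this test.
    static bool isInstructionValue(const llvm::Value *V) {
      return V->getValueID() >= llvm::Value::InstructionVal;
    }
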
diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/ValueSymbolTable.h
index b147c1e..e05fdbd 100644
--- a/include/llvm/ValueSymbolTable.h
+++ b/include/llvm/ValueSymbolTable.h
@@ -69,7 +69,7 @@ public:
/// the symbol table.
/// @returns the value associated with the \p Name
/// @brief Lookup a named Value.
- Value *lookup(const StringRef &Name) const { return vmap.lookup(Name); }
+ Value *lookup(StringRef Name) const { return vmap.lookup(Name); }
/// @returns true iff the symbol table is empty
/// @brief Determine if the symbol table is empty
@@ -112,7 +112,7 @@ private:
/// createValueName - This method attempts to create a value name and insert
/// it into the symbol table with the specified name. If it conflicts, it
/// auto-renames the name and returns that instead.
- ValueName *createValueName(const StringRef &Name, Value *V);
+ ValueName *createValueName(StringRef Name, Value *V);
/// This method removes a value from the symbol table. It leaves the
/// ValueName attached to the value, but it is no longer inserted in the
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index c81190b..b8d69f4 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -23,7 +23,6 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetData.h"
@@ -99,7 +98,7 @@ static bool isNonEscapingLocalObject(const Value *V) {
/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
static bool isObjectSmallerThan(const Value *V, unsigned Size,
- LLVMContext &Context, const TargetData &TD) {
+ const TargetData &TD) {
const Type *AccessTy;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
AccessTy = GV->getType()->getElementType();
@@ -109,7 +108,7 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size,
else
return false;
} else if (const CallInst* CI = extractMallocCall(V)) {
- if (!isArrayMalloc(V, Context, &TD))
+ if (!isArrayMalloc(V, &TD))
// The size is the argument to the malloc call.
if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getOperand(1)))
return (C->getZExtValue() < Size);
@@ -647,11 +646,25 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
const Value *O1 = V1->getUnderlyingObject();
const Value *O2 = V2->getUnderlyingObject();
+ // Null values in the default address space don't point to any object, so they
+ // don't alias any other pointer.
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+
if (O1 != O2) {
// If V1/V2 point to two different objects we know that we have no alias.
if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
return NoAlias;
-
+
+ // Constant pointers can't alias non-constant identified objects.
+ if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
+ (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
+ return NoAlias;
+
// Arguments can't alias with local allocations or noalias calls.
if ((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
(isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1))))
@@ -665,10 +678,9 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
- LLVMContext &Context = V1->getContext();
if (TD)
- if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, Context, *TD)) ||
- (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, Context, *TD)))
+ if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, *TD)) ||
+ (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD)))
return NoAlias;
// If one pointer is the result of a call/invoke and the other is a
@@ -707,16 +719,16 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
// This function is used to determine if the indices of two GEP instructions are
// equal. V1 and V2 are the indices.
-static bool IndexOperandsEqual(Value *V1, Value *V2, LLVMContext &Context) {
+static bool IndexOperandsEqual(Value *V1, Value *V2) {
if (V1->getType() == V2->getType())
return V1 == V2;
if (Constant *C1 = dyn_cast<Constant>(V1))
if (Constant *C2 = dyn_cast<Constant>(V2)) {
// Sign extend the constants to long types, if necessary
- if (C1->getType() != Type::getInt64Ty(Context))
- C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context));
- if (C2->getType() != Type::getInt64Ty(Context))
- C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context));
+ if (C1->getType() != Type::getInt64Ty(C1->getContext()))
+ C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext()));
+ if (C2->getType() != Type::getInt64Ty(C1->getContext()))
+ C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext()));
return C1 == C2;
}
return false;
@@ -737,8 +749,6 @@ BasicAliasAnalysis::CheckGEPInstructions(
const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty);
- LLVMContext &Context = GEPPointerTy->getContext();
-
// Find the (possibly empty) initial sequence of equal values... which are not
// necessarily constants.
unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops;
@@ -746,8 +756,7 @@ BasicAliasAnalysis::CheckGEPInstructions(
unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands);
unsigned UnequalOper = 0;
while (UnequalOper != MinOperands &&
- IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper],
- Context)) {
+ IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) {
// Advance through the type as we go...
++UnequalOper;
if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty))
@@ -811,10 +820,11 @@ BasicAliasAnalysis::CheckGEPInstructions(
if (Constant *G2OC = dyn_cast<ConstantInt>(const_cast<Value*>(G2Oper))){
if (G1OC->getType() != G2OC->getType()) {
// Sign extend both operands to long.
- if (G1OC->getType() != Type::getInt64Ty(Context))
- G1OC = ConstantExpr::getSExt(G1OC, Type::getInt64Ty(Context));
- if (G2OC->getType() != Type::getInt64Ty(Context))
- G2OC = ConstantExpr::getSExt(G2OC, Type::getInt64Ty(Context));
+ const Type *Int64Ty = Type::getInt64Ty(G1OC->getContext());
+ if (G1OC->getType() != Int64Ty)
+ G1OC = ConstantExpr::getSExt(G1OC, Int64Ty);
+ if (G2OC->getType() != Int64Ty)
+ G2OC = ConstantExpr::getSExt(G2OC, Int64Ty);
GEP1Ops[FirstConstantOper] = G1OC;
GEP2Ops[FirstConstantOper] = G2OC;
}
@@ -950,7 +960,7 @@ BasicAliasAnalysis::CheckGEPInstructions(
for (unsigned i = 0; i != FirstConstantOper; ++i) {
if (!isa<StructType>(ZeroIdxTy))
GEP1Ops[i] = GEP2Ops[i] =
- Constant::getNullValue(Type::getInt32Ty(Context));
+ Constant::getNullValue(Type::getInt32Ty(ZeroIdxTy->getContext()));
if (const CompositeType *CT = dyn_cast<CompositeType>(ZeroIdxTy))
ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]);
@@ -992,11 +1002,11 @@ BasicAliasAnalysis::CheckGEPInstructions(
//
if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty))
GEP1Ops[i] =
- ConstantInt::get(Type::getInt64Ty(Context),
+ ConstantInt::get(Type::getInt64Ty(AT->getContext()),
AT->getNumElements()-1);
else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty))
GEP1Ops[i] =
- ConstantInt::get(Type::getInt64Ty(Context),
+ ConstantInt::get(Type::getInt64Ty(VT->getContext()),
VT->getNumElements()-1);
}
}
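
The two new early-outs, restated in isolation (a sketch only; in the patch these checks run on the underlying objects inside aliasCheck, and provablyNoAlias is a made-up name):

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Constants.h"
    using namespace llvm;

    static bool provablyNoAlias(const Value *O1, const Value *O2) {
      // Null in the default address space points at no object at all.
      if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
        if (CPN->getType()->getAddressSpace() == 0)
          return true;
      if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
        if (CPN->getType()->getAddressSpace() == 0)
          return true;
      if (O1 == O2)
        return false;
      // A constant pointer can't alias a distinct non-constant identified
      // object such as an alloca or a noalias call.
      return (isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
             (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1));
    }
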
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index f21fd54..0a83c3d 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -15,8 +15,10 @@ add_llvm_library(LLVMAnalysis
IVUsers.cpp
InlineCost.cpp
InstCount.cpp
+ InstructionSimplify.cpp
Interval.cpp
IntervalPartition.cpp
+ LazyValueInfo.cpp
LibCallAliasAnalysis.cpp
LibCallSemantics.cpp
LiveValues.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 33a5792..1cdadbf 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -23,7 +23,6 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallVector.h"
@@ -493,8 +492,7 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){
/// these together. If target data info is available, it is provided as TD,
/// otherwise TD is null.
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
- Constant *Op1, const TargetData *TD,
- LLVMContext &Context){
+ Constant *Op1, const TargetData *TD){
// SROA
// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
@@ -521,15 +519,15 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
/// constant expression, do so.
-static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
+static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
const Type *ResultTy,
- LLVMContext &Context,
const TargetData *TD) {
Constant *Ptr = Ops[0];
if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
return 0;
- unsigned BitWidth = TD->getTypeSizeInBits(TD->getIntPtrType(Context));
+ unsigned BitWidth =
+ TD->getTypeSizeInBits(TD->getIntPtrType(Ptr->getContext()));
APInt BasePtr(BitWidth, 0);
bool BaseIsInt = true;
if (!Ptr->isNullValue()) {
@@ -558,7 +556,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
// If the base value for this address is a literal integer value, fold the
// getelementptr to the resulting integer value casted to the pointer type.
if (BaseIsInt) {
- Constant *C = ConstantInt::get(Context, Offset+BasePtr);
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
return ConstantExpr::getIntToPtr(C, ResultTy);
}
@@ -579,7 +577,8 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
return 0;
APInt NewIdx = Offset.udiv(ElemSize);
Offset -= NewIdx * ElemSize;
- NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Context), NewIdx));
+ NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Ty->getContext()),
+ NewIdx));
Ty = ATy->getElementType();
} else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
// Determine which field of the struct the offset points into. The
@@ -587,7 +586,8 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
// know the offset is within the struct at this point.
const StructLayout &SL = *TD->getStructLayout(STy);
unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
- NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Context), ElIdx));
+ NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ ElIdx));
Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
Ty = STy->getTypeAtIndex(ElIdx);
} else {
@@ -628,8 +628,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
/// is returned. Note that this function can only fail when attempting to fold
/// instructions like loads and stores, which have no constant expression form.
///
-Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context,
- const TargetData *TD) {
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
if (PN->getNumIncomingValues() == 0)
return UndefValue::get(PN->getType());
@@ -656,33 +655,30 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context,
return 0; // All operands not constant!
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(),
- Ops.data(), Ops.size(),
- Context, TD);
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Ops.data(), Ops.size(), Context, TD);
+ Ops.data(), Ops.size(), TD);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData. If successful, the constant result is
/// returned; if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
- LLVMContext &Context,
const TargetData *TD) {
SmallVector<Constant*, 8> Ops;
for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
Ops.push_back(cast<Constant>(*i));
if (CE->isCompare())
- return ConstantFoldCompareInstOperands(CE->getPredicate(),
- Ops.data(), Ops.size(),
- Context, TD);
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ TD);
return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
- Ops.data(), Ops.size(), Context, TD);
+ Ops.data(), Ops.size(), TD);
}
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -693,13 +689,11 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
Constant* const* Ops, unsigned NumOps,
- LLVMContext &Context,
const TargetData *TD) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
- if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD,
- Context))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
return C;
return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
@@ -724,7 +718,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
unsigned InWidth = Input->getType()->getScalarSizeInBits();
if (TD->getPointerSizeInBits() < InWidth) {
Constant *Mask =
- ConstantInt::get(Context, APInt::getLowBitsSet(InWidth,
+ ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
TD->getPointerSizeInBits()));
Input = ConstantExpr::getAnd(Input, Mask);
}
@@ -766,7 +760,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
AT->getNumElements()))) {
Constant *Index[] = {
Constant::getNullValue(CE->getType()),
- ConstantInt::get(Context, ElemIdx)
+ ConstantInt::get(ElTy->getContext(), ElemIdx)
};
return
ConstantExpr::getGetElementPtr(GV, &Index[0], 2);
@@ -800,7 +794,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, Context, TD))
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
return C;
return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1);
@@ -812,9 +806,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
/// returns a constant expression of the specified operands.
///
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
- Constant*const * Ops,
- unsigned NumOps,
- LLVMContext &Context,
+ Constant *Ops0, Constant *Ops1,
const TargetData *TD) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
@@ -823,17 +815,16 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
//
// ConstantExpr::getCompare cannot do this, because it doesn't have TD
// around to know if bit truncation is happening.
- if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops[0])) {
- if (TD && Ops[1]->isNullValue()) {
- const Type *IntPtrTy = TD->getIntPtrType(Context);
+ if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
+ if (TD && Ops1->isNullValue()) {
+ const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
- Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
- Context, TD);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -841,16 +832,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
if (CE0->getOpcode() == Instruction::PtrToInt &&
CE0->getType() == IntPtrTy) {
Constant *C = CE0->getOperand(0);
- Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
- // FIXME!
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
- Context, TD);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
}
}
- if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops[1])) {
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
if (TD && CE0->getOpcode() == CE1->getOpcode()) {
- const Type *IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
// Convert the integer value to the right size to ensure we get the
@@ -859,26 +848,21 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
IntPtrTy, false);
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
- Constant *NewOps[] = { C0, C1 };
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
- Context, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD);
}
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
if ((CE0->getOpcode() == Instruction::PtrToInt &&
CE0->getType() == IntPtrTy &&
- CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) {
- Constant *NewOps[] = {
- CE0->getOperand(0), CE1->getOperand(0)
- };
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
- Context, TD);
- }
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
+ return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
+ CE1->getOperand(0), TD);
}
}
}
- return ConstantExpr::getCompare(Predicate, Ops[0], Ops[1]);
+
+ return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
}
@@ -996,7 +980,7 @@ llvm::canConstantFoldCallTo(const Function *F) {
}
static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
- const Type *Ty, LLVMContext &Context) {
+ const Type *Ty) {
errno = 0;
V = NativeFP(V);
if (errno != 0) {
@@ -1005,17 +989,15 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
}
if (Ty->isFloatTy())
- return ConstantFP::get(Context, APFloat((float)V));
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
if (Ty->isDoubleTy())
- return ConstantFP::get(Context, APFloat(V));
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold float/double");
return 0; // dummy return to suppress warning
}
static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
- double V, double W,
- const Type *Ty,
- LLVMContext &Context) {
+ double V, double W, const Type *Ty) {
errno = 0;
V = NativeFP(V, W);
if (errno != 0) {
@@ -1024,9 +1006,9 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
}
if (Ty->isFloatTy())
- return ConstantFP::get(Context, APFloat((float)V));
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
if (Ty->isDoubleTy())
- return ConstantFP::get(Context, APFloat(V));
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold float/double");
return 0; // dummy return to suppress warning
}
@@ -1037,7 +1019,6 @@ Constant *
llvm::ConstantFoldCall(Function *F,
Constant *const *Operands, unsigned NumOperands) {
if (!F->hasName()) return 0;
- LLVMContext &Context = F->getContext();
StringRef Name = F->getName();
const Type *Ty = F->getReturnType();
@@ -1054,62 +1035,62 @@ llvm::ConstantFoldCall(Function *F,
switch (Name[0]) {
case 'a':
if (Name == "acos")
- return ConstantFoldFP(acos, V, Ty, Context);
+ return ConstantFoldFP(acos, V, Ty);
else if (Name == "asin")
- return ConstantFoldFP(asin, V, Ty, Context);
+ return ConstantFoldFP(asin, V, Ty);
else if (Name == "atan")
- return ConstantFoldFP(atan, V, Ty, Context);
+ return ConstantFoldFP(atan, V, Ty);
break;
case 'c':
if (Name == "ceil")
- return ConstantFoldFP(ceil, V, Ty, Context);
+ return ConstantFoldFP(ceil, V, Ty);
else if (Name == "cos")
- return ConstantFoldFP(cos, V, Ty, Context);
+ return ConstantFoldFP(cos, V, Ty);
else if (Name == "cosh")
- return ConstantFoldFP(cosh, V, Ty, Context);
+ return ConstantFoldFP(cosh, V, Ty);
else if (Name == "cosf")
- return ConstantFoldFP(cos, V, Ty, Context);
+ return ConstantFoldFP(cos, V, Ty);
break;
case 'e':
if (Name == "exp")
- return ConstantFoldFP(exp, V, Ty, Context);
+ return ConstantFoldFP(exp, V, Ty);
break;
case 'f':
if (Name == "fabs")
- return ConstantFoldFP(fabs, V, Ty, Context);
+ return ConstantFoldFP(fabs, V, Ty);
else if (Name == "floor")
- return ConstantFoldFP(floor, V, Ty, Context);
+ return ConstantFoldFP(floor, V, Ty);
break;
case 'l':
if (Name == "log" && V > 0)
- return ConstantFoldFP(log, V, Ty, Context);
+ return ConstantFoldFP(log, V, Ty);
else if (Name == "log10" && V > 0)
- return ConstantFoldFP(log10, V, Ty, Context);
+ return ConstantFoldFP(log10, V, Ty);
else if (Name == "llvm.sqrt.f32" ||
Name == "llvm.sqrt.f64") {
if (V >= -0.0)
- return ConstantFoldFP(sqrt, V, Ty, Context);
+ return ConstantFoldFP(sqrt, V, Ty);
else // Undefined
return Constant::getNullValue(Ty);
}
break;
case 's':
if (Name == "sin")
- return ConstantFoldFP(sin, V, Ty, Context);
+ return ConstantFoldFP(sin, V, Ty);
else if (Name == "sinh")
- return ConstantFoldFP(sinh, V, Ty, Context);
+ return ConstantFoldFP(sinh, V, Ty);
else if (Name == "sqrt" && V >= 0)
- return ConstantFoldFP(sqrt, V, Ty, Context);
+ return ConstantFoldFP(sqrt, V, Ty);
else if (Name == "sqrtf" && V >= 0)
- return ConstantFoldFP(sqrt, V, Ty, Context);
+ return ConstantFoldFP(sqrt, V, Ty);
else if (Name == "sinf")
- return ConstantFoldFP(sin, V, Ty, Context);
+ return ConstantFoldFP(sin, V, Ty);
break;
case 't':
if (Name == "tan")
- return ConstantFoldFP(tan, V, Ty, Context);
+ return ConstantFoldFP(tan, V, Ty);
else if (Name == "tanh")
- return ConstantFoldFP(tanh, V, Ty, Context);
+ return ConstantFoldFP(tanh, V, Ty);
break;
default:
break;
@@ -1120,7 +1101,7 @@ llvm::ConstantFoldCall(Function *F,
if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
if (Name.startswith("llvm.bswap"))
- return ConstantInt::get(Context, Op->getValue().byteSwap());
+ return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
else if (Name.startswith("llvm.ctpop"))
return ConstantInt::get(Ty, Op->getValue().countPopulation());
else if (Name.startswith("llvm.cttz"))
@@ -1149,18 +1130,20 @@ llvm::ConstantFoldCall(Function *F,
Op2->getValueAPF().convertToDouble();
if (Name == "pow")
- return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty, Context);
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
if (Name == "fmod")
- return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty, Context);
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
if (Name == "atan2")
- return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty, Context);
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (Name == "llvm.powi.f32")
- return ConstantFP::get(Context, APFloat((float)std::pow((float)Op1V,
+ return ConstantFP::get(F->getContext(),
+ APFloat((float)std::pow((float)Op1V,
(int)Op2C->getZExtValue())));
if (Name == "llvm.powi.f64")
- return ConstantFP::get(Context, APFloat((double)std::pow((double)Op1V,
- (int)Op2C->getZExtValue())));
+ return ConstantFP::get(F->getContext(),
+ APFloat((double)std::pow((double)Op1V,
+ (int)Op2C->getZExtValue())));
}
return 0;
}
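
Since compares are always binary, the folder now takes its two constant operands directly instead of an (array, length) pair, and the LLVMContext parameter disappears throughout because the context is recoverable from the operands. A minimal caller on the new signature (foldEquality is a hypothetical wrapper; TD may be null when no target data is available):

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    static Constant *foldEquality(Constant *A, Constant *B,
                                  const TargetData *TD) {
      // The context argument is gone; it is recovered from A and B inside.
      return ConstantFoldCompareInstOperands(ICmpInst::ICMP_EQ, A, B, TD);
    }
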
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index b64dbf4..8f62245 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -366,6 +366,9 @@ bool DIGlobalVariable::Verify() const {
if (isNull())
return false;
+ if (!getDisplayName())
+ return false;
+
if (getContext().isNull())
return false;
@@ -406,6 +409,10 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
DIType BaseType = getTypeDerivedFrom();
+ // If this type is not derived from any type, take the conservative
+ // approach and use this type's own size.
+ if (BaseType.isNull())
+ return getSizeInBits();
if (BaseType.isDerivedType())
return DIDerivedType(BaseType.getNode()).getOriginalTypeSize();
else
@@ -599,9 +606,7 @@ void DIVariable::dump() const {
//===----------------------------------------------------------------------===//
DIFactory::DIFactory(Module &m)
- : M(m), VMContext(M.getContext()), StopPointFn(0), FuncStartFn(0),
- RegionStartFn(0), RegionEndFn(0),
- DeclareFn(0) {
+ : M(m), VMContext(M.getContext()), DeclareFn(0) {
EmptyStructPtr = PointerType::getUnqual(StructType::get(VMContext));
}
@@ -646,9 +651,9 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
/// CreateCompileUnit - Create a new descriptor for the specified compile
/// unit. Note that this does not unique compile units within the module.
DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
- StringRef Filename,
- StringRef Directory,
- StringRef Producer,
+ const char * Filename,
+ const char * Directory,
+ const char * Producer,
bool isMain,
bool isOptimized,
const char *Flags,
@@ -670,7 +675,7 @@ DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
}
/// CreateEnumerator - Create a single enumerator value.
-DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){
+DIEnumerator DIFactory::CreateEnumerator(const char * Name, uint64_t Val){
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_enumerator),
MDString::get(VMContext, Name),
@@ -682,7 +687,7 @@ DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){
/// CreateBasicType - Create a basic type like int, float, etc.
DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -707,7 +712,7 @@ DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
/// CreateBasicType - Create a basic type like int, float, etc.
DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits,
@@ -734,7 +739,7 @@ DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
/// pointer, typedef, etc.
DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -762,7 +767,7 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
/// pointer, typedef, etc.
DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits,
@@ -789,7 +794,7 @@ DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -821,7 +826,7 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits,
@@ -854,9 +859,9 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
/// See comments in DISubprogram for descriptions of these fields. This
/// method does not unique the generated descriptors.
DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
- StringRef Name,
- StringRef DisplayName,
- StringRef LinkageName,
+ const char * Name,
+ const char * DisplayName,
+ const char * LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type,
bool isLocalToUnit,
@@ -880,9 +885,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
/// CreateGlobalVariable - Create a new descriptor for the specified global.
DIGlobalVariable
-DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
- StringRef DisplayName,
- StringRef LinkageName,
+DIFactory::CreateGlobalVariable(DIDescriptor Context, const char * Name,
+ const char * DisplayName,
+ const char * LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type,bool isLocalToUnit,
bool isDefinition, llvm::GlobalVariable *Val) {
@@ -914,7 +919,7 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit, unsigned LineNo,
DIType Type) {
Value *Elts[] = {
@@ -976,60 +981,8 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
// DIFactory: Routines for inserting code into a function
//===----------------------------------------------------------------------===//
-/// InsertStopPoint - Create a new llvm.dbg.stoppoint intrinsic invocation,
-/// inserting it at the end of the specified basic block.
-void DIFactory::InsertStopPoint(DICompileUnit CU, unsigned LineNo,
- unsigned ColNo, BasicBlock *BB) {
-
- // Lazily construct llvm.dbg.stoppoint function.
- if (!StopPointFn)
- StopPointFn = llvm::Intrinsic::getDeclaration(&M,
- llvm::Intrinsic::dbg_stoppoint);
-
- // Invoke llvm.dbg.stoppoint
- Value *Args[] = {
- ConstantInt::get(llvm::Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(llvm::Type::getInt32Ty(VMContext), ColNo),
- CU.getNode()
- };
- CallInst::Create(StopPointFn, Args, Args+3, "", BB);
-}
-
-/// InsertSubprogramStart - Create a new llvm.dbg.func.start intrinsic to
-/// mark the start of the specified subprogram.
-void DIFactory::InsertSubprogramStart(DISubprogram SP, BasicBlock *BB) {
- // Lazily construct llvm.dbg.func.start.
- if (!FuncStartFn)
- FuncStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_func_start);
-
- // Call llvm.dbg.func.start which also implicitly sets a stoppoint.
- CallInst::Create(FuncStartFn, SP.getNode(), "", BB);
-}
-
-/// InsertRegionStart - Insert a new llvm.dbg.region.start intrinsic call to
-/// mark the start of a region for the specified scoping descriptor.
-void DIFactory::InsertRegionStart(DIDescriptor D, BasicBlock *BB) {
- // Lazily construct llvm.dbg.region.start function.
- if (!RegionStartFn)
- RegionStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_start);
-
- // Call llvm.dbg.func.start.
- CallInst::Create(RegionStartFn, D.getNode(), "", BB);
-}
-
-/// InsertRegionEnd - Insert a new llvm.dbg.region.end intrinsic call to
-/// mark the end of a region for the specified scoping descriptor.
-void DIFactory::InsertRegionEnd(DIDescriptor D, BasicBlock *BB) {
- // Lazily construct llvm.dbg.region.end function.
- if (!RegionEndFn)
- RegionEndFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_end);
-
- // Call llvm.dbg.region.end.
- CallInst::Create(RegionEndFn, D.getNode(), "", BB);
-}
-
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
+Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
Instruction *InsertBefore) {
// Cast the storage to a {}* for the call to llvm.dbg.declare.
Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertBefore);
@@ -1038,11 +991,11 @@ void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
Value *Args[] = { Storage, D.getNode() };
- CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+ return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
}
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
+Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
BasicBlock *InsertAtEnd) {
// Cast the storage to a {}* for the call to llvm.dbg.declare.
Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertAtEnd);
@@ -1051,7 +1004,7 @@ void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
Value *Args[] = { Storage, D.getNode() };
- CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+ return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
}
@@ -1062,38 +1015,18 @@ void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
/// processModule - Process entire module and collect debug info.
void DebugInfoFinder::processModule(Module &M) {
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
MetadataContext &TheMetadata = M.getContext().getMetadata();
unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
-#endif
+
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
++BI) {
- if (DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(BI))
- processStopPoint(SPI);
- else if (DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI))
- processFuncStart(FSI);
- else if (DbgRegionStartInst *DRS = dyn_cast<DbgRegionStartInst>(BI))
- processRegionStart(DRS);
- else if (DbgRegionEndInst *DRE = dyn_cast<DbgRegionEndInst>(BI))
- processRegionEnd(DRE);
- else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
processDeclare(DDI);
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
- else if (MDDbgKind) {
- if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI)) {
- DILocation Loc(L);
- DIScope S(Loc.getScope().getNode());
- if (S.isCompileUnit())
- addCompileUnit(DICompileUnit(S.getNode()));
- else if (S.isSubprogram())
- processSubprogram(DISubprogram(S.getNode()));
- else if (S.isLexicalBlock())
- processLexicalBlock(DILexicalBlock(S.getNode()));
- }
- }
-#endif
+ else if (MDDbgKind)
+ if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI))
+ processLocation(DILocation(L));
}
NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
@@ -1109,6 +1042,20 @@ void DebugInfoFinder::processModule(Module &M) {
}
}
+/// processLocation - Process DILocation.
+void DebugInfoFinder::processLocation(DILocation Loc) {
+ if (Loc.isNull()) return;
+ DIScope S(Loc.getScope().getNode());
+ if (S.isNull()) return;
+ if (S.isCompileUnit())
+ addCompileUnit(DICompileUnit(S.getNode()));
+ else if (S.isSubprogram())
+ processSubprogram(DISubprogram(S.getNode()));
+ else if (S.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(S.getNode()));
+ processLocation(Loc.getOrigLocation());
+}
+
/// processType - Process DIType.
void DebugInfoFinder::processType(DIType DT) {
if (!addType(DT))
@@ -1156,30 +1103,6 @@ void DebugInfoFinder::processSubprogram(DISubprogram SP) {
processType(SP.getType());
}
-/// processStopPoint - Process DbgStopPointInst.
-void DebugInfoFinder::processStopPoint(DbgStopPointInst *SPI) {
- MDNode *Context = dyn_cast<MDNode>(SPI->getContext());
- addCompileUnit(DICompileUnit(Context));
-}
-
-/// processFuncStart - Process DbgFuncStartInst.
-void DebugInfoFinder::processFuncStart(DbgFuncStartInst *FSI) {
- MDNode *SP = dyn_cast<MDNode>(FSI->getSubprogram());
- processSubprogram(DISubprogram(SP));
-}
-
-/// processRegionStart - Process DbgRegionStart.
-void DebugInfoFinder::processRegionStart(DbgRegionStartInst *DRS) {
- MDNode *SP = dyn_cast<MDNode>(DRS->getContext());
- processSubprogram(DISubprogram(SP));
-}
-
-/// processRegionEnd - Process DbgRegionEnd.
-void DebugInfoFinder::processRegionEnd(DbgRegionEndInst *DRE) {
- MDNode *SP = dyn_cast<MDNode>(DRE->getContext());
- processSubprogram(DISubprogram(SP));
-}
-
/// processDeclare - Process DbgDeclareInst.
void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
DIVariable DV(cast<MDNode>(DDI->getVariable()));
@@ -1475,22 +1398,4 @@ bool getLocationInfo(const Value *V, std::string &DisplayName,
return DebugLoc::get(Id);
}
-
- /// isInlinedFnStart - Return true if FSI is starting an inlined function.
- bool isInlinedFnStart(DbgFuncStartInst &FSI, const Function *CurrentFn) {
- DISubprogram Subprogram(cast<MDNode>(FSI.getSubprogram()));
- if (Subprogram.describes(CurrentFn))
- return false;
-
- return true;
- }
-
- /// isInlinedFnEnd - Return true if REI is ending an inlined function.
- bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn) {
- DISubprogram Subprogram(cast<MDNode>(REI.getContext()));
- if (Subprogram.isNull() || Subprogram.describes(CurrentFn))
- return false;
-
- return true;
- }
}
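
InsertDeclare now returns the llvm.dbg.declare call it creates, so a front end can keep the handle, for example to attach location metadata afterwards. A sketch (declareLocal and its parameters are illustrative):

    #include "llvm/Analysis/DebugInfo.h"
    using namespace llvm;

    static Instruction *declareLocal(DIFactory &DIF, Value *Storage,
                                     DIVariable Var, Instruction *InsertPt) {
      // The returned call can now be tracked, e.g. to tag it with "dbg"
      // metadata once a source location is known.
      return DIF.InsertDeclare(Storage, Var, InsertPt);
    }
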
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index 17f304c..40a8cd5 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -518,7 +518,7 @@ namespace {
/// getObject - Return the node corresponding to the memory object for the
/// specified global or allocation instruction.
unsigned getObject(Value *V) const {
- DenseMap<Value*, unsigned>::iterator I = ObjectNodes.find(V);
+ DenseMap<Value*, unsigned>::const_iterator I = ObjectNodes.find(V);
assert(I != ObjectNodes.end() &&
"Value does not have an object in the points-to graph!");
return I->second;
@@ -527,7 +527,7 @@ namespace {
/// getReturnNode - Return the node representing the return value for the
/// specified function.
unsigned getReturnNode(Function *F) const {
- DenseMap<Function*, unsigned>::iterator I = ReturnNodes.find(F);
+ DenseMap<Function*, unsigned>::const_iterator I = ReturnNodes.find(F);
assert(I != ReturnNodes.end() && "Function does not return a value!");
return I->second;
}
@@ -535,7 +535,7 @@ namespace {
/// getVarargNode - Return the node representing the variable arguments
/// formal for the specified function.
unsigned getVarargNode(Function *F) const {
- DenseMap<Function*, unsigned>::iterator I = VarargNodes.find(F);
+ DenseMap<Function*, unsigned>::const_iterator I = VarargNodes.find(F);
assert(I != VarargNodes.end() && "Function does not take var args!");
return I->second;
}
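
These getters are const member functions, so through the const this pointer the DenseMap members are const and find() yields a const_iterator; naming the non-const iterator type no longer matches. A generic restatement of the fix (NodeTable is a made-up type):

    #include "llvm/ADT/DenseMap.h"
    #include <cassert>

    struct NodeTable {
      llvm::DenseMap<unsigned, unsigned> Nodes;
      unsigned getNode(unsigned Key) const {
        // Nodes is const here, so find() returns a const_iterator.
        llvm::DenseMap<unsigned, unsigned>::const_iterator I = Nodes.find(Key);
        assert(I != Nodes.end() && "Key has no node!");
        return I->second;
      }
    };
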
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 543e017..cf52320 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -151,6 +151,8 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
if (L->contains(User->getParent())) return false;
BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock)
+ return false;
// Ok, the user is outside of the loop. If it is dominated by the latch
// block, use the post-inc value.
@@ -265,6 +267,18 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
return true;
}
+void IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
+ Instruction *User, Value *Operand) {
+ IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
+ if (!StrideUses) { // First occurrence of this stride?
+ StrideOrder.push_back(Stride);
+ StrideUses = new IVUsersOfOneStride(Stride);
+ IVUses.push_back(StrideUses);
+ IVUsesByStride[Stride] = StrideUses;
+ }
+ IVUsesByStride[Stride]->addUser(Offset, User, Operand);
+}
+
IVUsers::IVUsers()
: LoopPass(&ID) {
}
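
AddUser relies on a common first-occurrence idiom: DenseMap::operator[] default-constructs a null slot, and a null value signals that the per-stride structure still has to be created and its insertion order recorded. The idiom in isolation (PerStrideState and getOrCreate are illustrative names):

    #include "llvm/ADT/DenseMap.h"
    #include <vector>

    struct PerStrideState { /* per-stride bookkeeping */ };

    static PerStrideState *getOrCreate(
        llvm::DenseMap<unsigned, PerStrideState*> &Map,
        std::vector<unsigned> &Order, unsigned Key) {
      PerStrideState *&Slot = Map[Key];  // null pointer on first access
      if (!Slot) {                       // first occurrence of this key?
        Order.push_back(Key);            // remember insertion order
        Slot = new PerStrideState();
      }
      return Slot;
    }
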
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
new file mode 100644
index 0000000..f9953e3
--- /dev/null
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -0,0 +1,348 @@
+//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements routines for folding instructions into simpler forms
+// that do not require creating new instructions. For example, this does
+// constant folding, and can handle identities like (X&0)->0.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+/// SimplifyAndInst - Given operands for an And, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1,
+ const TargetData *TD) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X & undef -> 0
+ if (isa<UndefValue>(Op1))
+ return Constant::getNullValue(Op0->getType());
+
+ // X & X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X & <0,0> = <0,0>
+ if (isa<ConstantAggregateZero>(Op1))
+ return Op1;
+
+ // X & <-1,-1> = X
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1))
+ if (CP->isAllOnesValue())
+ return Op0;
+
+ if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) {
+ // X & 0 = 0
+ if (Op1CI->isZero())
+ return Op1CI;
+ // X & -1 = X
+ if (Op1CI->isAllOnesValue())
+ return Op0;
+ }
+
+ // A & ~A = ~A & A = 0
+ Value *A, *B;
+ if ((match(Op0, m_Not(m_Value(A))) && A == Op1) ||
+ (match(Op1, m_Not(m_Value(A))) && A == Op0))
+ return Constant::getNullValue(Op0->getType());
+
+ // (A | ?) & A = A
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A & (A | ?) = A
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ return 0;
+}
+
+/// SimplifyOrInst - Given operands for an Or, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1,
+ const TargetData *TD) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X | undef -> -1
+ if (isa<UndefValue>(Op1))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // X | X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X | <0,0> = X
+ if (isa<ConstantAggregateZero>(Op1))
+ return Op0;
+
+ // X | <-1,-1> = <-1,-1>
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1))
+ if (CP->isAllOnesValue())
+ return Op1;
+
+ if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) {
+ // X | 0 = X
+ if (Op1CI->isZero())
+ return Op0;
+ // X | -1 = -1
+ if (Op1CI->isAllOnesValue())
+ return Op1CI;
+ }
+
+ // A | ~A = ~A | A = -1
+ Value *A, *B;
+ if ((match(Op0, m_Not(m_Value(A))) && A == Op1) ||
+ (match(Op1, m_Not(m_Value(A))) && A == Op0))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (A & ?) | A = A
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A | (A & ?) = A
+ if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ return 0;
+}
+
+
+
+
+static const Type *GetCompareTy(Value *Op) {
+ return CmpInst::makeCmpResultType(Op->getType());
+}
+
+
+/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ // ITy - This is the return type of the compare we're considering.
+ const Type *ITy = GetCompareTy(LHS);
+
+ // icmp X, X -> true/false
+ if (LHS == RHS)
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ if (isa<UndefValue>(RHS)) // X icmp undef -> undef
+ return UndefValue::get(ITy);
+
+ // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
+ // addresses never equal each other! We already know that LHS != RHS.
+ if ((isa<GlobalValue>(LHS) || isa<AllocaInst>(LHS) ||
+ isa<ConstantPointerNull>(LHS)) &&
+ (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
+ isa<ConstantPointerNull>(RHS)))
+ return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+
+ // See if we are doing a comparison with a constant.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // If we have an icmp le or icmp ge instruction, turn it into the
+ // appropriate icmp lt or icmp gt instruction. This allows us to rely on
+ // them being folded in the code below.
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_ULE:
+ if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
+ return ConstantInt::getTrue(CI->getContext());
+ break;
+ case ICmpInst::ICMP_SLE:
+ if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
+ return ConstantInt::getTrue(CI->getContext());
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (CI->isMinValue(false)) // A >=u MIN -> TRUE
+ return ConstantInt::getTrue(CI->getContext());
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (CI->isMinValue(true)) // A >=s MIN -> TRUE
+ return ConstantInt::getTrue(CI->getContext());
+ break;
+ }
+ }
+
+
+ return 0;
+}
+
+/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Fold trivial predicates.
+ if (Pred == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ if (Pred == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+
+ if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef
+ return UndefValue::get(GetCompareTy(LHS));
+
+ // fcmp x,x -> true/false. Not all compares are foldable.
+ if (LHS == RHS) {
+ if (CmpInst::isTrueWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+ if (CmpInst::isFalseWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // If the constant is a NaN, see if we can fold the comparison based on it.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ return ConstantInt::getFalse(CFP->getContext());
+ assert(FCmpInst::isUnordered(Pred) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ConstantInt::getTrue(CFP->getContext());
+ }
+ }
+ }
+
+ return 0;
+}
+
+//=== Helper functions for higher up the class hierarchy.
+
+/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD) {
+ switch (Opcode) {
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, TD);
+ case Instruction::Or: return SimplifyOrInst(LHS, RHS, TD);
+ default:
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ Constant *COps[] = {CLHS, CRHS};
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD);
+ }
+ return 0;
+ }
+}
+
+/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
+/// fold the result.
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD) {
+ if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
+ return SimplifyICmpInst(Predicate, LHS, RHS, TD);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, TD);
+}
+
+
+/// SimplifyInstruction - See if we can compute a simplified version of this
+/// instruction. If not, this returns null.
+Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) {
+ switch (I->getOpcode()) {
+ default:
+ return ConstantFoldInstruction(I, TD);
+ case Instruction::And:
+ return SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD);
+ case Instruction::Or:
+ return SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD);
+ case Instruction::ICmp:
+ return SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD);
+ case Instruction::FCmp:
+ return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD);
+ }
+}
+
+/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
+/// delete the From instruction. In addition to a basic RAUW, this does a
+/// recursive simplification of the newly formed instructions. This catches
+/// things where one simplification exposes other opportunities. This only
+/// simplifies and deletes scalar operations; it does not change the CFG.
+///
+void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
+ const TargetData *TD) {
+ assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
+
+ // FromHandle - This keeps a WeakVH on the From value so that we can tell if
+ // it gets deleted out from under us in a recursive simplification.
+ WeakVH FromHandle(From);
+
+ while (!From->use_empty()) {
+ // Update the instruction to use the new value.
+ Use &U = From->use_begin().getUse();
+ Instruction *User = cast<Instruction>(U.getUser());
+ U = To;
+
+ // See if we can simplify it.
+ if (Value *V = SimplifyInstruction(User, TD)) {
+ // Recursively simplify this.
+ ReplaceAndSimplifyAllUses(User, V, TD);
+
+ // If the recursive simplification ended up revisiting and deleting 'From'
+ // then we're done.
+ if (FromHandle == 0)
+ return;
+ }
+ }
+ From->eraseFromParent();
+}
+
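
How a client is expected to drive these entry points: simplify each instruction, and on success let ReplaceAndSimplifyAllUses perform the RAUW plus the recursive resimplification and deletion. A hedged sketch, assuming recursive simplification does not erase other instructions in the block being walked (a robust caller would iterate a WeakVH worklist instead):

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/BasicBlock.h"
    #include "llvm/Instruction.h"
    using namespace llvm;

    static bool simplifyBlock(BasicBlock &BB, const TargetData *TD) {
      bool Changed = false;
      for (BasicBlock::iterator I = BB.begin(); I != BB.end(); ) {
        Instruction *Inst = I++;   // advance first; Inst may be erased
        if (Value *V = SimplifyInstruction(Inst, TD)) {
          ReplaceAndSimplifyAllUses(Inst, V, TD);  // RAUWs, then erases Inst
          Changed = true;
        }
      }
      return Changed;
    }
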
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
new file mode 100644
index 0000000..5796c6f
--- /dev/null
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -0,0 +1,582 @@
+//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lazy-value-info"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+char LazyValueInfo::ID = 0;
+static RegisterPass<LazyValueInfo>
+X("lazy-value-info", "Lazy Value Information Analysis", false, true);
+
+namespace llvm {
+ FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LVILatticeVal
+//===----------------------------------------------------------------------===//
+
+/// LVILatticeVal - This is the information tracked by LazyValueInfo for each
+/// value.
+///
+/// FIXME: This is basically just for bringup; it can be made much richer
+/// in the future.
+///
+namespace {
+class LVILatticeVal {
+ enum LatticeValueTy {
+ /// undefined - This LLVM Value has no known value yet.
+ undefined,
+ /// constant - This LLVM Value has a specific constant value.
+ constant,
+
+ /// notconstant - This LLVM value is known to not have the specified value.
+ notconstant,
+
+ /// overdefined - This value is not known to be constant, and we know it
+ /// has a value (it is not undefined).
+ overdefined
+ };
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'notconstant' value.
+ PointerIntPair<Constant *, 2, LatticeValueTy> Val;
+
+public:
+ LVILatticeVal() : Val(0, undefined) {}
+
+ static LVILatticeVal get(Constant *C) {
+ LVILatticeVal Res;
+ Res.markConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getNot(Constant *C) {
+ LVILatticeVal Res;
+ Res.markNotConstant(C);
+ return Res;
+ }
+
+ bool isUndefined() const { return Val.getInt() == undefined; }
+ bool isConstant() const { return Val.getInt() == constant; }
+ bool isNotConstant() const { return Val.getInt() == notconstant; }
+ bool isOverdefined() const { return Val.getInt() == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val.getPointer();
+ }
+
+ Constant *getNotConstant() const {
+ assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
+ return Val.getPointer();
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+ Val.setInt(overdefined);
+ return true;
+ }
+
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ if (isConstant()) {
+ assert(getConstant() == V && "Marking constant with different value");
+ return false;
+ }
+
+ assert(isUndefined());
+ Val.setInt(constant);
+ assert(V && "Marking constant with NULL");
+ Val.setPointer(V);
+ return true;
+ }
+
+ /// markNotConstant - Return true if this is a change in status.
+ bool markNotConstant(Constant *V) {
+ if (isNotConstant()) {
+ assert(getNotConstant() == V && "Marking !constant with different value");
+ return false;
+ }
+
+ if (isConstant())
+ assert(getConstant() != V && "Marking not constant with different value");
+ else
+ assert(isUndefined());
+
+ Val.setInt(notconstant);
+ assert(V && "Marking constant with NULL");
+ Val.setPointer(V);
+ return true;
+ }
+
+ /// mergeIn - Merge the specified lattice value into this one, updating this
+ /// one and returning true if anything changed.
+ bool mergeIn(const LVILatticeVal &RHS) {
+ if (RHS.isUndefined() || isOverdefined()) return false;
+ if (RHS.isOverdefined()) return markOverdefined();
+
+ if (RHS.isNotConstant()) {
+ if (isNotConstant()) {
+ if (getNotConstant() != RHS.getNotConstant() ||
+ isa<ConstantExpr>(getNotConstant()) ||
+ isa<ConstantExpr>(RHS.getNotConstant()))
+ return markOverdefined();
+ return false;
+ }
+ if (isConstant()) {
+ if (getConstant() == RHS.getNotConstant() ||
+ isa<ConstantExpr>(RHS.getNotConstant()) ||
+ isa<ConstantExpr>(getConstant()))
+ return markOverdefined();
+ return markNotConstant(RHS.getNotConstant());
+ }
+
+ assert(isUndefined() && "Unexpected lattice");
+ return markNotConstant(RHS.getNotConstant());
+ }
+
+    // RHS must be a constant; we must be undef, constant, or notconstant.
+ if (isUndefined())
+ return markConstant(RHS.getConstant());
+
+ if (isConstant()) {
+ if (getConstant() != RHS.getConstant())
+ return markOverdefined();
+ return false;
+ }
+
+ // If we are known "!=4" and RHS is "==5", stay at "!=4".
+ if (getNotConstant() == RHS.getConstant() ||
+ isa<ConstantExpr>(getNotConstant()) ||
+ isa<ConstantExpr>(RHS.getConstant()))
+ return markOverdefined();
+ return false;
+ }
+
+};
+
+} // end anonymous namespace.
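
To make the merge rules above concrete, here is a hypothetical snippet (not
part of this patch; Ctx stands for some LLVMContext): merging into an
undefined value simply adopts the other side, while a constant meeting the
matching notconstant value must give up and go overdefined.

    // Hypothetical sketch of LVILatticeVal's merge behavior.
    Constant *C4 = ConstantInt::get(Type::getInt32Ty(Ctx), 4);
    LVILatticeVal A = LVILatticeVal::get(C4);     // constant<4>
    LVILatticeVal B = LVILatticeVal::getNot(C4);  // notconstant<4>
    LVILatticeVal U;                              // undefined
    U.mergeIn(A);        // undefined merged with constant<4> -> constant<4>
    A.mergeIn(B);        // constant<4> meets notconstant<4> -> overdefined
    assert(U.isConstant() && A.isOverdefined());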
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
+ if (Val.isUndefined())
+ return OS << "undefined";
+ if (Val.isOverdefined())
+ return OS << "overdefined";
+
+ if (Val.isNotConstant())
+ return OS << "notconstant<" << *Val.getNotConstant() << '>';
+ return OS << "constant<" << *Val.getConstant() << '>';
+}
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfoCache Decl
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// LazyValueInfoCache - This is the cache kept by LazyValueInfo, which
+  /// retains lattice values computed for earlier queries so that later
+  /// queries from clients can reuse them.
+ class LazyValueInfoCache {
+ public:
+    /// BlockCacheEntryTy - A single cache entry: the lattice value computed
+    /// for a context-dependent Value* at the end of the specified basic
+    /// block.
+ typedef std::pair<BasicBlock*, LVILatticeVal> BlockCacheEntryTy;
+
+    /// ValueCacheEntryTy - This is all of the cached block information for
+    /// exactly one Value*.  The entries are sorted by BasicBlock*, allowing
+    /// us to do a lookup with a binary search.
+ typedef std::vector<BlockCacheEntryTy> ValueCacheEntryTy;
+
+ private:
+ /// ValueCache - This is all of the cached information for all values,
+ /// mapped from Value* to key information.
+ DenseMap<Value*, ValueCacheEntryTy> ValueCache;
+ public:
+
+ /// getValueInBlock - This is the query interface to determine the lattice
+ /// value for the specified Value* at the end of the specified block.
+ LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
+
+ /// getValueOnEdge - This is the query interface to determine the lattice
+ /// value for the specified Value* that is true on the specified edge.
+ LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
+ };
+} // end anonymous namespace
+
+namespace {
+ struct BlockCacheEntryComparator {
+ static int Compare(const void *LHSv, const void *RHSv) {
+ const LazyValueInfoCache::BlockCacheEntryTy *LHS =
+ static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(LHSv);
+ const LazyValueInfoCache::BlockCacheEntryTy *RHS =
+ static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(RHSv);
+ if (LHS->first < RHS->first)
+ return -1;
+ if (LHS->first > RHS->first)
+ return 1;
+ return 0;
+ }
+
+ bool operator()(const LazyValueInfoCache::BlockCacheEntryTy &LHS,
+ const LazyValueInfoCache::BlockCacheEntryTy &RHS) const {
+ return LHS.first < RHS.first;
+ }
+ };
+}
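
The comparator above deliberately exposes two views of the same ordering: a
qsort-style static Compare for array_pod_sort, and a functor operator() for
std::lower_bound. A reduced, self-contained sketch of this dual-interface
pattern (hypothetical, not from the patch):

    struct IntCmp {
      // qsort-style comparator, usable with array_pod_sort / std::qsort.
      static int Compare(const void *A, const void *B) {
        int L = *static_cast<const int *>(A);
        int R = *static_cast<const int *>(B);
        return (L < R) ? -1 : (L > R) ? 1 : 0;
      }
      // Strict-weak-ordering functor, usable with std::lower_bound.
      bool operator()(int L, int R) const { return L < R; }
    };

    // Usage sketch:
    //   array_pod_sort(V.begin(), V.end(), IntCmp::Compare);
    //   std::lower_bound(V.begin(), V.end(), 42, IntCmp());

Both views must agree on the ordering, or sorted lookups will miss entries
that the sort placed elsewhere.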
+
+//===----------------------------------------------------------------------===//
+// LVIQuery Impl
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// LVIQuery - This is a transient object that exists while a query is
+ /// being performed.
+ ///
+  /// TODO: Reuse LVIQuery instead of recreating it for every query; this
+  /// avoids reallocating the DenseMap on every query.
+ class LVIQuery {
+ typedef LazyValueInfoCache::BlockCacheEntryTy BlockCacheEntryTy;
+ typedef LazyValueInfoCache::ValueCacheEntryTy ValueCacheEntryTy;
+
+ /// This is the current value being queried for.
+ Value *Val;
+
+ /// This is all of the cached information about this value.
+ ValueCacheEntryTy &Cache;
+
+    /// NewBlockInfo - This maps the new BasicBlocks which have been added to
+    /// the cache but are not yet merged into it in sorted order.
+ DenseMap<BasicBlock*, LVILatticeVal> NewBlockInfo;
+ public:
+
+ LVIQuery(Value *V, ValueCacheEntryTy &VC) : Val(V), Cache(VC) {
+ }
+
+ ~LVIQuery() {
+ // When the query is done, insert the newly discovered facts into the
+ // cache in sorted order.
+ if (NewBlockInfo.empty()) return;
+
+ // Grow the cache to exactly fit the new data.
+ Cache.reserve(Cache.size() + NewBlockInfo.size());
+
+ // If we only have one new entry, insert it instead of doing a full-on
+ // sort.
+ if (NewBlockInfo.size() == 1) {
+ BlockCacheEntryTy Entry = *NewBlockInfo.begin();
+ ValueCacheEntryTy::iterator I =
+ std::lower_bound(Cache.begin(), Cache.end(), Entry,
+ BlockCacheEntryComparator());
+ assert((I == Cache.end() || I->first != Entry.first) &&
+ "Entry already in map!");
+
+ Cache.insert(I, Entry);
+ return;
+ }
+
+ // TODO: If we only have two new elements, INSERT them both.
+
+ Cache.insert(Cache.end(), NewBlockInfo.begin(), NewBlockInfo.end());
+ array_pod_sort(Cache.begin(), Cache.end(),
+ BlockCacheEntryComparator::Compare);
+
+ }
+
+ LVILatticeVal getBlockValue(BasicBlock *BB);
+ LVILatticeVal getEdgeValue(BasicBlock *FromBB, BasicBlock *ToBB);
+
+ private:
+ LVILatticeVal &getCachedEntryForBlock(BasicBlock *BB);
+ };
+} // end anonymous namespace
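
The destructor above implements a batch-merge: new facts accumulate in a
DenseMap during the query and are folded into the sorted vector once, at
destruction time. A standalone sketch of the same sorted-vector-as-map idiom,
using plain standard-library calls rather than LLVM's array_pod_sort
(hypothetical):

    #include <algorithm>
    #include <utility>
    #include <vector>

    typedef std::pair<int, double> Entry;  // (key, value), kept sorted by key

    // Append a batch of new entries, then restore sorted order with one
    // sort; lookups afterwards use std::lower_bound (binary search).
    void mergeBatch(std::vector<Entry> &Cache, const std::vector<Entry> &New) {
      Cache.reserve(Cache.size() + New.size());
      Cache.insert(Cache.end(), New.begin(), New.end());
      std::sort(Cache.begin(), Cache.end());
    }

One sort per query replaces one insertion per entry, which is the point of
deferring the merge to the destructor.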
+
+/// getCachedEntryForBlock - See if we already have a value for this block.
+/// If so, return it; otherwise create a new entry in the NewBlockInfo map.
+LVILatticeVal &LVIQuery::getCachedEntryForBlock(BasicBlock *BB) {
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ if (!Cache.empty()) {
+ ValueCacheEntryTy::iterator Entry =
+ std::lower_bound(Cache.begin(), Cache.end(),
+ BlockCacheEntryTy(BB, LVILatticeVal()),
+ BlockCacheEntryComparator());
+ if (Entry != Cache.end() && Entry->first == BB)
+ return Entry->second;
+ }
+
+ // Otherwise, check to see if it's in NewBlockInfo or create a new entry if
+ // not.
+ return NewBlockInfo[BB];
+}
+
+LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
+ // See if we already have a value for this block.
+ LVILatticeVal &BBLV = getCachedEntryForBlock(BB);
+
+ // If we've already computed this block's value, return it.
+ if (!BBLV.isUndefined()) {
+ DEBUG(errs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
+ return BBLV;
+ }
+
+ // Otherwise, this is the first time we're seeing this block. Reset the
+ // lattice value to overdefined, so that cycles will terminate and be
+ // conservatively correct.
+ BBLV.markOverdefined();
+
+ // If V is live into BB, see if our predecessors know anything about it.
+ Instruction *BBI = dyn_cast<Instruction>(Val);
+ if (BBI == 0 || BBI->getParent() != BB) {
+ LVILatticeVal Result; // Start Undefined.
+ unsigned NumPreds = 0;
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ Result.mergeIn(getEdgeValue(*PI, BB));
+
+      // If we hit overdefined, exit early: the cached entry for this block
+      // was already seeded with overdefined above.
+ if (Result.isOverdefined()) {
+ DEBUG(errs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+ return Result;
+ }
+ ++NumPreds;
+ }
+
+ // If this is the entry block, we must be asking about an argument. The
+ // value is overdefined.
+ if (NumPreds == 0 && BB == &BB->getParent()->front()) {
+ assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
+ Result.markOverdefined();
+ return Result;
+ }
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined());
+ return getCachedEntryForBlock(BB) = Result;
+ }
+
+ // If this value is defined by an instruction in this block, we have to
+ // process it here somehow or return overdefined.
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ (void)PN;
+ // TODO: PHI Translation in preds.
+ } else {
+
+ }
+
+ DEBUG(errs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+
+ LVILatticeVal Result;
+ Result.markOverdefined();
+ return getCachedEntryForBlock(BB) = Result;
+}
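
Note how getBlockValue seeds the cache entry with overdefined before visiting
predecessors: if the recursion re-enters the same block along a CFG cycle, it
reads the conservative placeholder instead of looping forever. A minimal
standalone sketch of that idiom (hypothetical, not from the patch):

    #include <map>
    #include <vector>

    enum Val { Exact, Over };

    // Memoized walk over a graph that may contain cycles: seed the memo
    // with a conservative value before recursing into predecessors.
    Val visit(int Node, const std::vector<std::vector<int> > &Preds,
              std::map<int, Val> &Memo) {
      std::map<int, Val>::iterator It = Memo.find(Node);
      if (It != Memo.end()) return It->second;  // reuse, or hit the seed
      Memo[Node] = Over;                        // conservative seed breaks cycles
      Val Result = Exact;
      for (size_t i = 0, e = Preds[Node].size(); i != e; ++i)
        if (visit(Preds[Node][i], Preds, Memo) == Over) { Result = Over; break; }
      return Memo[Node] = Result;
    }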
+
+
+/// getEdgeValue - This method attempts to infer a more precise lattice value
+/// for Val on the edge from BBFrom to BBTo by exploiting the terminator
+/// (conditional branch or switch) that forms the edge.
+LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
+ // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
+ // know that v != 0.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
+    // If this is a conditional branch and only one successor goes to BBTo,
+    // then we may be able to infer something from the condition.
+ if (BI->isConditional() &&
+ BI->getSuccessor(0) != BI->getSuccessor(1)) {
+ bool isTrueDest = BI->getSuccessor(0) == BBTo;
+ assert(BI->getSuccessor(!isTrueDest) == BBTo &&
+ "BBTo isn't a successor of BBFrom");
+
+ // If V is the condition of the branch itself, then we know exactly what
+ // it is.
+ if (BI->getCondition() == Val)
+ return LVILatticeVal::get(ConstantInt::get(
+ Type::getInt1Ty(Val->getContext()), isTrueDest));
+
+ // If the condition of the branch is an equality comparison, we may be
+ // able to infer the value.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if (ICI->isEquality() && ICI->getOperand(0) == Val &&
+ isa<Constant>(ICI->getOperand(1))) {
+ // We know that V has the RHS constant if this is a true SETEQ or
+ // false SETNE.
+ if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
+ return LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ return LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ }
+ }
+ }
+
+ // If the edge was formed by a switch on the value, then we may know exactly
+ // what it is.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
+    // If BBTo is the default destination of the switch, we don't know
+    // anything; a more powerful range analysis could recover information
+    // even in that case.
+ if (SI->getCondition() == Val && SI->getDefaultDest() != BBTo) {
+ // We only know something if there is exactly one value that goes from
+ // BBFrom to BBTo.
+ unsigned NumEdges = 0;
+ ConstantInt *EdgeVal = 0;
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ if (SI->getSuccessor(i) != BBTo) continue;
+ if (NumEdges++) break;
+ EdgeVal = SI->getCaseValue(i);
+ }
+ assert(EdgeVal && "Missing successor?");
+ if (NumEdges == 1)
+ return LVILatticeVal::get(EdgeVal);
+ }
+ }
+
+ // Otherwise see if the value is known in the block.
+ return getBlockValue(BBFrom);
+}
+
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfoCache Impl
+//===----------------------------------------------------------------------===//
+
+LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return LVILatticeVal::get(VC);
+
+ DEBUG(errs() << "LVI Getting block end value " << *V << " at '"
+ << BB->getName() << "'\n");
+
+ LVILatticeVal Result = LVIQuery(V, ValueCache[V]).getBlockValue(BB);
+
+ DEBUG(errs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::
+getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return LVILatticeVal::get(VC);
+
+ DEBUG(errs() << "LVI Getting edge value " << *V << " from '"
+ << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+ LVILatticeVal Result =
+ LVIQuery(V, ValueCache[V]).getEdgeValue(FromBB, ToBB);
+
+ DEBUG(errs() << " Result = " << Result << "\n");
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfo Impl
+//===----------------------------------------------------------------------===//
+
+bool LazyValueInfo::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+ // Fully lazy.
+ return false;
+}
+
+/// getCache - This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl) {
+ if (!PImpl)
+ PImpl = new LazyValueInfoCache();
+ return *static_cast<LazyValueInfoCache*>(PImpl);
+}
+
+void LazyValueInfo::releaseMemory() {
+ // If the cache was allocated, free it.
+ if (PImpl) {
+ delete &getCache(PImpl);
+ PImpl = 0;
+ }
+}
+
+Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
+ LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ return 0;
+}
+
+/// getConstantOnEdge - Determine whether the specified value is known to be a
+/// constant on the specified edge. Return null if not.
+Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
+ BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ return 0;
+}
+
+/// getPredicateOnEdge - Determine whether the specified value comparison
+/// with a constant is known to be true or false on the specified CFG edge.
+/// Pred is a CmpInst predicate.
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+ BasicBlock *FromBB, BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ // If we know the value is a constant, evaluate the conditional.
+ Constant *Res = 0;
+ if (Result.isConstant()) {
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
+ if (ConstantInt *ResCI = dyn_cast_or_null<ConstantInt>(Res))
+ return ResCI->isZero() ? False : True;
+ return Unknown;
+ }
+
+ if (Result.isNotConstant()) {
+ // If this is an equality comparison, we can try to fold it knowing that
+ // "V != C1".
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // !C1 == C -> false iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD);
+ if (Res->isNullValue())
+ return False;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ // !C1 != C -> true iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD);
+ if (Res->isNullValue())
+ return True;
+ }
+ return Unknown;
+ }
+
+ return Unknown;
+}
+
+
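To see the public interface in action, here is a hypothetical client sketch
(none of these names come from the patch): LVI is a LazyValueInfo reference
obtained via getAnalysis<LazyValueInfo>(), BI is the conditional branch
compiled from "if (x == 4)", X is the Value for x, and Four is the
ConstantInt 4.

    // On the true edge of "if (x == 4)", LVI proves x is the constant 4.
    BasicBlock *FromBB = BI->getParent();
    if (Constant *C = LVI.getConstantOnEdge(X, FromBB, BI->getSuccessor(0)))
      errs() << "x = " << *C << " on the true edge\n";

    // On the false edge LVI only knows notconstant<4>, but that is enough
    // for getPredicateOnEdge to fold "x == 4" to False.
    if (LVI.getPredicateOnEdge(ICmpInst::ICMP_EQ, X, Four,
                               FromBB, BI->getSuccessor(1))
          == LazyValueInfo::False)
      errs() << "x != 4 on the false edge\n";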
diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp
index 2bbe98a..02ec7d3 100644
--- a/lib/Analysis/LiveValues.cpp
+++ b/lib/Analysis/LiveValues.cpp
@@ -17,7 +17,9 @@
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;
-FunctionPass *llvm::createLiveValuesPass() { return new LiveValues(); }
+namespace llvm {
+ FunctionPass *createLiveValuesPass() { return new LiveValues(); }
+}
char LiveValues::ID = 0;
static RegisterPass<LiveValues>
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index e9256b7..1c614b0 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -263,14 +263,13 @@ bool Loop::isLCSSAForm() const {
SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
- BasicBlock *BB = *BI;
- for (BasicBlock ::iterator I = BB->begin(), E = BB->end(); I != E;++I)
+ BasicBlock *BB = *BI;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
++UI) {
BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(*UI)) {
+ if (PHINode *P = dyn_cast<PHINode>(*UI))
UserBB = P->getIncomingBlock(UI);
- }
// Check the current block, as a fast-path. Most values are used in
// the same block they are defined in.
@@ -286,12 +285,14 @@ bool Loop::isLCSSAForm() const {
/// the LoopSimplify form transforms loops to, which is sometimes called
/// normal form.
bool Loop::isLoopSimplifyForm() const {
- // Normal-form loops have a preheader.
- if (!getLoopPreheader())
- return false;
- // Normal-form loops have a single backedge.
- if (!getLoopLatch())
- return false;
+  // Normal-form loops have a preheader, a single backedge, and exit blocks
+  // whose predecessors are all inside the loop.
+ return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
+}
+
+/// hasDedicatedExits - Return true if no exit block for the loop
+/// has a predecessor that is outside the loop.
+bool Loop::hasDedicatedExits() const {
// Sort the blocks vector so that we can use binary search to do quick
// lookups.
SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index f4eb793..b448628 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -16,7 +16,7 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;
@@ -87,13 +87,8 @@ const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) {
: NULL;
}
-/// isConstantOne - Return true only if val is constant int 1.
-static bool isConstantOne(Value *val) {
- return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
-}
-
-static Value *isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
- const TargetData *TD) {
+static Value *computeArraySize(const CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt = false) {
if (!CI)
return NULL;
@@ -102,99 +97,27 @@ static Value *isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
if (!T || !T->isSized() || !TD)
return NULL;
- Value *MallocArg = CI->getOperand(1);
- const Type *ArgType = MallocArg->getType();
- ConstantExpr *CO = dyn_cast<ConstantExpr>(MallocArg);
- BinaryOperator *BO = dyn_cast<BinaryOperator>(MallocArg);
-
- unsigned ElementSizeInt = TD->getTypeAllocSize(T);
+ unsigned ElementSize = TD->getTypeAllocSize(T);
if (const StructType *ST = dyn_cast<StructType>(T))
- ElementSizeInt = TD->getStructLayout(ST)->getSizeInBytes();
- Constant *ElementSize = ConstantInt::get(ArgType, ElementSizeInt);
-
- // First, check if CI is a non-array malloc.
- if (CO && CO == ElementSize)
- // Match CreateMalloc's use of constant 1 array-size for non-array mallocs.
- return ConstantInt::get(ArgType, 1);
-
- // Second, check if CI is an array malloc whose array size can be determined.
- if (isConstantOne(ElementSize))
- return MallocArg;
-
- if (ConstantInt *CInt = dyn_cast<ConstantInt>(MallocArg))
- if (CInt->getZExtValue() % ElementSizeInt == 0)
- return ConstantInt::get(ArgType, CInt->getZExtValue() / ElementSizeInt);
+ ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
- if (!CO && !BO)
- return NULL;
-
- Value *Op0 = NULL;
- Value *Op1 = NULL;
- unsigned Opcode = 0;
- if (CO && ((CO->getOpcode() == Instruction::Mul) ||
- (CO->getOpcode() == Instruction::Shl))) {
- Op0 = CO->getOperand(0);
- Op1 = CO->getOperand(1);
- Opcode = CO->getOpcode();
- }
- if (BO && ((BO->getOpcode() == Instruction::Mul) ||
- (BO->getOpcode() == Instruction::Shl))) {
- Op0 = BO->getOperand(0);
- Op1 = BO->getOperand(1);
- Opcode = BO->getOpcode();
- }
-
- // Determine array size if malloc's argument is the product of a mul or shl.
- if (Op0) {
- if (Opcode == Instruction::Mul) {
- if (Op1 == ElementSize)
- // ArraySize * ElementSize
- return Op0;
- if (Op0 == ElementSize)
- // ElementSize * ArraySize
- return Op1;
- }
- if (Opcode == Instruction::Shl) {
- ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
- if (!Op1CI) return NULL;
-
- APInt Op1Int = Op1CI->getValue();
- uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
- Value *Op1Pow = ConstantInt::get(Context,
- APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
- if (Op0 == ElementSize)
- // ArraySize << log2(ElementSize)
- return Op1Pow;
- if (Op1Pow == ElementSize)
- // ElementSize << log2(ArraySize)
- return Op0;
- }
- }
+  // If the malloc call's argument can be determined to be a multiple of
+  // ElementSize, return the multiple.  Otherwise, return NULL.
+ Value *MallocArg = CI->getOperand(1);
+ Value *Multiple = NULL;
+ if (ComputeMultiple(MallocArg, ElementSize, Multiple,
+ LookThroughSExt))
+ return Multiple;
- // We could not determine the malloc array size from MallocArg.
return NULL;
}
/// isArrayMalloc - Returns the corresponding CallInst if the instruction
/// is a call to malloc whose array size can be determined and the array size
/// is not constant 1. Otherwise, return NULL.
-CallInst *llvm::isArrayMalloc(Value *I, LLVMContext &Context,
- const TargetData *TD) {
- CallInst *CI = extractMallocCall(I);
- Value *ArraySize = isArrayMallocHelper(CI, Context, TD);
-
- if (ArraySize &&
- ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1))
- return CI;
-
- // CI is a non-array malloc or we can't figure out that it is an array malloc.
- return NULL;
-}
-
-const CallInst *llvm::isArrayMalloc(const Value *I, LLVMContext &Context,
- const TargetData *TD) {
+const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) {
const CallInst *CI = extractMallocCall(I);
- Value *ArraySize = isArrayMallocHelper(CI, Context, TD);
+ Value *ArraySize = computeArraySize(CI, TD);
if (ArraySize &&
ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1))
@@ -210,7 +133,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I, LLVMContext &Context,
/// 1: PointerType is the bitcast's result type.
/// >1: Unique PointerType cannot be determined, return NULL.
const PointerType *llvm::getMallocType(const CallInst *CI) {
- assert(isMalloc(CI) && "GetMallocType and not malloc call");
+ assert(isMalloc(CI) && "getMallocType and not malloc call");
const PointerType *MallocType = NULL;
unsigned NumOfBitCastUses = 0;
@@ -250,9 +173,10 @@ const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
/// determined.
-Value *llvm::getMallocArraySize(CallInst *CI, LLVMContext &Context,
- const TargetData *TD) {
- return isArrayMallocHelper(CI, Context, TD);
+Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt) {
+ assert(isMalloc(CI) && "getMallocArraySize and not malloc call");
+ return computeArraySize(CI, TD, LookThroughSExt);
}
//===----------------------------------------------------------------------===//
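
A hypothetical caller of the simplified interface (CI and TD are invented
names for a malloc CallInst and the current TargetData):

    // For "malloc(n * sizeof(int))" this recovers n via ComputeMultiple;
    // for a plain "malloc(sizeof(int))" it returns the constant 1.
    if (Value *ArraySize = getMallocArraySize(CI, TD,
                                              /*LookThroughSExt=*/false))
      errs() << "malloc array size: " << *ArraySize << '\n';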
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
index 2251b62..8da07e7 100644
--- a/lib/Analysis/PointerTracking.cpp
+++ b/lib/Analysis/PointerTracking.cpp
@@ -10,6 +10,7 @@
// This file implements tracking of pointer bounds.
//
//===----------------------------------------------------------------------===//
+
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -101,7 +102,7 @@ const SCEV *PointerTracking::computeAllocationCount(Value *P,
}
if (CallInst *CI = extractMallocCall(V)) {
- Value *arraySize = getMallocArraySize(CI, P->getContext(), TD);
+ Value *arraySize = getMallocArraySize(CI, TD);
const Type* AllocTy = getMallocAllocatedType(CI);
if (!AllocTy || !arraySize) return SE->getCouldNotCompute();
Ty = AllocTy;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 3e87ca2..ea4af40 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -3811,29 +3811,26 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
/// in the loop has the value PHIVal. If we can't fold this expression for some
/// reason, return null.
-static Constant *EvaluateExpression(Value *V, Constant *PHIVal) {
+static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
+ const TargetData *TD) {
if (isa<PHINode>(V)) return PHIVal;
if (Constant *C = dyn_cast<Constant>(V)) return C;
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV;
Instruction *I = cast<Instruction>(V);
- LLVMContext &Context = I->getParent()->getContext();
std::vector<Constant*> Operands;
Operands.resize(I->getNumOperands());
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal);
+ Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
if (Operands[i] == 0) return 0;
}
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Operands[0], Operands.size(),
- Context);
- else
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size(),
- Context);
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size(), TD);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
@@ -3879,7 +3876,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
return RetVal = PHIVal; // Got exit value!
// Compute the value of the PHI node for the next iteration.
- Constant *NextPHI = EvaluateExpression(BEValue, PHIVal);
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
if (NextPHI == PHIVal)
return RetVal = NextPHI; // Stopped evolving!
if (NextPHI == 0)
@@ -3920,7 +3917,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
for (Constant *PHIVal = StartCST;
IterationNum != MaxIterations; ++IterationNum) {
ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal));
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -3931,7 +3928,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
}
// Compute the value of the PHI node for the next iteration.
- Constant *NextPHI = EvaluateExpression(BEValue, PHIVal);
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
if (NextPHI == 0 || NextPHI == PHIVal)
return getCouldNotCompute();// Couldn't evaluate or not making progress...
PHIVal = NextPHI;
@@ -4040,12 +4037,10 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
Constant *C;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Operands[0], Operands.size(),
- getContext());
+ Operands[0], Operands[1], TD);
else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size(),
- getContext());
+ &Operands[0], Operands.size(), TD);
return getSCEV(C);
}
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 5672510..b0e6900 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -789,6 +789,118 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
}
+/// ComputeMultiple - This function computes the integer Multiple such that
+/// V == Base * Multiple.  If successful, it returns true and returns the
+/// result in Multiple; if unsuccessful, it returns false.  It looks through
+/// SExt instructions only if LookThroughSExt is true.
+bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+ bool LookThroughSExt, unsigned Depth) {
+ const unsigned MaxDepth = 6;
+
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+  assert(V->getType()->isInteger() && "Not an integer type!");
+
+ const Type *T = V->getType();
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+
+ if (Base == 0)
+ return false;
+
+ if (Base == 1) {
+ Multiple = V;
+ return true;
+ }
+
+ ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
+ Constant *BaseVal = ConstantInt::get(T, Base);
+ if (CO && CO == BaseVal) {
+ // Multiple is 1.
+ Multiple = ConstantInt::get(T, 1);
+ return true;
+ }
+
+ if (CI && CI->getZExtValue() % Base == 0) {
+ Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
+ return true;
+ }
+
+ if (Depth == MaxDepth) return false; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::SExt: {
+ if (!LookThroughSExt) return false;
+ // otherwise fall through to ZExt
+ }
+ case Instruction::ZExt: {
+ return ComputeMultiple(I->getOperand(0), Base, Multiple,
+ LookThroughSExt, Depth+1);
+ }
+ case Instruction::Shl:
+ case Instruction::Mul: {
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+
+ if (I->getOpcode() == Instruction::Shl) {
+ ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
+ if (!Op1CI) return false;
+ // Turn Op0 << Op1 into Op0 * 2^Op1
+ APInt Op1Int = Op1CI->getValue();
+ uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
+ Op1 = ConstantInt::get(V->getContext(),
+ APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
+ }
+
+ Value *Mul0 = NULL;
+ Value *Mul1 = NULL;
+ bool M0 = ComputeMultiple(Op0, Base, Mul0,
+ LookThroughSExt, Depth+1);
+ bool M1 = ComputeMultiple(Op1, Base, Mul1,
+ LookThroughSExt, Depth+1);
+
+ if (M0) {
+ if (isa<Constant>(Op1) && isa<Constant>(Mul0)) {
+ // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+ Multiple = ConstantExpr::getMul(cast<Constant>(Mul0),
+ cast<Constant>(Op1));
+ return true;
+ }
+
+ if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
+ if (Mul0CI->getValue() == 1) {
+ // V == Base * Op1, so return Op1
+ Multiple = Op1;
+ return true;
+ }
+ }
+
+ if (M1) {
+ if (isa<Constant>(Op0) && isa<Constant>(Mul1)) {
+ // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+ Multiple = ConstantExpr::getMul(cast<Constant>(Mul1),
+ cast<Constant>(Op0));
+ return true;
+ }
+
+ if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
+ if (Mul1CI->getValue() == 1) {
+ // V == Base * Op0, so return Op0
+ Multiple = Op0;
+ return true;
+ }
+ }
+ }
+ }
+
+ // We could not determine if V is a multiple of Base.
+ return false;
+}
+
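A small worked example of the new helper (hypothetical names): suppose V was
built as "shl i64 %x, 3" and we ask for multiples of 8. The Shl is treated as
a multiply by 2^3 = 8, the constant operand contributes a multiple of 1, and
the other operand comes back as the answer.

    // Hypothetical: X is the Value for %x, V is "shl i64 %x, 3".
    Value *Multiple = 0;
    if (ComputeMultiple(V, /*Base=*/8, Multiple, /*LookThroughSExt=*/false))
      errs() << "V == 8 * " << *Multiple << '\n';  // Multiple is X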
/// CannotBeNegativeZero - Return true if we can prove that the specified FP
/// value is never equal to -0.0.
///
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 63af42d..26b6a09 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -2039,7 +2039,7 @@ bool LLParser::ParseValID(ValID &ID) {
ParseToken(lltok::StringConstant, "expected constraint string"))
return true;
ID.StrVal2 = Lex.getStrVal();
- ID.UIntVal = HasSideEffect | ((unsigned)AlignStack<<1);
+ ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1);
ID.Kind = ValID::t_InlineAsm;
return false;
}
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 86d051c..c37c793 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -54,10 +54,13 @@ unsigned AggressiveAntiDepState::GetGroup(unsigned Reg)
return Node;
}
-void AggressiveAntiDepState::GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs)
+void AggressiveAntiDepState::GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
{
for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) {
- if (GetGroup(Reg) == Group)
+ if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
Regs.push_back(Reg);
}
}
@@ -99,12 +102,28 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
AggressiveAntiDepBreaker::
-AggressiveAntiDepBreaker(MachineFunction& MFi) :
+AggressiveAntiDepBreaker(MachineFunction& MFi,
+ TargetSubtarget::RegClassVector& CriticalPathRCs) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TRI(MF.getTarget().getRegisterInfo()),
AllocatableSet(TRI->getAllocatableSet(MF)),
State(NULL), SavedState(NULL) {
+  /* Collect a bitset of all registers whose anti-dependencies should only
+     be broken if they are on the critical path. */
+ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ if (CriticalPathSet.none())
+ CriticalPathSet = CPSet;
+ else
+ CriticalPathSet |= CPSet;
+ }
+
+ DEBUG(errs() << "AntiDep Critical-Path Registers:");
+ DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
+ r = CriticalPathSet.find_next(r))
+ errs() << " " << TRI->getName(r));
+ DEBUG(errs() << '\n');
}
AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
@@ -264,16 +283,18 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
}
}
-/// AntiDepPathStep - Return SUnit that SU has an anti-dependence on.
-static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs,
- std::vector<SDep*>& Edges) {
+/// AntiDepEdges - Return in Edges the anti- and output-
+/// dependencies on Regs in SU that we want to consider for breaking.
+static void AntiDepEdges(SUnit *SU,
+ const AntiDepBreaker::AntiDepRegVector& Regs,
+ std::vector<SDep*>& Edges) {
AntiDepBreaker::AntiDepRegSet RegSet;
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
RegSet.insert(Regs[i]);
for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
- if (P->getKind() == SDep::Anti) {
+ if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
unsigned Reg = P->getReg();
if (RegSet.count(Reg) != 0) {
Edges.push_back(&*P);
@@ -285,6 +306,31 @@ static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs,
assert(RegSet.empty() && "Expected all antidep registers to be found");
}
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SUnit *CriticalPathStep(SUnit *SU) {
+ SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ if (SU != 0) {
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ }
+
+ return (Next) ? Next->getSUnit() : 0;
+}
+
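Given the step function above, walking the whole bottom-up critical path is
just iteration; a hypothetical sketch (CriticalPathSU stands for the deepest
SUnit, found the same way BreakAntiDependencies does below):

    // Visit every instruction on the bottom-up critical path.
    for (SUnit *SU = CriticalPathSU; SU != 0; SU = CriticalPathStep(SU)) {
      MachineInstr *MI = SU->getInstr();
      DEBUG(errs() << "critical path: " << *MI);
    }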
void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
const char *tag) {
unsigned *KillIndices = State->GetKillIndices();
@@ -499,11 +545,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
- // Collect all registers in the same group as AntiDepReg. These all
- // need to be renamed together if we are to break the
- // anti-dependence.
+ // Collect all referenced registers in the same group as
+ // AntiDepReg. These all need to be renamed together if we are to
+ // break the anti-dependence.
std::vector<unsigned> Regs;
- State->GetGroupRegs(AntiDepGroupIndex, Regs);
+ State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
assert(Regs.size() > 0 && "Empty register group!");
if (Regs.size() == 0)
return false;
@@ -544,9 +590,10 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
}
// FIXME: for now just handle single register in group case...
- // FIXME: check only regs that have references...
- if (Regs.size() > 1)
+ if (Regs.size() > 1) {
+ DEBUG(errs() << "\tMultiple rename registers in group\n");
return false;
+ }
// Check each possible rename register for SuperReg in round-robin
// order. If that register is available, and the corresponding
@@ -630,12 +677,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
- // Nothing to do if no candidates.
- if (Candidates.empty()) {
- DEBUG(errs() << "\n===== No anti-dependency candidates\n");
- return 0;
- }
-
// The code below assumes that there is at least one instruction,
// so just duck out immediately if the block is empty.
if (SUnits.empty()) return 0;
@@ -655,16 +696,37 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// ...need a map from MI to SUnit.
std::map<MachineInstr *, SUnit *> MISUnitMap;
-
- DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() <<
- " anti-dependencies\n");
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
SUnit *SU = &SUnits[i];
MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU));
}
+ // Track progress along the critical path through the SUnit graph as
+ // we walk the instructions. This is needed for regclasses that only
+ // break critical-path anti-dependencies.
+ SUnit *CriticalPathSU = 0;
+ MachineInstr *CriticalPathMI = 0;
+ if (CriticalPathSet.any()) {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ if (!CriticalPathSU ||
+ ((SU->getDepth() + SU->Latency) >
+ (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+ CriticalPathSU = SU;
+ }
+ }
+
+ CriticalPathMI = CriticalPathSU->getInstr();
+ }
+
+  // Even if there are no anti-dependencies, we still need to go through the
+  // instructions to update Defs, Kills, etc.
#ifndef NDEBUG
- {
+ if (Candidates.empty()) {
+ DEBUG(errs() << "\n===== No anti-dependency candidates\n");
+ } else {
+ DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() <<
+ " anti-dependencies\n");
DEBUG(errs() << "Available regs:");
for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
if (!State->IsLive(Reg))
@@ -691,14 +753,26 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Process the defs in MI...
PrescanInstruction(MI, Count, PassthruRegs);
-
+
+      // Find the dependence edges that represent anti- and output-
+      // dependencies that are candidates for breaking.
std::vector<SDep*> Edges;
SUnit *PathSU = MISUnitMap[MI];
AntiDepBreaker::CandidateMap::iterator
citer = Candidates.find(PathSU);
if (citer != Candidates.end())
- AntiDepPathStep(PathSU, citer->second, Edges);
-
+ AntiDepEdges(PathSU, citer->second, Edges);
+
+ // If MI is not on the critical path, then we don't rename
+ // registers in the CriticalPathSet.
+ BitVector *ExcludeRegs = NULL;
+ if (MI == CriticalPathMI) {
+ CriticalPathSU = CriticalPathStep(CriticalPathSU);
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
+ } else {
+ ExcludeRegs = &CriticalPathSet;
+ }
+
// Ignore KILL instructions (they form a group in ScanInstruction
// but don't cause any anti-dependence breaking themselves)
if (MI->getOpcode() != TargetInstrInfo::KILL) {
@@ -707,7 +781,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
SDep *Edge = Edges[i];
SUnit *NextSU = Edge->getSUnit();
- if (Edge->getKind() != SDep::Anti) continue;
+ if ((Edge->getKind() != SDep::Anti) &&
+ (Edge->getKind() != SDep::Output)) continue;
unsigned AntiDepReg = Edge->getReg();
DEBUG(errs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
@@ -717,6 +792,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(errs() << " (non-allocatable)\n");
continue;
+ } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) {
+ // Don't break anti-dependencies for critical path registers
+ // if not on the critical path
+ DEBUG(errs() << " (not critical-path)\n");
+ continue;
} else if (PassthruRegs.count(AntiDepReg) != 0) {
// If the anti-dep register liveness "passes-thru", then
// don't try to change it. It will be changed along with
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index c512168..e5c9a7b 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
@@ -85,8 +86,11 @@ namespace llvm {
unsigned GetGroup(unsigned Reg);
// GetGroupRegs - Return a vector of the registers belonging to a
- // group.
- void GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs);
+  // group. If RegRefs is non-NULL then include only referenced registers.
+ void GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs);
// UnionGroups - Union Reg1's and Reg2's groups to form a new
// group. Return the index of the GroupNode representing the
@@ -114,6 +118,10 @@ namespace llvm {
/// because they may not be safe to break.
const BitVector AllocatableSet;
+ /// CriticalPathSet - The set of registers that should only be
+ /// renamed if they are on the critical path.
+ BitVector CriticalPathSet;
+
/// State - The state used to identify and rename anti-dependence
/// registers.
AggressiveAntiDepState *State;
@@ -124,7 +132,8 @@ namespace llvm {
AggressiveAntiDepState *SavedState;
public:
- AggressiveAntiDepBreaker(MachineFunction& MFi);
+ AggressiveAntiDepBreaker(MachineFunction& MFi,
+ TargetSubtarget::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker();
/// GetMaxTrials - As anti-dependencies are broken, additional
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index 2775087..b614f68 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -23,6 +23,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include <map>
namespace llvm {
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index bb6bd95..08e0eae 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/Module.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -35,6 +36,7 @@
#include "llvm/Support/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
@@ -512,7 +514,7 @@ void AsmPrinter::EmitXXStructorList(Constant *List) {
//===----------------------------------------------------------------------===//
/// LEB 128 number encoding.
-/// PrintULEB128 - Print a series of hexidecimal values (separated by commas)
+/// PrintULEB128 - Print a series of hexadecimal values (separated by commas)
/// representing an unsigned leb128 value.
void AsmPrinter::PrintULEB128(unsigned Value) const {
char Buffer[20];
@@ -525,7 +527,7 @@ void AsmPrinter::PrintULEB128(unsigned Value) const {
} while (Value);
}
-/// PrintSLEB128 - Print a series of hexidecimal values (separated by commas)
+/// PrintSLEB128 - Print a series of hexadecimal values (separated by commas)
/// representing a signed leb128 value.
void AsmPrinter::PrintSLEB128(int Value) const {
int Sign = Value >> (8 * sizeof(Value) - 1);
@@ -546,7 +548,7 @@ void AsmPrinter::PrintSLEB128(int Value) const {
// Emission and print routines
//
-/// PrintHex - Print a value as a hexidecimal value.
+/// PrintHex - Print a value as a hexadecimal value.
///
void AsmPrinter::PrintHex(int Value) const {
char Buffer[20];
@@ -727,7 +729,7 @@ static void printStringChar(formatted_raw_ostream &O, unsigned char C) {
/// Special characters are emitted properly.
/// \literal (Eg. '\t') \endliteral
void AsmPrinter::EmitString(const std::string &String) const {
- EmitString(String.c_str(), String.size());
+ EmitString(String.data(), String.size());
}
void AsmPrinter::EmitString(const char *String, unsigned Size) const {
@@ -1357,32 +1359,31 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
/// instruction's DebugLoc.
void AsmPrinter::processDebugLoc(const MachineInstr *MI,
bool BeforePrintingInsn) {
- if (!MAI || !DW)
+ if (!MAI || !DW || !MAI->doesSupportDebugInformation()
+ || !DW->ShouldEmitDwarfDebug())
return;
DebugLoc DL = MI->getDebugLoc();
- if (MAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) {
- if (!DL.isUnknown()) {
- DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
- if (BeforePrintingInsn) {
- if (CurDLT.Scope != 0 && PrevDLT != CurDLT) {
- unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
- CurDLT.Scope);
- printLabel(L);
- O << '\n';
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
- DW->SetDbgScopeBeginLabels(MI, L);
-#endif
- } else {
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
- DW->SetDbgScopeEndLabels(MI, 0);
-#endif
- }
- }
+ if (DL.isUnknown())
+ return;
+ DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
+ if (CurDLT.Scope == 0)
+ return;
+
+ if (BeforePrintingInsn) {
+ if (CurDLT != PrevDLT) {
+ unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
+ CurDLT.Scope);
+ printLabel(L);
+ DW->BeginScope(MI, L);
PrevDLT = CurDLT;
}
+ } else {
+ // After printing instruction
+ DW->EndScope(MI);
}
}
+
/// printInlineAsm - This method formats and prints the specified machine
/// instruction that is an inline asm.
void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
@@ -1399,6 +1400,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
// Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ O << '\t';
+
// If this asmstr is empty, just print the #APP/#NOAPP markers.
// These are useful to see where empty asm's wound up.
if (AsmStr[0] == 0) {
@@ -1636,13 +1639,17 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
assert(BB->hasName() &&
"Address of anonymous basic block not supported yet!");
- // FIXME: This isn't guaranteed to produce a unique name even if the
- // block and function have a name.
- std::string Mangled =
- Mang->getMangledName(F, Mang->makeNameProper(BB->getName()).c_str(),
- /*ForcePrivate=*/true);
+ // This code must use the function name itself, and not the function number,
+ // since it must be possible to generate the label name from within other
+ // functions.
+ std::string FuncName = Mang->getMangledName(F);
+
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA"
+ << FuncName.size() << '_' << FuncName << '_'
+ << Mang->makeNameProper(BB->getName());
- return OutContext.GetOrCreateSymbol(StringRef(Mangled));
+ return OutContext.GetOrCreateSymbol(Name.str());
}
MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const {
@@ -1817,21 +1824,80 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
/// EmitComments - Pretty-print comments for instructions
void AsmPrinter::EmitComments(const MachineInstr &MI) const {
- assert(VerboseAsm && !MI.getDebugLoc().isUnknown());
-
- DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc());
+ if (!VerboseAsm)
+ return;
- // Print source line info.
- O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString() << " SrcLine ";
- if (DLT.Scope) {
- DICompileUnit CU(DLT.Scope);
- if (!CU.isNull())
- O << CU.getFilename() << " ";
+ bool Newline = false;
+
+ if (!MI.getDebugLoc().isUnknown()) {
+ DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc());
+
+ // Print source line info.
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " SrcLine ";
+ if (DLT.Scope) {
+ DICompileUnit CU(DLT.Scope);
+ if (!CU.isNull())
+ O << CU.getFilename() << " ";
+ }
+ O << DLT.Line;
+ if (DLT.Col != 0)
+ O << ":" << DLT.Col;
+ Newline = true;
+ }
+
+ // Check for spills and reloads
+ int FI;
+
+ const MachineFrameInfo *FrameInfo =
+ MI.getParent()->getParent()->getFrameInfo();
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Reload";
+ Newline = true;
+ }
+ }
+ else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Folded Reload";
+ Newline = true;
+ }
+ }
+ else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Spill";
+ Newline = true;
+ }
+ }
+ else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Folded Spill";
+ Newline = true;
+ }
+ }
+
+ // Check for spill-induced copies
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg,
+ SrcSubIdx, DstSubIdx)) {
+ if (MI.getAsmPrinterFlag(ReloadReuse)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Reload Reuse";
+ Newline = true;
+ }
}
- O << DLT.Line;
- if (DLT.Col != 0)
- O << ":" << DLT.Col;
}
/// PrintChildLoopComment - Print comments about child loops within
@@ -1862,8 +1928,7 @@ static void PrintChildLoopComment(formatted_raw_ostream &O,
}
/// EmitComments - Pretty-print comments for basic blocks
-void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const
-{
+void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const {
if (VerboseAsm) {
// Add loop depth information
const MachineLoop *loop = LI->getLoopFor(&MBB);
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 62b51ec..3e50a15 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -29,7 +29,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
/// Dwarf abbreviation.
- class VISIBILITY_HIDDEN DIEAbbrevData {
+ class DIEAbbrevData {
/// Attribute - Dwarf attribute code.
///
unsigned Attribute;
@@ -52,7 +52,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
/// information object.
- class VISIBILITY_HIDDEN DIEAbbrev : public FoldingSetNode {
+ class DIEAbbrev : public FoldingSetNode {
/// Tag - Dwarf tag code.
///
unsigned Tag;
@@ -113,7 +113,7 @@ namespace llvm {
class CompileUnit;
class DIEValue;
- class VISIBILITY_HIDDEN DIE : public FoldingSetNode {
+ class DIE : public FoldingSetNode {
protected:
/// Abbrev - Buffer for constructing abbreviation.
///
@@ -202,7 +202,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEValue - A debug information entry value.
///
- class VISIBILITY_HIDDEN DIEValue : public FoldingSetNode {
+ class DIEValue : public FoldingSetNode {
public:
enum {
isInteger,
@@ -249,7 +249,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEInteger - An integer value DIE.
///
- class VISIBILITY_HIDDEN DIEInteger : public DIEValue {
+ class DIEInteger : public DIEValue {
uint64_t Integer;
public:
explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
@@ -294,7 +294,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEString - A string value DIE.
///
- class VISIBILITY_HIDDEN DIEString : public DIEValue {
+ class DIEString : public DIEValue {
const std::string Str;
public:
explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {}
@@ -326,7 +326,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEDwarfLabel - A Dwarf internal label expression DIE.
//
- class VISIBILITY_HIDDEN DIEDwarfLabel : public DIEValue {
+ class DIEDwarfLabel : public DIEValue {
const DWLabel Label;
public:
explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {}
@@ -356,7 +356,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEObjectLabel - A label to an object in code or data.
//
- class VISIBILITY_HIDDEN DIEObjectLabel : public DIEValue {
+ class DIEObjectLabel : public DIEValue {
const std::string Label;
public:
explicit DIEObjectLabel(const std::string &L)
@@ -389,7 +389,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIESectionOffset - A section offset DIE.
///
- class VISIBILITY_HIDDEN DIESectionOffset : public DIEValue {
+ class DIESectionOffset : public DIEValue {
const DWLabel Label;
const DWLabel Section;
bool IsEH : 1;
@@ -428,7 +428,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEDelta - A simple label difference DIE.
///
- class VISIBILITY_HIDDEN DIEDelta : public DIEValue {
+ class DIEDelta : public DIEValue {
const DWLabel LabelHi;
const DWLabel LabelLo;
public:
@@ -462,7 +462,7 @@ namespace llvm {
/// DIEntry - A pointer to another debug information entry. An instance of
/// this class can also be used as a proxy for a debug information entry not
/// yet defined (ie. types.)
- class VISIBILITY_HIDDEN DIEEntry : public DIEValue {
+ class DIEEntry : public DIEValue {
DIE *Entry;
public:
explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
@@ -497,7 +497,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEBlock - A block of values. Primarily used for location expressions.
//
- class VISIBILITY_HIDDEN DIEBlock : public DIEValue, public DIE {
+ class DIEBlock : public DIEValue, public DIE {
unsigned Size; // Size in bytes excluding size header.
public:
DIEBlock()
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1372fc2..c62c435 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -48,7 +48,7 @@ namespace llvm {
//===----------------------------------------------------------------------===//
/// CompileUnit - This dwarf writer support class manages information associate
/// with a source file.
-class VISIBILITY_HIDDEN CompileUnit {
+class CompileUnit {
/// ID - File identifier for source.
///
unsigned ID;
@@ -127,61 +127,66 @@ public:
class DbgVariable {
DIVariable Var; // Variable Descriptor.
unsigned FrameIndex; // Variable frame index.
- bool InlinedFnVar; // Variable for an inlined function.
+ DbgVariable *AbstractVar; // Abstract variable for this variable.
+ DIE *TheDIE;
public:
- DbgVariable(DIVariable V, unsigned I, bool IFV)
- : Var(V), FrameIndex(I), InlinedFnVar(IFV) {}
+ DbgVariable(DIVariable V, unsigned I)
+ : Var(V), FrameIndex(I), AbstractVar(0), TheDIE(0) {}
// Accessors.
- DIVariable getVariable() const { return Var; }
- unsigned getFrameIndex() const { return FrameIndex; }
- bool isInlinedFnVar() const { return InlinedFnVar; }
+ DIVariable getVariable() const { return Var; }
+ unsigned getFrameIndex() const { return FrameIndex; }
+ void setAbstractVariable(DbgVariable *V) { AbstractVar = V; }
+ DbgVariable *getAbstractVariable() const { return AbstractVar; }
+ void setDIE(DIE *D) { TheDIE = D; }
+ DIE *getDIE() const { return TheDIE; }
};
//===----------------------------------------------------------------------===//
/// DbgScope - This class is used to track scope information.
///
-class DbgConcreteScope;
class DbgScope {
DbgScope *Parent; // Parent to this scope.
- DIDescriptor Desc; // Debug info descriptor for scope.
- // FIXME use WeakVH for Desc.
- WeakVH InlinedAt; // If this scope represents inlined
- // function body then this is the location
- // where this body is inlined.
+ DIDescriptor Desc; // Debug info descriptor for scope.
+ WeakVH InlinedAtLocation; // Location at which scope is inlined.
+ bool AbstractScope; // Abstract Scope
unsigned StartLabelID; // Label ID of the beginning of scope.
unsigned EndLabelID; // Label ID of the end of scope.
const MachineInstr *LastInsn; // Last instruction of this scope.
const MachineInstr *FirstInsn; // First instruction of this scope.
SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope.
SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
- SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs.
// Private state for dump()
mutable unsigned IndentLevel;
public:
DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
- : Parent(P), Desc(D), InlinedAt(I), StartLabelID(0), EndLabelID(0),
+ : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
+ StartLabelID(0), EndLabelID(0),
LastInsn(0), FirstInsn(0), IndentLevel(0) {}
virtual ~DbgScope();
// Accessors.
DbgScope *getParent() const { return Parent; }
+ void setParent(DbgScope *P) { Parent = P; }
DIDescriptor getDesc() const { return Desc; }
- MDNode *getInlinedAt() const {
- return dyn_cast_or_null<MDNode>(InlinedAt);
+ MDNode *getInlinedAt() const {
+ return dyn_cast_or_null<MDNode>(InlinedAtLocation);
}
+ MDNode *getScopeNode() const { return Desc.getNode(); }
unsigned getStartLabelID() const { return StartLabelID; }
unsigned getEndLabelID() const { return EndLabelID; }
SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
- SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; }
void setStartLabelID(unsigned S) { StartLabelID = S; }
void setEndLabelID(unsigned E) { EndLabelID = E; }
void setLastInsn(const MachineInstr *MI) { LastInsn = MI; }
const MachineInstr *getLastInsn() { return LastInsn; }
void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; }
+ void setAbstractScope() { AbstractScope = true; }
+ bool isAbstractScope() const { return AbstractScope; }
const MachineInstr *getFirstInsn() { return FirstInsn; }
+
/// AddScope - Add a scope to the scope.
///
void AddScope(DbgScope *S) { Scopes.push_back(S); }
@@ -190,10 +195,6 @@ public:
///
void AddVariable(DbgVariable *V) { Variables.push_back(V); }
- /// AddConcreteInst - Add a concrete instance to the scope.
- ///
- void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); }
-
void FixInstructionMarkers() {
assert (getFirstInsn() && "First instruction is missing!");
if (getLastInsn())
@@ -218,11 +219,15 @@ public:
void DbgScope::dump() const {
raw_ostream &err = errs();
err.indent(IndentLevel);
- Desc.dump();
+ MDNode *N = Desc.getNode();
+ N->dump();
err << " [" << StartLabelID << ", " << EndLabelID << "]\n";
+ if (AbstractScope)
+ err << "Abstract Scope\n";
IndentLevel += 2;
-
+ if (!Scopes.empty())
+ err << "Children ...\n";
for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
if (Scopes[i] != this)
Scopes[i]->dump();
@@ -235,7 +240,7 @@ void DbgScope::dump() const {
/// DbgConcreteScope - This class is used to track a scope that holds concrete
/// instance information.
///
-class VISIBILITY_HIDDEN DbgConcreteScope : public DbgScope {
+class DbgConcreteScope : public DbgScope {
CompileUnit *Unit;
DIE *Die; // Debug info for this concrete scope.
public:
@@ -251,8 +256,6 @@ DbgScope::~DbgScope() {
delete Scopes[i];
for (unsigned j = 0, M = Variables.size(); j < M; ++j)
delete Variables[j];
- for (unsigned k = 0, O = ConcreteInsts.size(); k < O; ++k)
- delete ConcreteInsts[k];
}
} // end llvm namespace
@@ -262,7 +265,7 @@ DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
ValuesSet(InitValuesSetSize), Values(), StringPool(),
SectionSourceLines(), didInitial(false), shouldEmit(false),
- FunctionDbgScope(0), DebugTimer(0) {
+ CurrentFnDbgScope(0), DebugTimer(0) {
if (TimePassesIsEnabled)
DebugTimer = new Timer("Dwarf Debug Writer",
getDwarfTimerGroup());
@@ -271,11 +274,6 @@ DwarfDebug::~DwarfDebug() {
for (unsigned j = 0, M = Values.size(); j < M; ++j)
delete Values[j];
- for (DenseMap<const MDNode *, DbgScope *>::iterator
- I = AbstractInstanceRootMap.begin(),
- E = AbstractInstanceRootMap.end(); I != E;++I)
- delete I->second;
-
delete DebugTimer;
}
@@ -1097,6 +1095,10 @@ DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
/// CreateGlobalVariableDIE - Create new DIE using GV.
DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit,
const DIGlobalVariable &GV) {
+  // If the global variable was optimized out then there is no need to
+  // create a debug info entry.
+ if (!GV.getGlobal()) return NULL;
+ if (!GV.getDisplayName()) return NULL;
+
DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
GV.getDisplayName());
@@ -1233,9 +1235,6 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
}
}
- if (!SP.isLocalToUnit() && !IsInlined)
- AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-
// DW_TAG_inlined_subroutine may refer to this DIE.
DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode());
Slot = SPDie;
@@ -1283,263 +1282,341 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
AddSourceLine(VariableDie, &VD);
// Add variable type.
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex addresses instead.
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
if (VD.isBlockByrefVariable())
AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
else
AddType(Unit, VariableDie, VD.getType());
// Add variable address.
- if (!DV->isInlinedFnVar()) {
- // Variables for abstract instances of inlined functions don't get a
- // location.
- MachineLocation Location;
- Location.set(RI->getFrameRegister(*MF),
- RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
-
-
- if (VD.hasComplexAddress())
- AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else if (VD.isBlockByrefVariable())
- AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else
- AddAddress(VariableDie, dwarf::DW_AT_location, Location);
- }
+ // Variables for abstract instances of inlined functions don't get a
+ // location.
+ MachineLocation Location;
+ Location.set(RI->getFrameRegister(*MF),
+ RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
+
+
+ if (VD.hasComplexAddress())
+ AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else if (VD.isBlockByrefVariable())
+ AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else
+ AddAddress(VariableDie, dwarf::DW_AT_location, Location);
return VariableDie;
}
-/// getOrCreateScope - Returns the scope associated with the given descriptor.
-///
-DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI,
- MDNode *InlinedAt) {
- ValueMap<MDNode *, DbgScope *>::iterator VI = DbgScopeMap.find(N);
- if (VI != DbgScopeMap.end())
- return VI->second;
+/// getUpdatedDbgScope - Find or create DbgScope associated with the instruction.
+/// Initialize scope and update scope hierarchy.
+DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
+ MDNode *InlinedAt) {
+ assert (N && "Invalid Scope encoding!");
+ assert (MI && "Missing machine instruction!");
+ bool GetConcreteScope = (MI && InlinedAt);
- DbgScope *Parent = NULL;
+ DbgScope *NScope = NULL;
+
+ if (InlinedAt)
+ NScope = DbgScopeMap.lookup(InlinedAt);
+ else
+ NScope = DbgScopeMap.lookup(N);
+ assert (NScope && "Unable to find working scope!");
+
+ if (NScope->getFirstInsn())
+ return NScope;
- if (InlinedAt) {
+ DbgScope *Parent = NULL;
+ if (GetConcreteScope) {
DILocation IL(InlinedAt);
- assert (!IL.isNull() && "Invalid InlindAt location!");
- ValueMap<MDNode *, DbgScope *>::iterator DSI =
- DbgScopeMap.find(IL.getScope().getNode());
- assert (DSI != DbgScopeMap.end() && "Unable to find InlineAt scope!");
- Parent = DSI->second;
- } else {
- DIDescriptor Scope(N);
- if (Scope.isCompileUnit()) {
- return NULL;
- } else if (Scope.isSubprogram()) {
- DISubprogram SP(N);
- DIDescriptor ParentDesc = SP.getContext();
- if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit())
- Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt);
- } else if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(N);
- DIDescriptor ParentDesc = DB.getContext();
- if (!ParentDesc.isNull())
- Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt);
- } else
- assert (0 && "Unexpected scope info");
- }
-
- DbgScope *NScope = new DbgScope(Parent, DIDescriptor(N), InlinedAt);
+ Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI,
+ IL.getOrigLocation().getNode());
+ assert (Parent && "Unable to find Parent scope!");
+ NScope->setParent(Parent);
+ Parent->AddScope(NScope);
+ } else if (DIDescriptor(N).isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ if (!DB.getContext().isNull()) {
+ Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt);
+ NScope->setParent(Parent);
+ Parent->AddScope(NScope);
+ }
+ }
+
NScope->setFirstInsn(MI);
- if (Parent)
- Parent->AddScope(NScope);
- else
- // First function is top level function.
- if (!FunctionDbgScope)
- FunctionDbgScope = NScope;
+ if (!Parent && !InlinedAt) {
+ StringRef SPName = DISubprogram(N).getLinkageName();
+ if (SPName == MF->getFunction()->getName())
+ CurrentFnDbgScope = NScope;
+ }
+
+ if (GetConcreteScope) {
+ ConcreteScopes[InlinedAt] = NScope;
+ getOrCreateAbstractScope(N);
+ }
- DbgScopeMap.insert(std::make_pair(N, NScope));
return NScope;
}
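
For an inlined call site, both the map key and the parent chain differ from the ordinary case. Tracing one concrete scope through the function above (a sketch; the node names are hypothetical):

    // getUpdatedDbgScope(SPNode, MI, InlinedAtLoc):
    //   NScope = DbgScopeMap.lookup(InlinedAtLoc) // allocated by createDbgScope()
    //   Parent = getUpdatedDbgScope(call-site scope node, MI, original location)
    //   NScope->setParent(Parent); Parent->AddScope(NScope);
    //   ConcreteScopes[InlinedAtLoc] = NScope;    // the concrete instance
    //   getOrCreateAbstractScope(SPNode);         // guarantee the abstract twin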
+DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
+ assert (N && "Invalid Scope encoding!");
-/// getOrCreateScope - Returns the scope associated with the given descriptor.
-/// FIXME - Remove this method.
-DbgScope *DwarfDebug::getOrCreateScope(MDNode *N) {
- DbgScope *&Slot = DbgScopeMap[N];
- if (Slot) return Slot;
-
+ DbgScope *AScope = AbstractScopes.lookup(N);
+ if (AScope)
+ return AScope;
+
DbgScope *Parent = NULL;
- DILexicalBlock Block(N);
- // Don't create a new scope if we already created one for an inlined function.
- DenseMap<const MDNode *, DbgScope *>::iterator
- II = AbstractInstanceRootMap.find(N);
- if (II != AbstractInstanceRootMap.end())
- return LexicalScopeStack.back();
-
- if (!Block.isNull()) {
- DIDescriptor ParentDesc = Block.getContext();
- Parent =
- ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getNode());
+ DIDescriptor Scope(N);
+ if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ if (!ParentDesc.isNull())
+ Parent = getOrCreateAbstractScope(ParentDesc.getNode());
}
- Slot = new DbgScope(Parent, DIDescriptor(N));
+ AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
if (Parent)
- Parent->AddScope(Slot);
- else
- // First function is top level function.
- FunctionDbgScope = Slot;
+ Parent->AddScope(AScope);
+ AScope->setAbstractScope();
+ AbstractScopes[N] = AScope;
+ if (DIDescriptor(N).isSubprogram())
+ AbstractScopesList.push_back(AScope);
+ return AScope;
+}
+
+static DISubprogram getDISubprogram(MDNode *N) {
- return Slot;
+ DIDescriptor D(N);
+ if (D.isNull())
+ return DISubprogram();
+
+ if (D.isCompileUnit())
+ return DISubprogram();
+
+ if (D.isSubprogram())
+ return DISubprogram(N);
+
+ if (D.isLexicalBlock())
+ return getDISubprogram(DILexicalBlock(N).getContext().getNode());
+
+ llvm_unreachable("Unexpected Descriptor!");
}
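
The helper walks lexical-block contexts upward until it reaches the enclosing subprogram. Assuming a nesting of subprogram -> outer block -> inner block, the recursion unfolds as:

    // getDISubprogram(InnerBlockNode)
    //   -> getDISubprogram(OuterBlockNode)  // via DILexicalBlock::getContext()
    //     -> getDISubprogram(SPNode)
    //       -> DISubprogram(SPNode)         // D.isSubprogram() stops the walk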
-/// ConstructDbgScope - Construct the components of a scope.
-///
-void DwarfDebug::ConstructDbgScope(DbgScope *ParentScope,
- unsigned ParentStartID,
- unsigned ParentEndID,
- DIE *ParentDie, CompileUnit *Unit) {
- // Add variables to scope.
- SmallVector<DbgVariable *, 8> &Variables = ParentScope->getVariables();
- for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
- DIE *VariableDie = CreateDbgScopeVariable(Variables[i], Unit);
- if (VariableDie) ParentDie->AddChild(VariableDie);
- }
+DIE *DwarfDebug::UpdateSubprogramScopeDIE(MDNode *SPNode) {
+
+ DIE *SPDie = ModuleCU->getDieMapSlotFor(SPNode);
+ assert (SPDie && "Unable to find subprogram DIE!");
+ AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+ if (!DISubprogram(SPNode).isLocalToUnit())
+ AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ // If there are global variables at this scope then add their dies.
+ for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
+ SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
+ MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
+ if (!N) continue;
+ DIGlobalVariable GV(N);
+ if (GV.getContext().getNode() == SPNode) {
+ DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV);
+ if (ScopedGVDie)
+ SPDie->AddChild(ScopedGVDie);
+ }
+ }
+ return SPDie;
+}
+
+DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) {
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0)
+ return NULL;
- // Add concrete instances to scope.
- SmallVector<DbgConcreteScope *, 8> &ConcreteInsts =
- ParentScope->getConcreteInsts();
- for (unsigned i = 0, N = ConcreteInsts.size(); i < N; ++i) {
- DbgConcreteScope *ConcreteInst = ConcreteInsts[i];
- DIE *Die = ConcreteInst->getDie();
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
- unsigned StartID = ConcreteInst->getStartLabelID();
- unsigned EndID = ConcreteInst->getEndLabelID();
+ AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ StartID ?
+ DWLabel("label", StartID)
+ : DWLabel("func_begin", SubprogramCount));
+ AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ EndID ?
+ DWLabel("label", EndID)
+ : DWLabel("func_end", SubprogramCount));
- // Add the scope bounds.
- if (StartID)
- AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("label", StartID));
- else
- AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
- if (EndID)
- AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("label", EndID));
- else
- AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
- ParentDie->AddChild(Die);
- }
+ return ScopeDIE;
+}
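
The label selection above reduces to a small table, with zero label IDs falling back to the function bounds (a sketch, not literal emitter output):

    //   StartID != 0  ->  DW_AT_low_pc  = label<StartID>
    //   StartID == 0  ->  DW_AT_low_pc  = func_begin<SubprogramCount>
    //   EndID   != 0  ->  DW_AT_high_pc = label<EndID>
    //   EndID   == 0  ->  DW_AT_high_pc = func_end<SubprogramCount>
    // Abstract scopes get the bare DW_TAG_lexical_block with no bounds at all.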
- // Add nested scopes.
- SmallVector<DbgScope *, 4> &Scopes = ParentScope->getScopes();
- for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
- // Define the Scope debug information entry.
- DbgScope *Scope = Scopes[j];
+DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+ assert (StartID && "Invalid starting label for an inlined scope!");
+ assert (EndID && "Invalid end label for an inlined scope!");
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0)
+ return NULL;
- unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
- unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+ DIScope DS(Scope->getScopeNode());
+ if (DS.isNull())
+ return NULL;
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
- // Ignore empty scopes.
- if (StartID == EndID && StartID != 0) continue;
+ DISubprogram InlinedSP = getDISubprogram(DS.getNode());
+ DIE *&OriginDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode());
+ assert (OriginDIE && "Unable to find Origin DIE!");
+ AddDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, OriginDIE);
- // Do not ignore inlined scopes even if they don't have any variables or
- // scopes.
- if (Scope->getScopes().empty() && Scope->getVariables().empty() &&
- Scope->getConcreteInsts().empty())
- continue;
+ AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", StartID));
+ AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", EndID));
- if (StartID == ParentStartID && EndID == ParentEndID) {
- // Just add stuff to the parent scope.
- ConstructDbgScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit);
- } else {
- DIE *ScopeDie = new DIE(dwarf::DW_TAG_lexical_block);
+ InlinedSubprogramDIEs.insert(OriginDIE);
- // Add the scope bounds.
- if (StartID)
- AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("label", StartID));
- else
- AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
+ // Track the start label for this inlined function.
+ ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ I = InlineInfo.find(InlinedSP.getNode());
- if (EndID)
- AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("label", EndID));
- else
- AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
+ if (I == InlineInfo.end()) {
+ InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID, ScopeDIE));
+ InlinedSPNodes.push_back(InlinedSP.getNode());
+ } else
+ I->second.push_back(std::make_pair(StartID, ScopeDIE));
- // Add the scope's contents.
- ConstructDbgScope(Scope, StartID, EndID, ScopeDie, Unit);
- ParentDie->AddChild(ScopeDie);
- }
- }
+ StringPool.insert(InlinedSP.getName());
+ StringPool.insert(InlinedSP.getLinkageName());
+ DILocation DL(Scope->getInlinedAt());
+ AddUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
+ AddUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+ return ScopeDIE;
}
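
Taken together, the DIE built here has roughly the following shape; the rendering is illustrative rather than literal output:

    // DW_TAG_inlined_subroutine
    //   DW_AT_abstract_origin -> abstract DW_TAG_subprogram of the inlined callee
    //   DW_AT_low_pc          label<StartID>
    //   DW_AT_high_pc         label<EndID>
    //   DW_AT_call_file       <ModuleCU ID>
    //   DW_AT_call_line       <call-site line from the DILocation>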
-/// ConstructFunctionDbgScope - Construct the scope for the subprogram.
-///
-void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope,
- bool AbstractScope) {
- // Exit if there is no root scope.
- if (!RootScope) return;
- DIDescriptor Desc = RootScope->getDesc();
- if (Desc.isNull())
- return;
+DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV,
+ DbgScope *Scope, CompileUnit *Unit) {
+ // Get the descriptor.
+ const DIVariable &VD = DV->getVariable();
+ const char *Name = VD.getName();
+ if (!Name)
+ return NULL;
- // Get the subprogram debug information entry.
- DISubprogram SPD(Desc.getNode());
-
- // Get the subprogram die.
- DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode());
- if (!SPDie) {
- ConstructSubprogram(SPD.getNode());
- SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode());
- }
- assert(SPDie && "Missing subprogram descriptor");
-
- if (!AbstractScope) {
- // Add the function bounds.
- AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
- AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
- MachineLocation Location(RI->getFrameRegister(*MF));
- AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
- }
-
- ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU);
- // If there are global variables at this scope then add their dies.
- for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
- SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
- MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
- if (!N) continue;
- DIGlobalVariable GV(N);
- if (GV.getContext().getNode() == RootScope->getDesc().getNode()) {
- DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV);
- SPDie->AddChild(ScopedGVDie);
- }
+ // Translate tag to proper Dwarf tag. The result variable is dropped for
+ // now.
+ unsigned Tag;
+ switch (VD.getTag()) {
+ case dwarf::DW_TAG_return_variable:
+ return NULL;
+ case dwarf::DW_TAG_arg_variable:
+ Tag = dwarf::DW_TAG_formal_parameter;
+ break;
+ case dwarf::DW_TAG_auto_variable: // fall thru
+ default:
+ Tag = dwarf::DW_TAG_variable;
+ break;
}
-}
-/// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
-///
-void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) {
- StringMap<DIE*> &Globals = ModuleCU->getGlobals();
- StringMap<DIE*>::iterator GI = Globals.find(MF->getFunction()->getName());
- if (GI != Globals.end()) {
- DIE *SPDie = GI->second;
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
- // Add the function bounds.
- AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
- AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
- MachineLocation Location(RI->getFrameRegister(*MF));
- AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ DIE *AbsDIE = NULL;
+ if (DbgVariable *AV = DV->getAbstractVariable())
+ AbsDIE = AV->getDIE();
+
+ if (AbsDIE) {
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS.getNode());
+ DIE *&OriginSPDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode());
+ (void) OriginSPDIE;
+ assert (OriginSPDIE && "Unable to find Origin DIE for the SP!");
+ assert (AbsDIE && "Unable to find Origin DIE for the Variable!");
+ AddDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsDIE);
}
+ else {
+ AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ AddSourceLine(VariableDie, &VD);
+
+ // Add variable type.
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (VD.isBlockByrefVariable())
+ AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
+ else
+ AddType(Unit, VariableDie, VD.getType());
+ }
+
+ // Add variable address.
+ if (!Scope->isAbstractScope()) {
+ MachineLocation Location;
+ Location.set(RI->getFrameRegister(*MF),
+ RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
+
+
+ if (VD.hasComplexAddress())
+ AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else if (VD.isBlockByrefVariable())
+ AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else
+ AddAddress(VariableDie, dwarf::DW_AT_location, Location);
+ }
+ DV->setDIE(VariableDie);
+ return VariableDie;
+
+}
+DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) {
+ if (!Scope)
+ return NULL;
+ DIScope DS(Scope->getScopeNode());
+ if (DS.isNull())
+ return NULL;
+
+ DIE *ScopeDIE = NULL;
+ if (Scope->getInlinedAt())
+ ScopeDIE = ConstructInlinedScopeDIE(Scope);
+ else if (DS.isSubprogram()) {
+ if (Scope->isAbstractScope())
+ ScopeDIE = ModuleCU->getDieMapSlotFor(DS.getNode());
+ else
+ ScopeDIE = UpdateSubprogramScopeDIE(DS.getNode());
+ }
+ else {
+ ScopeDIE = ConstructLexicalScopeDIE(Scope);
+ if (!ScopeDIE) return NULL;
+ }
+
+ // Add variables to scope.
+ SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDIE = ConstructVariableDIE(Variables[i], Scope, ModuleCU);
+ if (VariableDIE)
+ ScopeDIE->AddChild(VariableDIE);
+ }
+
+ // Add nested scopes.
+ SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
+ // Define the Scope debug information entry.
+ DIE *NestedDIE = ConstructScopeDIE(Scopes[j]);
+ if (NestedDIE)
+ ScopeDIE->AddChild(NestedDIE);
+ }
+ return ScopeDIE;
}
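
The dispatch above recurses over the scope tree. For a hypothetical foo() containing one lexical block whose call to bar() was inlined, the construction order is:

    // ConstructScopeDIE(CurrentFnDbgScope)    // DW_TAG_subprogram for foo
    //   ConstructScopeDIE(BlockScope)         // DW_TAG_lexical_block
    //     ConstructScopeDIE(InlinedBarScope)  // DW_TAG_inlined_subroutine
    // Each level attaches its variables before descending into nested scopes.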
/// GetOrCreateSourceID - Look up the source id with the given directory and
@@ -1680,6 +1757,9 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
+ if (!MAI->doesSupportDebugInformation())
+ return;
+
DebugInfoFinder DbgFinder;
DbgFinder.processModule(*M);
@@ -1710,7 +1790,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
ConstructGlobalVariableDIE(*I);
}
- // Create DIEs for each of the externally visible subprograms.
+ // Create DIEs for each subprogram.
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
E = DbgFinder.subprogram_end(); I != E; ++I)
ConstructSubprogram(*I);
@@ -1754,6 +1834,13 @@ void DwarfDebug::EndModule() {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
+ // Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ AddUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+
// Standard sections final addresses.
Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
EmitLabel("text_end", 0);
@@ -1811,55 +1898,102 @@ void DwarfDebug::EndModule() {
DebugTimer->stopTimer();
}
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
+ DILocation &ScopeLoc) {
+
+ DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
+ if (AbsDbgVariable)
+ return AbsDbgVariable;
+
+ DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope().getNode());
+ if (!Scope)
+ return NULL;
+
+ AbsDbgVariable = new DbgVariable(Var, FrameIdx);
+ Scope->AddVariable(AbsDbgVariable);
+ AbstractVariables[Var.getNode()] = AbsDbgVariable;
+ return AbsDbgVariable;
+}
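
findAbstractVariable memoizes through the AbstractVariables map; a sketch with hypothetical driver code:

    DbgVariable *A1 = findAbstractVariable(DV, FrameIdx, ScopeLoc); // created, cached
    DbgVariable *A2 = findAbstractVariable(DV, FrameIdx, ScopeLoc); // cache hit
    assert(A1 == A2 && "second lookup returns the cached abstract variable");
    // A NULL result means ScopeLoc names no abstract scope, i.e. the variable
    // is not part of an inlined function body.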
+
/// CollectVariableInfo - Populate DbgScope entries with variables' info.
void DwarfDebug::CollectVariableInfo() {
if (!MMI) return;
+
MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
VE = VMap.end(); VI != VE; ++VI) {
MetadataBase *MB = VI->first;
MDNode *Var = dyn_cast_or_null<MDNode>(MB);
+ if (!Var) continue;
DIVariable DV (Var);
- if (DV.isNull()) continue;
- unsigned VSlot = VI->second;
- DbgScope *Scope = NULL;
- ValueMap<MDNode *, DbgScope *>::iterator DSI =
- DbgScopeMap.find(DV.getContext().getNode());
- if (DSI != DbgScopeMap.end())
- Scope = DSI->second;
- else
- // There is not any instruction assocated with this scope, so get
- // a new scope.
- Scope = getDbgScope(DV.getContext().getNode(),
- NULL /* Not an instruction */,
- NULL /* Not inlined */);
- assert (Scope && "Unable to find variable scope!");
- Scope->AddVariable(new DbgVariable(DV, VSlot, false));
- }
-}
-
-/// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that
-/// start with this machine instruction.
-void DwarfDebug::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label) {
+ std::pair< unsigned, MDNode *> VP = VI->second;
+ DILocation ScopeLoc(VP.second);
+
+ DbgScope *Scope =
+ ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode());
+ if (!Scope)
+ Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode());
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ DbgVariable *RegVar = new DbgVariable(DV, VP.first);
+ Scope->AddVariable(RegVar);
+ if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, ScopeLoc))
+ RegVar->setAbstractVariable(AbsDbgVariable);
+ }
+}
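
Each map entry flows through the same three steps, sketched with the names used above:

    // For (Var, (FrameIndex, ScopeLoc)) from MMI->getVariableDbgInfo():
    //   Scope  = ConcreteScopes[orig location], else DbgScopeMap[scope node]
    //   RegVar = new DbgVariable(DV, FrameIndex); Scope->AddVariable(RegVar);
    //   if an abstract twin exists: RegVar->setAbstractVariable(twin);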
+
+/// BeginScope - Process beginning of a scope starting at Label.
+void DwarfDebug::BeginScope(const MachineInstr *MI, unsigned Label) {
InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI);
if (I == DbgScopeBeginMap.end())
return;
- SmallVector<DbgScope *, 2> &SD = I->second;
- for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
+ ScopeVector &SD = DbgScopeBeginMap[MI];
+ for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end();
SDI != SDE; ++SDI)
(*SDI)->setStartLabelID(Label);
}
-/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that
-/// end with this machine instruction.
-void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) {
+/// EndScope - Process end of a scope.
+void DwarfDebug::EndScope(const MachineInstr *MI) {
InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);
if (I == DbgScopeEndMap.end())
return;
+
+ unsigned Label = MMI->NextLabelID();
+ Asm->printLabel(Label);
+
SmallVector<DbgScope *, 2> &SD = I->second;
for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
SDI != SDE; ++SDI)
(*SDI)->setEndLabelID(Label);
+ return;
+}
+
+/// createDbgScope - Create DbgScope for the scope.
+void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
+
+ if (!InlinedAt) {
+ DbgScope *WScope = DbgScopeMap.lookup(Scope);
+ if (WScope)
+ return;
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
+ DbgScopeMap.insert(std::make_pair(Scope, WScope));
+ if (DIDescriptor(Scope).isLexicalBlock())
+ createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL);
+ return;
+ }
+
+ DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
+ if (WScope)
+ return;
+
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
+ DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
+ DILocation DL(InlinedAt);
+ createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode());
}
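
DbgScopeMap ends up keyed two ways: by scope node for ordinary scopes and by InlinedAt location for inlined instances. As a sketch:

    // ordinary scope: DbgScopeMap[ScopeNode] -> DbgScope(ScopeNode, NULL)
    // inlined scope:  DbgScopeMap[InlinedAt] -> DbgScope(ScopeNode, InlinedAt)
    // The tail recursion on DL.getScope()/DL.getOrigLocation() ensures every
    // enclosing scope exists before getUpdatedDbgScope() links the hierarchy.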
/// ExtractScopeInformation - Scan machine instructions in this function
@@ -1870,26 +2004,41 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
if (!DbgScopeMap.empty())
return false;
- // Scan each instruction and create scopes.
+ // Scan each instruction and create scopes. First build working set of scopes.
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I) {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown())
- continue;
+ if (DL.isUnknown()) continue;
DebugLocTuple DLT = MF->getDebugLocTuple(DL);
- if (!DLT.Scope)
- continue;
+ if (!DLT.Scope) continue;
// There is no need to create another DIE for compile unit. For all
// other scopes, create one DbgScope now. This will be translated
// into a scope DIE at the end.
- DIDescriptor D(DLT.Scope);
- if (!D.isCompileUnit()) {
- DbgScope *Scope = getDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc);
- Scope->setLastInsn(MInsn);
- }
+ if (DIDescriptor(DLT.Scope).isCompileUnit()) continue;
+ createDbgScope(DLT.Scope, DLT.InlinedAtLoc);
+ }
+ }
+
+ // Build scope hierarchy using working set of scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+ DebugLoc DL = MInsn->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ DebugLocTuple DLT = MF->getDebugLocTuple(DL);
+ if (!DLT.Scope) continue;
+      // Skip compile unit scopes. For all other scopes, link the DbgScope
+      // created in the first pass into the hierarchy and record its
+      // instruction range.
+ if (DIDescriptor(DLT.Scope).isCompileUnit()) continue;
+ DbgScope *Scope = getUpdatedDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc);
+ Scope->setLastInsn(MInsn);
}
}
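
The function now makes two passes over the same instruction stream; in outline:

    // Pass 1: for each instruction with a usable DebugLoc,
    //         createDbgScope(DLT.Scope, DLT.InlinedAtLoc);  // flat working set
    // Pass 2: for the same instructions,
    //         getUpdatedDbgScope(...)->setLastInsn(MInsn);  // wire parents and
    //                                                       // record insn ranges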
@@ -1897,8 +2046,8 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
// last instruction as this scope's last instruction.
for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
DE = DbgScopeMap.end(); DI != DE; ++DI) {
- DbgScope *S = DI->second;
- if (!S) continue;
+ if (DI->second->isAbstractScope())
+ continue;
assert (DI->second->getFirstInsn() && "Invalid first instruction!");
DI->second->FixInstructionMarkers();
assert (DI->second->getLastInsn() && "Invalid last instruction!");
@@ -1911,7 +2060,8 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
DE = DbgScopeMap.end(); DI != DE; ++DI) {
DbgScope *S = DI->second;
- if (!S) continue;
+ if (S->isAbstractScope())
+ continue;
const MachineInstr *MI = S->getFirstInsn();
assert (MI && "DbgScope does not have first instruction!");
@@ -1919,8 +2069,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
if (IDI != DbgScopeBeginMap.end())
IDI->second.push_back(S);
else
- DbgScopeBeginMap.insert(std::make_pair(MI,
- SmallVector<DbgScope *, 2>(2, S)));
+ DbgScopeBeginMap[MI].push_back(S);
MI = S->getLastInsn();
assert (MI && "DbgScope does not have last instruction!");
@@ -1928,31 +2077,12 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
if (IDI != DbgScopeEndMap.end())
IDI->second.push_back(S);
else
- DbgScopeEndMap.insert(std::make_pair(MI,
- SmallVector<DbgScope *, 2>(2, S)));
+ DbgScopeEndMap[MI].push_back(S);
}
return !DbgScopeMap.empty();
}
-static DISubprogram getDISubprogram(MDNode *N) {
-
- DIDescriptor D(N);
- if (D.isNull())
- return DISubprogram();
-
- if (D.isCompileUnit())
- return DISubprogram();
-
- if (D.isSubprogram())
- return DISubprogram(N);
-
- if (D.isLexicalBlock())
- return getDISubprogram(DILexicalBlock(N).getContext().getNode());
-
- llvm_unreachable("Unexpected Descriptor!");
-}
-
/// BeginFunction - Gather pre-function debug information. Assumes being
/// emitted immediately after the function entry point.
void DwarfDebug::BeginFunction(MachineFunction *MF) {
@@ -1963,11 +2093,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
if (!ExtractScopeInformation(MF))
return;
CollectVariableInfo();
-#endif
// Begin accumulating function debug information.
MMI->BeginFunction(MF);
@@ -1977,7 +2105,6 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
// Emit label for the implicitly defined dbg.stoppoint at the start of the
// function.
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
DebugLoc FDL = MF->getDefaultDebugLoc();
if (!FDL.isUnknown()) {
DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
@@ -1990,15 +2117,6 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
Asm->printLabel(LabelID);
O << '\n';
}
-#else
- DebugLoc FDL = MF->getDefaultDebugLoc();
- if (!FDL.isUnknown()) {
- DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
- unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope);
- Asm->printLabel(LabelID);
- O << '\n';
- }
-#endif
if (TimePassesIsEnabled)
DebugTimer->stopTimer();
}
@@ -2011,10 +2129,9 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
if (DbgScopeMap.empty())
return;
-#endif
+
// Define end label for subprogram.
EmitLabel("func_end", SubprogramCount);
@@ -2029,41 +2146,24 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
Lines.begin(), Lines.end());
}
- // Construct the DbgScope for abstract instances.
- for (SmallVector<DbgScope *, 32>::iterator
- I = AbstractInstanceRootList.begin(),
- E = AbstractInstanceRootList.end(); I != E; ++I)
- ConstructFunctionDbgScope(*I);
+ // Construct abstract scopes.
+ for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+ AE = AbstractScopesList.end(); AI != AE; ++AI)
+ ConstructScopeDIE(*AI);
- // Construct scopes for subprogram.
- if (FunctionDbgScope)
- ConstructFunctionDbgScope(FunctionDbgScope);
- else
- // FIXME: This is wrong. We are essentially getting past a problem with
- // debug information not being able to handle unreachable blocks that have
- // debug information in them. In particular, those unreachable blocks that
- // have "region end" info in them. That situation results in the "root
- // scope" not being created. If that's the case, then emit a "default"
- // scope, i.e., one that encompasses the whole function. This isn't
- // desirable. And a better way of handling this (and all of the debugging
- // information) needs to be explored.
- ConstructDefaultDbgScope(MF);
+ ConstructScopeDIE(CurrentFnDbgScope);
DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
MMI->getFrameMoves()));
// Clear debug info
- if (FunctionDbgScope) {
- delete FunctionDbgScope;
+ if (CurrentFnDbgScope) {
+ CurrentFnDbgScope = NULL;
DbgScopeMap.clear();
DbgScopeBeginMap.clear();
DbgScopeEndMap.clear();
- DbgAbstractScopeMap.clear();
- DbgConcreteScopeMap.clear();
- FunctionDbgScope = NULL;
- LexicalScopeStack.clear();
- AbstractInstanceRootList.clear();
- AbstractInstanceRootMap.clear();
+ ConcreteScopes.clear();
+ AbstractScopesList.clear();
}
Lines.clear();
@@ -2130,201 +2230,6 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
return SrcId;
}
-/// RecordRegionStart - Indicate the start of a region.
-unsigned DwarfDebug::RecordRegionStart(MDNode *N) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- DbgScope *Scope = getOrCreateScope(N);
- unsigned ID = MMI->NextLabelID();
- if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID);
- LexicalScopeStack.push_back(Scope);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return ID;
-}
-
-/// RecordRegionEnd - Indicate the end of a region.
-unsigned DwarfDebug::RecordRegionEnd(MDNode *N) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- DbgScope *Scope = getOrCreateScope(N);
- unsigned ID = MMI->NextLabelID();
- Scope->setEndLabelID(ID);
- // FIXME : region.end() may not be in the last basic block.
- // For now, do not pop last lexical scope because next basic
- // block may start new inlined function's body.
- unsigned LSSize = LexicalScopeStack.size();
- if (LSSize != 0 && LSSize != 1)
- LexicalScopeStack.pop_back();
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return ID;
-}
-
-/// RecordVariable - Indicate the declaration of a local variable.
-void DwarfDebug::RecordVariable(MDNode *N, unsigned FrameIndex) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- DIDescriptor Desc(N);
- DbgScope *Scope = NULL;
- bool InlinedFnVar = false;
-
- if (Desc.getTag() == dwarf::DW_TAG_variable)
- Scope = getOrCreateScope(DIGlobalVariable(N).getContext().getNode());
- else {
- bool InlinedVar = false;
- MDNode *Context = DIVariable(N).getContext().getNode();
- DISubprogram SP(Context);
- if (!SP.isNull()) {
- // SP is inserted into DbgAbstractScopeMap when inlined function
- // start was recorded by RecordInlineFnStart.
- ValueMap<MDNode *, DbgScope *>::iterator
- I = DbgAbstractScopeMap.find(SP.getNode());
- if (I != DbgAbstractScopeMap.end()) {
- InlinedVar = true;
- Scope = I->second;
- }
- }
- if (!InlinedVar)
- Scope = getOrCreateScope(Context);
- }
-
- assert(Scope && "Unable to find the variable's scope");
- DbgVariable *DV = new DbgVariable(DIVariable(N), FrameIndex, InlinedFnVar);
- Scope->AddVariable(DV);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-}
-
-//// RecordInlinedFnStart - Indicate the start of inlined subroutine.
-unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
- unsigned Line, unsigned Col) {
- unsigned LabelID = MMI->NextLabelID();
-
- if (!MAI->doesDwarfUsesInlineInfoSection())
- return LabelID;
-
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- MDNode *Node = SP.getNode();
- DenseMap<const MDNode *, DbgScope *>::iterator
- II = AbstractInstanceRootMap.find(Node);
-
- if (II == AbstractInstanceRootMap.end()) {
- // Create an abstract instance entry for this inlined function if it doesn't
- // already exist.
- DbgScope *Scope = new DbgScope(NULL, DIDescriptor(Node));
-
- // Get the compile unit context.
- DIE *SPDie = ModuleCU->getDieMapSlotFor(Node);
- if (!SPDie)
- SPDie = CreateSubprogramDIE(ModuleCU, SP, false, true);
-
- // Mark as being inlined. This makes this subprogram entry an abstract
- // instance root.
- // FIXME: Our debugger doesn't care about the value of DW_AT_inline, only
- // that it's defined. That probably won't change in the future. However,
- // this could be more elegant.
- AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined);
-
- // Keep track of the abstract scope for this function.
- DbgAbstractScopeMap[Node] = Scope;
-
- AbstractInstanceRootMap[Node] = Scope;
- AbstractInstanceRootList.push_back(Scope);
- }
-
- // Create a concrete inlined instance for this inlined function.
- DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(Node));
- DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
- ScopeDie->setAbstractCompileUnit(ModuleCU);
-
- DIE *Origin = ModuleCU->getDieMapSlotFor(Node);
- AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, Origin);
- AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
- AddUInt(ScopeDie, dwarf::DW_AT_call_line, 0, Line);
- AddUInt(ScopeDie, dwarf::DW_AT_call_column, 0, Col);
-
- ConcreteScope->setDie(ScopeDie);
- ConcreteScope->setStartLabelID(LabelID);
- MMI->RecordUsedDbgLabel(LabelID);
-
- LexicalScopeStack.back()->AddConcreteInst(ConcreteScope);
-
- // Keep track of the concrete scope that's inlined into this function.
- ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
- SI = DbgConcreteScopeMap.find(Node);
-
- if (SI == DbgConcreteScopeMap.end())
- DbgConcreteScopeMap[Node].push_back(ConcreteScope);
- else
- SI->second.push_back(ConcreteScope);
-
- // Track the start label for this inlined function.
- ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator
- I = InlineInfo.find(Node);
-
- if (I == InlineInfo.end())
- InlineInfo[Node].push_back(LabelID);
- else
- I->second.push_back(LabelID);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return LabelID;
-}
-
-/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
-unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
- if (!MAI->doesDwarfUsesInlineInfoSection())
- return 0;
-
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- MDNode *Node = SP.getNode();
- ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
- I = DbgConcreteScopeMap.find(Node);
-
- if (I == DbgConcreteScopeMap.end()) {
- // FIXME: Can this situation actually happen? And if so, should it?
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return 0;
- }
-
- SmallVector<DbgScope *, 8> &Scopes = I->second;
- if (Scopes.empty()) {
- // Returned ID is 0 if this is unbalanced "end of inlined
- // scope". This could happen if optimizer eats dbg intrinsics
- // or "beginning of inlined scope" is not recoginized due to
- // missing location info. In such cases, ignore this region.end.
- return 0;
- }
-
- DbgScope *Scope = Scopes.back(); Scopes.pop_back();
- unsigned ID = MMI->NextLabelID();
- MMI->RecordUsedDbgLabel(ID);
- Scope->setEndLabelID(ID);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return ID;
-}
-
//===----------------------------------------------------------------------===//
// Emit Methods
//===----------------------------------------------------------------------===//
@@ -2470,10 +2375,7 @@ void DwarfDebug::EmitDIE(DIE *Die) {
case dwarf::DW_AT_abstract_origin: {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
- unsigned Addr =
- CompileUnitOffsets[Die->getAbstractCompileUnit()] +
- Origin->getOffset();
-
+ unsigned Addr = Origin->getOffset();
Asm->EmitInt32(Addr);
break;
}
@@ -3002,10 +2904,14 @@ void DwarfDebug::EmitDebugInlineInfo() {
Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
- for (ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator
- I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
- MDNode *Node = I->first;
- SmallVector<unsigned, 4> &Labels = I->second;
+ for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ E = InlinedSPNodes.end(); I != E; ++I) {
+
+ MDNode *Node = *I;
+ ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II = InlineInfo.find(Node);
+ SmallVector<InlineInfoLabels, 4> &Labels = II->second;
DISubprogram SP(Node);
const char *LName = SP.getLinkageName();
const char *Name = SP.getName();
@@ -3019,17 +2925,21 @@ void DwarfDebug::EmitDebugInlineInfo() {
// __asm__ attribute.
if (LName[0] == 1)
LName = &LName[1];
- Asm->EmitString(LName);
+ EmitSectionOffset("string", "section_str",
+ StringPool.idFor(LName), false, true);
+
}
Asm->EOL("MIPS linkage name");
-
- Asm->EmitString(Name); Asm->EOL("Function name");
-
+ EmitSectionOffset("string", "section_str",
+ StringPool.idFor(Name), false, true);
+ Asm->EOL("Function name");
Asm->EmitULEB128Bytes(Labels.size()); Asm->EOL("Inline count");
- for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(),
+ for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
LE = Labels.end(); LI != LE; ++LI) {
- DIE *SP = ModuleCU->getDieMapSlotFor(Node);
+ DIE *SP = LI->second;
Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset");
if (TD->getPointerSize() == sizeof(int32_t))
@@ -3037,7 +2947,7 @@ void DwarfDebug::EmitDebugInlineInfo() {
else
O << MAI->getData64bitsDirective();
- PrintLabelName("label", *LI); Asm->EOL("low_pc");
+ PrintLabelName("label", LI->first); Asm->EOL("low_pc");
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index ddb0a15..646de8f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -30,9 +30,9 @@
namespace llvm {
class CompileUnit;
-class DbgVariable;
-class DbgScope;
class DbgConcreteScope;
+class DbgScope;
+class DbgVariable;
class MachineFrameInfo;
class MachineModuleInfo;
class MCAsmInfo;
@@ -41,7 +41,7 @@ class Timer;
//===----------------------------------------------------------------------===//
/// SrcLineInfo - This class is used to record source line correspondence.
///
-class VISIBILITY_HIDDEN SrcLineInfo {
+class SrcLineInfo {
unsigned Line; // Source line number.
unsigned Column; // Source column.
unsigned SourceID; // Source ID number.
@@ -57,7 +57,7 @@ public:
unsigned getLabelID() const { return LabelID; }
};
-class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
+class DwarfDebug : public Dwarf {
//===--------------------------------------------------------------------===//
// Attributes used to construct specific Dwarf sections.
//
@@ -134,52 +134,52 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
///
bool shouldEmit;
- // FunctionDbgScope - Top level scope for the current function.
+ // CurrentFnDbgScope - Top level scope for the current function.
//
- DbgScope *FunctionDbgScope;
+ DbgScope *CurrentFnDbgScope;
/// DbgScopeMap - Tracks the scopes in the current function.
+ ///
ValueMap<MDNode *, DbgScope *> DbgScopeMap;
+  /// ConcreteScopes - Tracks the concrete scopes in the current function.
+ /// These scopes are also included in DbgScopeMap.
+ ValueMap<MDNode *, DbgScope *> ConcreteScopes;
+
+  /// AbstractScopes - Tracks the abstract scopes in a module. These scopes
+  /// are not included in DbgScopeMap.
+ ValueMap<MDNode *, DbgScope *> AbstractScopes;
+  SmallVector<DbgScope *, 4> AbstractScopesList;
+
+  /// AbstractVariables - Collection of abstract variables.
+ ValueMap<MDNode *, DbgVariable *> AbstractVariables;
+
+  /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
+ /// (at the end of the module) as DW_AT_inline.
+ SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+
+  /// AbstractSubprogramDIEs - Collection of abstract subprogram DIEs.
+ SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs;
+
/// ScopedGVs - Tracks global variables that are not at file scope.
/// For example void f() { static int b = 42; }
SmallVector<WeakVH, 4> ScopedGVs;
- typedef DenseMap<const MachineInstr *, SmallVector<DbgScope *, 2> >
+ typedef SmallVector<DbgScope *, 2> ScopeVector;
+ typedef DenseMap<const MachineInstr *, ScopeVector>
InsnToDbgScopeMapTy;
- /// DbgScopeBeginMap - Maps instruction with a list DbgScopes it starts.
+ /// DbgScopeBeginMap - Maps instruction with a list of DbgScopes it starts.
InsnToDbgScopeMapTy DbgScopeBeginMap;
/// DbgScopeEndMap - Maps instruction with a list of DbgScopes it ends.
InsnToDbgScopeMapTy DbgScopeEndMap;
- /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current
- /// function.
- ValueMap<MDNode *, DbgScope *> DbgAbstractScopeMap;
-
- /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current
- /// function.
- ValueMap<MDNode *,
- SmallVector<DbgScope *, 8> > DbgConcreteScopeMap;
-
/// InlineInfo - Keep track of inlined functions and their location. This
/// information is used to populate debug_inlined section.
- ValueMap<MDNode *, SmallVector<unsigned, 4> > InlineInfo;
-
- /// AbstractInstanceRootMap - Map of abstract instance roots of inlined
- /// functions. These are subroutine entries that contain a DW_AT_inline
- /// attribute.
- DenseMap<const MDNode *, DbgScope *> AbstractInstanceRootMap;
-
- /// AbstractInstanceRootList - List of abstract instance roots of inlined
- /// functions. These are subroutine entries that contain a DW_AT_inline
- /// attribute.
- SmallVector<DbgScope *, 32> AbstractInstanceRootList;
-
- /// LexicalScopeStack - A stack of lexical scopes. The top one is the current
- /// scope.
- SmallVector<DbgScope *, 16> LexicalScopeStack;
+ typedef std::pair<unsigned, DIE *> InlineInfoLabels;
+ ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+ SmallVector<MDNode *, 4> InlinedSPNodes;
/// CompileUnitOffsets - A vector of the offsets of the compile units. This is
/// used when calculating the "origin" of a concrete instance of an inlined
@@ -361,10 +361,24 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
///
DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
- /// getDbgScope - Returns the scope associated with the given descriptor.
- ///
- DbgScope *getOrCreateScope(MDNode *N);
- DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt);
+  /// getUpdatedDbgScope - Find or create DbgScope associated with
+ /// the instruction. Initialize scope and update scope hierarchy.
+ DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt);
+
+ /// createDbgScope - Create DbgScope for the scope.
+ void createDbgScope(MDNode *Scope, MDNode *InlinedAt);
+
+ DbgScope *getOrCreateAbstractScope(MDNode *N);
+
+ /// findAbstractVariable - Find abstract variable associated with Var.
+ DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
+ DILocation &Loc);
+
+ DIE *UpdateSubprogramScopeDIE(MDNode *SPNode);
+ DIE *ConstructLexicalScopeDIE(DbgScope *Scope);
+ DIE *ConstructScopeDIE(DbgScope *Scope);
+ DIE *ConstructInlinedScopeDIE(DbgScope *Scope);
+ DIE *ConstructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit);
/// ConstructDbgScope - Construct the components of a scope.
///
@@ -372,15 +386,6 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
unsigned ParentStartID, unsigned ParentEndID,
DIE *ParentDie, CompileUnit *Unit);
- /// ConstructFunctionDbgScope - Construct the scope for the subprogram.
- ///
- void ConstructFunctionDbgScope(DbgScope *RootScope,
- bool AbstractScope = false);
-
- /// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
- ///
- void ConstructDefaultDbgScope(MachineFunction *MF);
-
/// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
/// tools to recognize the object file contains Dwarf information.
void EmitInitial();
@@ -535,22 +540,6 @@ public:
unsigned getOrCreateSourceID(const std::string &DirName,
const std::string &FileName);
- /// RecordRegionStart - Indicate the start of a region.
- unsigned RecordRegionStart(MDNode *N);
-
- /// RecordRegionEnd - Indicate the end of a region.
- unsigned RecordRegionEnd(MDNode *N);
-
- /// RecordVariable - Indicate the declaration of a local variable.
- void RecordVariable(MDNode *N, unsigned FrameIndex);
-
- //// RecordInlinedFnStart - Indicate the start of inlined subroutine.
- unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
- unsigned Line, unsigned Col);
-
- /// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
- unsigned RecordInlinedFnEnd(DISubprogram &SP);
-
/// ExtractScopeInformation - Scan machine instructions in this function
/// and collect DbgScopes. Return true if at least one scope was found.
bool ExtractScopeInformation(MachineFunction *MF);
@@ -558,15 +547,16 @@ public:
/// CollectVariableInfo - Populate DbgScope entries with variables' info.
void CollectVariableInfo();
- /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that
- /// start with this machine instruction.
- void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label);
-
/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that
/// end with this machine instruction.
void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label);
-};
+ /// BeginScope - Process beginning of a scope starting at Label.
+ void BeginScope(const MachineInstr *MI, unsigned Label);
+
+  /// EndScope - Process end of a scope.
+ void EndScope(const MachineInstr *MI);
+};
} // End of namespace llvm
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 6c03b55..1c8b8f4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -74,6 +74,25 @@ unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) {
return 0;
}
+/// CreateLabelDiff - Emit a label and subtract it from the expression we
+/// already have. This is equivalent to emitting "foo - .", but we have to emit
+/// the label for "." directly.
+const MCExpr *DwarfException::CreateLabelDiff(const MCExpr *ExprRef,
+ const char *LabelName,
+ unsigned Index) {
+ SmallString<64> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << LabelName << Asm->getFunctionNumber()
+ << "_" << Index;
+ MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str());
+ Asm->OutStreamer.EmitLabel(DotSym);
+
+ return MCBinaryExpr::CreateSub(ExprRef,
+ MCSymbolRefExpr::Create(DotSym,
+ Asm->OutContext),
+ Asm->OutContext);
+}
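
For LabelName "personalityref_addr", function number 2 and Index 1, the returned expression corresponds to assembly of roughly this form (syntax and the personality symbol are target-dependent examples):

    // Lpersonalityref_addr2_1:
    //         .long   ___gxx_personality_v0 - Lpersonalityref_addr2_1
    // i.e. "foo - .", with "." materialized as a freshly emitted label.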
+
/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
/// is shared among many Frame Description Entries. There is at least one CIE
/// in every non-empty .debug_frame section.
@@ -176,24 +195,10 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
// If there is a personality, we need to indicate the function's location.
if (PersonalityRef) {
- // If the reference to the personality function symbol is not already
- // pc-relative, then we need to subtract our current address from it. Do
- // this by emitting a label and subtracting it from the expression we
- // already have. This is equivalent to emitting "foo - .", but we have to
- // emit the label for "." directly.
- if (!IsPersonalityPCRel) {
- SmallString<64> Name;
- raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
- << "personalityref_addr" << Asm->getFunctionNumber() << "_" << Index;
- MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str());
- Asm->OutStreamer.EmitLabel(DotSym);
-
- PersonalityRef =
- MCBinaryExpr::CreateSub(PersonalityRef,
- MCSymbolRefExpr::Create(DotSym,Asm->OutContext),
- Asm->OutContext);
- }
-
+ if (!IsPersonalityPCRel)
+ PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr",
+ Index);
+
O << MAI->getData32bitsDirective();
PersonalityRef->print(O, MAI);
Asm->EOL("Personality");
@@ -232,11 +237,16 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
// corresponding function is static, this should not be externally visible.
if (!TheFunc->hasLocalLinkage())
if (const char *GlobalEHDirective = MAI->getGlobalEHDirective())
- O << GlobalEHDirective << EHFrameInfo.FnName << "\n";
+ O << GlobalEHDirective << EHFrameInfo.FnName << '\n';
// If corresponding function is weak definition, this should be too.
if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective())
- O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n";
+ O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << '\n';
+
+ // If corresponding function is hidden, this should be too.
+ if (TheFunc->hasHiddenVisibility())
+ if (const char *HiddenDirective = MAI->getHiddenDirective())
+        O << HiddenDirective << EHFrameInfo.FnName << '\n';
// If there are no calls then you can't unwind. This may mean we can omit the
// EH Frame, but some environments do not handle weak absolute symbols. If
@@ -457,6 +467,39 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
return SizeActions;
}
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (MO.isGlobal()) {
+ if (Function *F = dyn_cast<Function>(MO.getGlobal())) {
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+ }
+ }
+
+ return MarkedNoUnwind;
+}
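
The effect shows up in ComputeCallSiteTable below: a call whose single Function operand is nounwind no longer marks its range as potentially throwing. Sketched on hypothetical IR:

    // declare void @g() nounwind
    // call void @g()   ; CallToNoUnwindFunction(MI) == true, range stays quiet
    // call void @h()   ; no nounwind attribute -> SawPotentiallyThrowing is set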
+
/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
/// has a try-range containing the call, a non-zero landing pad, and an
/// appropriate action. The entry for an ordinary call has a try-range
@@ -485,7 +528,9 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- SawPotentiallyThrowing |= MI->getDesc().isCall();
+ if (MI->getDesc().isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+
continue;
}
@@ -497,7 +542,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
SawPotentiallyThrowing = false;
// Beginning of a new try-range?
- RangeMapType::iterator L = PadMap.find(BeginLabel);
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
if (L == PadMap.end())
// Nope, it was just some random label.
continue;
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index f6f5025..aff1665 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -25,13 +25,14 @@ namespace llvm {
struct LandingPadInfo;
class MachineModuleInfo;
class MCAsmInfo;
+class MCExpr;
class Timer;
class raw_ostream;
//===----------------------------------------------------------------------===//
/// DwarfException - Emits Dwarf exception handling directives.
///
-class VISIBILITY_HIDDEN DwarfException : public Dwarf {
+class DwarfException : public Dwarf {
struct FunctionEHFrameInfo {
std::string FnName;
unsigned Number;
@@ -155,6 +156,10 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf {
SmallVectorImpl<ActionEntry> &Actions,
SmallVectorImpl<unsigned> &FirstActions);
+ /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+ /// marked `nounwind'. Return `false' otherwise.
+ bool CallToNoUnwindFunction(const MachineInstr *MI);
+
/// ComputeCallSiteTable - Compute the call-site table. The entry for an
/// invoke has a try-range containing the call, a non-zero landing pad and an
/// appropriate action. The entry for an ordinary call has a try-range
@@ -168,6 +173,11 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf {
const SmallVectorImpl<unsigned> &FirstActions);
void EmitExceptionTable();
+ /// CreateLabelDiff - Emit a label and subtract it from the expression we
+ /// already have. This is equivalent to emitting "foo - .", but we have to
+ /// emit the label for "." directly.
+ const MCExpr *CreateLabelDiff(const MCExpr *ExprRef, const char *LabelName,
+ unsigned Index);
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
index 33ebb3b..dedd695 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -29,7 +29,7 @@ namespace llvm {
class TargetData;
class TargetRegisterInfo;
- class VISIBILITY_HIDDEN Dwarf {
+ class Dwarf {
protected:
//===-------------------------------------------------------------==---===//
// Core attributes used by the DWARF printer.
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
index 0638d35..63ae653 100644
--- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -81,47 +81,20 @@ unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col,
return DD->RecordSourceLine(Line, Col, Scope);
}
-/// RecordRegionStart - Indicate the start of a region.
-unsigned DwarfWriter::RecordRegionStart(MDNode *N) {
- return DD->RecordRegionStart(N);
-}
-
-/// RecordRegionEnd - Indicate the end of a region.
-unsigned DwarfWriter::RecordRegionEnd(MDNode *N) {
- return DD->RecordRegionEnd(N);
-}
-
/// getRecordSourceLineCount - Count source lines.
unsigned DwarfWriter::getRecordSourceLineCount() {
return DD->getRecordSourceLineCount();
}
-/// RecordVariable - Indicate the declaration of a local variable.
-///
-void DwarfWriter::RecordVariable(MDNode *N, unsigned FrameIndex) {
- DD->RecordVariable(N, FrameIndex);
-}
-
/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
/// be emitted.
bool DwarfWriter::ShouldEmitDwarfDebug() const {
return DD && DD->ShouldEmitDwarfDebug();
}
-//// RecordInlinedFnStart
-unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU,
- unsigned Line, unsigned Col) {
- return DD->RecordInlinedFnStart(SP, CU, Line, Col);
-}
-
-/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
-unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) {
- return DD->RecordInlinedFnEnd(SP);
-}
-
-void DwarfWriter::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L) {
- DD->SetDbgScopeEndLabels(MI, L);
+void DwarfWriter::BeginScope(const MachineInstr *MI, unsigned L) {
+ DD->BeginScope(MI, L);
}
-void DwarfWriter::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L) {
- DD->SetDbgScopeBeginLabels(MI, L);
+void DwarfWriter::EndScope(const MachineInstr *MI) {
+ DD->EndScope(MI);
}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index baea964..94bfb72 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -40,18 +41,38 @@ using namespace llvm;
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
-static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
// Throttle for huge numbers of predecessors (compile speed problems)
static cl::opt<unsigned>
-TailMergeThreshold("tail-merge-threshold",
+TailMergeThreshold("tail-merge-threshold",
cl::desc("Max number of predecessors to consider tail merging"),
cl::init(150), cl::Hidden);
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
+
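
Both knobs are cl::opt values: tail-merge-threshold caps how many predecessors are even considered, while the new tail-merge-size sets the minimum common tail worth merging. Being cl::Hidden they are absent from -help but should still parse; a hypothetical invocation, with flag spellings taken from the declarations above:

    llc -tail-merge-size=4 -tail-merge-threshold=100 input.bc -o input.s
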
+namespace {
+ /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass,
+ public BranchFolder {
+ public:
+ static char ID;
+ explicit BranchFolderPass(bool defaultEnableTailMerge)
+ : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+ };
+}
char BranchFolderPass::ID = 0;
-FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
return new BranchFolderPass(DefaultEnableTailMerge);
}
@@ -63,7 +84,6 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
}
-
BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
@@ -77,12 +97,12 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(errs() << "\nRemoving MBB: " << *MBB);
-
+
MachineFunction *MF = MBB->getParent();
// drop all successors.
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_end()-1);
-
+
// If there are any labels in the basic block, unregister them from
// MachineModuleInfo.
if (MMI && !MBB->empty()) {
@@ -93,7 +113,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
MMI->InvalidateLabel(I->getOperand(0).getImm());
}
}
-
+
// Remove the block.
MF->erase(MBB);
}
@@ -182,6 +202,11 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
MadeChange |= MadeChangeThisIteration;
}
+ // Do tail duplication once after tail merging is done. Otherwise it is
+ // tough to avoid situations where tail duplication and tail merging undo
+ // each other's transformations ad infinitum.
+ MadeChange |= TailDuplicateBlocks(MF);
+
  // See if any jump tables have become mergeable or dead as the code generator
// did its thing.
MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
@@ -190,7 +215,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// Figure out how these jump tables should be merged.
std::vector<unsigned> JTMapping;
JTMapping.reserve(JTs.size());
-
+
// We always keep the 0th jump table.
JTMapping.push_back(0);
@@ -202,7 +227,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
else
JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
}
-
+
  // If a jump table was merged with another one, walk the function rewriting
  // references to jump tables to reference the new JT IDs. Keep track of
  // whether we see a jump table idx; if not, we can delete the JT.
@@ -221,7 +246,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
JTIsLive.set(NewIdx);
}
}
-
+
// Finally, remove dead jump tables. This happens either because the
// indirect jump was unreachable (and thus deleted) or because the jump
// table was merged with some other one.
@@ -245,7 +270,7 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
unsigned Hash = MI->getOpcode();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &Op = MI->getOperand(i);
-
+
// Merge in bits from the operand if easy.
unsigned OperandHash = 0;
switch (Op.getType()) {
@@ -267,31 +292,30 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
break;
default: break;
}
-
+
Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
}
return Hash;
}
/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks
-/// with no successors, we hash two instructions, because cross-jumping
-/// only saves code when at least two instructions are removed (since a
+/// with no successors, we hash two instructions, because cross-jumping
+/// only saves code when at least two instructions are removed (since a
/// branch must be inserted). For blocks with a successor, one of the
/// two blocks to be tail-merged will end with a branch already, so
/// it pays to cross-jump even for one instruction.
-
static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
unsigned minCommonTailLength) {
MachineBasicBlock::const_iterator I = MBB->end();
if (I == MBB->begin())
return 0; // Empty MBB.
-
+
--I;
unsigned Hash = HashMachineInstr(I);
-
+
if (I == MBB->begin() || minCommonTailLength == 1)
return Hash; // Single instr MBB.
-
+
--I;
// Hash in the second-to-last instruction.
Hash ^= HashMachineInstr(I) << 2;
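
The hash is deliberately cheap: opcode plus easily-extracted operand bits, folded position-dependently, with only the last one or two instructions participating. A standalone analogue (Inst is a stand-in type; the real code also mixes in the operand type):

    #include <vector>

    struct Inst { unsigned Opcode; std::vector<unsigned> OperandBits; };

    static unsigned hashInst(const Inst &I) {
      unsigned Hash = I.Opcode;
      for (unsigned i = 0, e = I.OperandBits.size(); i != e; ++i)
        Hash += (I.OperandBits[i] << 3) << (i & 31);
      return Hash;
    }

    // Analogue of HashEndOfMBB for minCommonTailLength > 1.
    static unsigned hashEndOfBlock(const std::vector<Inst> &B) {
      if (B.empty()) return 0;
      unsigned Hash = hashInst(B.back());
      if (B.size() > 1)
        Hash ^= hashInst(B[B.size() - 2]) << 2;
      return Hash;
    }

Blocks whose tails hash equal are only candidates; ComputeCommonTailLength still compares the instructions for real.
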
@@ -307,11 +331,11 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
MachineBasicBlock::iterator &I2) {
I1 = MBB1->end();
I2 = MBB2->end();
-
+
unsigned TailLen = 0;
while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
--I1; --I2;
- if (!I1->isIdenticalTo(I2) ||
+ if (!I1->isIdenticalTo(I2) ||
// FIXME: This check is dubious. It's used to get around a problem where
// people incorrectly expect inline asm directives to remain in the same
// relative order. This is untenable because normal compiler
@@ -332,11 +356,11 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
MachineBasicBlock *OldBB = OldInst->getParent();
-
+
// Remove all the old successors of OldBB from the CFG.
while (!OldBB->succ_empty())
OldBB->removeSuccessor(OldBB->succ_begin());
-
+
// Remove all the dead instructions from the end of OldBB.
OldBB->erase(OldInst, OldBB->end());
@@ -361,10 +385,10 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// Move all the successors of this block to the specified block.
NewMBB->transferSuccessors(&CurMBB);
-
+
// Add an edge from CurMBB to NewMBB for the fall-through.
CurMBB.addSuccessor(NewMBB);
-
+
// Splice the code over.
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
@@ -375,7 +399,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
RS->forward(prior(CurMBB.end()));
BitVector RegsLiveAtExit(TRI->getNumRegs());
RS->getRegsUsed(RegsLiveAtExit, false);
- for (unsigned int i=0, e=TRI->getNumRegs(); i!=e; i++)
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
if (RegsLiveAtExit[i])
NewMBB->addLiveIn(i);
}
@@ -404,8 +428,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
// branches temporarily for tail merging). In the case where CurMBB ends
// with a conditional branch to the next block, optimize by reversing the
// test and conditionally branching to SuccMBB instead.
-
-static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
const TargetInstrInfo *TII) {
MachineFunction *MF = CurMBB->getParent();
MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
@@ -425,24 +448,43 @@ static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>());
}
-static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
- const std::pair<unsigned,MachineBasicBlock*> &q) {
- if (p.first < q.first)
- return true;
- else if (p.first > q.first)
- return false;
- else if (p.second->getNumber() < q.second->getNumber())
- return true;
- else if (p.second->getNumber() > q.second->getNumber())
- return false;
- else {
- // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
- // an object with itself.
+bool
+BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
+ if (getHash() < o.getHash())
+ return true;
+ else if (getHash() > o.getHash())
+ return false;
+ else if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ return true;
+ else if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ return false;
+ else {
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
#ifndef _GLIBCXX_DEBUG
- llvm_unreachable("Predecessor appears twice");
+ llvm_unreachable("Predecessor appears twice");
#endif
- return false;
+ return false;
+ }
+}
+
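
The comparator is the old MergeCompare reshaped as a member operator<: hash first, block number as the tie-break that makes the order strict, since block numbers are unique. The pattern in miniature:

    struct Elt {
      unsigned Hash;
      int BlockNumber;                      // unique, so ties cannot persist
      bool operator<(const Elt &O) const {
        if (Hash != O.Hash)
          return Hash < O.Hash;
        return BlockNumber < O.BlockNumber; // strict weak order, sort-safe
      }
    };

The _GLIBCXX_DEBUG special case exists because that mode's checks may compare an element with itself, which would otherwise trip the "appears twice" assertion.
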
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &I) {
+ I = MBB->end();
+ unsigned NumTerms = 0;
+ for (;;) {
+ if (I == MBB->begin()) {
+ I = MBB->end();
+ break;
}
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+ ++NumTerms;
+ }
+ return NumTerms;
}
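
Because the scan runs backward from the block's end, the out-parameter lands on the last non-terminator when one exists. A container-based analogue (MI is a stand-in type):

    #include <cstddef>
    #include <vector>

    struct MI { bool IsTerminator; };

    // Returns the count of trailing terminators; FirstNonTerm gets the index
    // of the last non-terminator, or B.size() if the block is all terminators.
    static unsigned countTerminators(const std::vector<MI> &B,
                                     size_t &FirstNonTerm) {
      FirstNonTerm = B.size();
      unsigned NumTerms = 0;
      for (size_t i = B.size(); i != 0; --i) {
        if (!B[i - 1].IsTerminator) { FirstNonTerm = i - 1; break; }
        ++NumTerms;
      }
      return NumTerms;
    }
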
/// ProfitableToMerge - Check if two machine basic blocks have a common tail
@@ -454,21 +496,52 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
unsigned minCommonTailLength,
unsigned &CommonTailLen,
MachineBasicBlock::iterator &I1,
- MachineBasicBlock::iterator &I2) {
+ MachineBasicBlock::iterator &I2,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
MachineFunction *MF = MBB1->getParent();
- if (CommonTailLen >= minCommonTailLength)
- return true;
-
if (CommonTailLen == 0)
return false;
- // If we are optimizing for code size, 1 instruction in common is enough if
- // we don't have to split a block. At worst we will be replacing a
- // fallthrough into the common tail with a branch, which at worst breaks
- // even with falling through into the duplicated common tail.
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ // It's almost always profitable to merge any number of non-terminator
+ // instructions with the block that falls through into the common successor.
+ if (MBB1 == PredBB || MBB2 == PredBB) {
+ MachineBasicBlock::iterator I;
+ unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+ if (CommonTailLen > NumTerms)
+ return true;
+ }
+
+ // If one of the blocks can be completely merged and happens to be in
+ // a position where the other could fall through into it, merge any number
+ // of instructions, because it can be done without a branch.
+ // TODO: If the blocks are not adjacent, move one of them so that they are?
+ if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ return true;
+ if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ return true;
+
+ // If both blocks have an unconditional branch temporarily stripped out,
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ !MBB1->back().getDesc().isBarrier() &&
+ !MBB2->back().getDesc().isBarrier())
+ ++EffectiveTailLen;
+
+ // Check if the common tail is long enough to be worthwhile.
+ if (EffectiveTailLen >= minCommonTailLength)
+ return true;
+
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ if (EffectiveTailLen >= 2 &&
+ MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
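
Flattened out, the new profitability logic is a short decision list over precomputed facts. A pure-function sketch; the parameter names are stand-ins for the queries made above:

    static bool profitableToMerge(unsigned CommonTailLen,
                                  bool OneBlockIsFallThroughPred,
                                  unsigned TermsInOtherBlock,
                                  bool OtherFallsIntoWholeTail,
                                  bool BothHadStrippedBranch,
                                  unsigned MinTailLen,
                                  bool OptForSize, bool NoSplitNeeded) {
      if (CommonTailLen == 0)
        return false;
      // Merging with the fall-through predecessor costs no extra branch.
      if (OneBlockIsFallThroughPred && CommonTailLen > TermsInOtherBlock)
        return true;
      // A whole-block merge into a layout successor is branch-free too.
      if (OtherFallsIntoWholeTail)
        return true;
      // Credit the unconditional branches tail merging already stripped.
      unsigned EffectiveTailLen = CommonTailLen + (BothHadStrippedBranch ? 1 : 0);
      if (EffectiveTailLen >= MinTailLen)
        return true;
      // At -Os, two shared instructions beat one new branch if no split is needed.
      return EffectiveTailLen >= 2 && OptForSize && NoSplitNeeded;
    }
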
@@ -476,40 +549,44 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
}
/// ComputeSameTails - Look through all the blocks in MergePotentials that have
-/// hash CurHash (guaranteed to match the last element). Build the vector
+/// hash CurHash (guaranteed to match the last element). Build the vector
/// SameTails of all those that have the (same) largest number of instructions
/// in common of any pair of these blocks. SameTails entries contain an
-/// iterator into MergePotentials (from which the MachineBasicBlock can be
-/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
/// instruction where the matching code sequence begins.
/// Order of elements in SameTails is the reverse of the order in which
/// those blocks appear in MergePotentials (where they are not necessarily
/// consecutive).
-unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
- unsigned minCommonTailLength) {
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
unsigned maxCommonTailLength = 0U;
SameTails.clear();
MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
MPIterator HighestMPIter = prior(MergePotentials.end());
for (MPIterator CurMPIter = prior(MergePotentials.end()),
- B = MergePotentials.begin();
- CurMPIter!=B && CurMPIter->first==CurHash;
+ B = MergePotentials.begin();
+ CurMPIter != B && CurMPIter->getHash() == CurHash;
--CurMPIter) {
- for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) {
+    for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash; --I) {
unsigned CommonTailLen;
- if (ProfitableToMerge(CurMPIter->second, I->second, minCommonTailLength,
- CommonTailLen, TrialBBI1, TrialBBI2)) {
+ if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
+ minCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2,
+ SuccBB, PredBB)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
HighestMPIter = CurMPIter;
- SameTails.push_back(std::make_pair(CurMPIter, TrialBBI1));
+ SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
}
if (HighestMPIter == CurMPIter &&
CommonTailLen == maxCommonTailLength)
- SameTails.push_back(std::make_pair(I, TrialBBI2));
+ SameTails.push_back(SameTailElt(I, TrialBBI2));
}
- if (I==B)
+ if (I == B)
break;
}
}
@@ -518,21 +595,21 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
/// MergePotentials, restoring branches at ends of blocks as appropriate.
-void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
- MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB) {
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
MPIterator CurMPIter, B;
- for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
- CurMPIter->first==CurHash;
+ for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+ CurMPIter->getHash() == CurHash;
--CurMPIter) {
// Put the unconditional branch back, if we need one.
- MachineBasicBlock *CurMBB = CurMPIter->second;
+ MachineBasicBlock *CurMBB = CurMPIter->getBlock();
if (SuccBB && CurMBB != PredBB)
FixTail(CurMBB, SuccBB, TII);
- if (CurMPIter==B)
+ if (CurMPIter == B)
break;
}
- if (CurMPIter->first!=CurHash)
+ if (CurMPIter->getHash() != CurHash)
CurMPIter++;
MergePotentials.erase(CurMPIter, MergePotentials.end());
}
@@ -541,35 +618,37 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
/// only of the common tail. Create a block that does by splitting one.
unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
unsigned maxCommonTailLength) {
- unsigned i, commonTailIndex;
+ unsigned commonTailIndex = 0;
unsigned TimeEstimate = ~0U;
- for (i=0, commonTailIndex=0; i<SameTails.size(); i++) {
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
// Use PredBB if possible; that doesn't require a new branch.
- if (SameTails[i].first->second==PredBB) {
+ if (SameTails[i].getBlock() == PredBB) {
commonTailIndex = i;
break;
}
// Otherwise, make a (fairly bogus) choice based on estimate of
// how long it will take the various blocks to execute.
- unsigned t = EstimateRuntime(SameTails[i].first->second->begin(),
- SameTails[i].second);
- if (t<=TimeEstimate) {
+ unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
+ SameTails[i].getTailStartPos());
+ if (t <= TimeEstimate) {
TimeEstimate = t;
commonTailIndex = i;
}
}
- MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second;
- MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+ MachineBasicBlock::iterator BBI =
+ SameTails[commonTailIndex].getTailStartPos();
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
- DEBUG(errs() << "\nSplitting " << MBB->getNumber() << ", size "
+ DEBUG(errs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
<< maxCommonTailLength);
MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
- SameTails[commonTailIndex].first->second = newMBB;
- SameTails[commonTailIndex].second = newMBB->begin();
+ SameTails[commonTailIndex].setBlock(newMBB);
+ SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
+
// If we split PredBB, newMBB is the new predecessor.
- if (PredBB==MBB)
+ if (PredBB == MBB)
PredBB = newMBB;
return commonTailIndex;
@@ -579,35 +658,49 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
// successor, or all have no successor) can be tail-merged. If there is a
// successor, any blocks in MergePotentials that are not tail-merged and
// are not immediately before Succ must have an unconditional branch to
-// Succ added (but the predecessor/successor lists need no adjustment).
+// Succ added (but the predecessor/successor lists need no adjustment).
// The lone predecessor of Succ that falls through into Succ,
// if any, is given in PredBB.
-bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
- MachineBasicBlock* PredBB) {
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
bool MadeChange = false;
- // It doesn't make sense to save a single instruction since tail merging
- // will add a jump.
- // FIXME: Ask the target to provide the threshold?
- unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1;
-
- DEBUG(errs() << "\nTryMergeBlocks " << MergePotentials.size() << '\n');
+ // Except for the special cases below, tail-merge if there are at least
+ // this many instructions in common.
+ unsigned minCommonTailLength = TailMergeSize;
+
+ DEBUG(errs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ errs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", ");
+ errs() << "\n";
+ if (SuccBB) {
+ errs() << " with successor BB#" << SuccBB->getNumber() << '\n';
+ if (PredBB)
+ errs() << " which has fall-through from BB#"
+ << PredBB->getNumber() << "\n";
+ }
+ errs() << "Looking for common tails of at least "
+ << minCommonTailLength << " instruction"
+ << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ );
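
Given those format strings, a run over three candidate blocks would produce output along these lines (block numbers hypothetical):

    TryTailMergeBlocks: BB#4, BB#7, BB#9
     with successor BB#12
      which has fall-through from BB#9
    Looking for common tails of at least 3 instructions
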
// Sort by hash value so that blocks with identical end sequences sort
// together.
- std::stable_sort(MergePotentials.begin(), MergePotentials.end(),MergeCompare);
+ std::stable_sort(MergePotentials.begin(), MergePotentials.end());
// Walk through equivalence sets looking for actual exact matches.
while (MergePotentials.size() > 1) {
- unsigned CurHash = prior(MergePotentials.end())->first;
-
+ unsigned CurHash = MergePotentials.back().getHash();
+
// Build SameTails, identifying the set of blocks with this hash code
// and with the maximum number of instructions in common.
- unsigned maxCommonTailLength = ComputeSameTails(CurHash,
- minCommonTailLength);
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength,
+ SuccBB, PredBB);
- // If we didn't find any pair that has at least minCommonTailLength
+ // If we didn't find any pair that has at least minCommonTailLength
// instructions in common, remove all blocks with this hash code and retry.
if (SameTails.empty()) {
RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
@@ -618,36 +711,58 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
// block, which we can't jump to), we can treat all blocks with this same
// tail at once. Use PredBB if that is one of the possibilities, as that
// will not introduce any extra branches.
- MachineBasicBlock *EntryBB = MergePotentials.begin()->second->
- getParent()->begin();
- unsigned int commonTailIndex, i;
- for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) {
- MachineBasicBlock *MBB = SameTails[i].first->second;
- if (MBB->begin() == SameTails[i].second && MBB != EntryBB) {
- commonTailIndex = i;
- if (MBB==PredBB)
+ MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
+ getParent()->begin();
+ unsigned commonTailIndex = SameTails.size();
+ // If there are two blocks, check to see if one can be made to fall through
+ // into the other.
+ if (SameTails.size() == 2 &&
+ SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
+ SameTails[1].tailIsWholeBlock())
+ commonTailIndex = 1;
+ else if (SameTails.size() == 2 &&
+ SameTails[1].getBlock()->isLayoutSuccessor(
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock())
+ commonTailIndex = 0;
+ else {
+ // Otherwise just pick one, favoring the fall-through predecessor if
+ // there is one.
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = SameTails[i].getBlock();
+ if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
+ continue;
+ if (MBB == PredBB) {
+ commonTailIndex = i;
break;
+ }
+ if (SameTails[i].tailIsWholeBlock())
+ commonTailIndex = i;
}
}
- if (commonTailIndex==SameTails.size()) {
+ if (commonTailIndex == SameTails.size() ||
+ (SameTails[commonTailIndex].getBlock() == PredBB &&
+ !SameTails[commonTailIndex].tailIsWholeBlock())) {
// None of the blocks consist entirely of the common tail.
// Split a block so that one does.
- commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
+ commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
}
- MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
- DEBUG(errs() << "\nUsing common tail " << MBB->getNumber() << " for ");
- for (unsigned int i=0; i<SameTails.size(); ++i) {
- if (commonTailIndex==i)
+ DEBUG(errs() << "\nUsing common tail in BB#" << MBB->getNumber()
+ << " for ");
+    for (unsigned int i = 0, e = SameTails.size(); i != e; ++i) {
+ if (commonTailIndex == i)
continue;
- DEBUG(errs() << SameTails[i].first->second->getNumber() << ",");
+ DEBUG(errs() << "BB#" << SameTails[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", "));
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
- ReplaceTailWithBranchTo(SameTails[i].second, MBB);
+ ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
- MergePotentials.erase(SameTails[i].first);
+ MergePotentials.erase(SameTails[i].getMPIter());
}
DEBUG(errs() << "\n");
// We leave commonTailIndex in the worklist in case there are other blocks
@@ -660,26 +775,27 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (!EnableTailMerge) return false;
-
+
bool MadeChange = false;
// First find blocks with no successors.
MergePotentials.clear();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
if (I->succ_empty())
- MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I));
}
+
// See if we can do any tail merging on those.
if (MergePotentials.size() < TailMergeThreshold &&
MergePotentials.size() >= 2)
- MadeChange |= TryMergeBlocks(NULL, NULL);
+ MadeChange |= TryTailMergeBlocks(NULL, NULL);
// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
// (1) temporarily removing any unconditional branch from the predecessor
// to IBB, and
// (2) alter conditional branches so they branch to the other block
- // not IBB; this may require adding back an unconditional branch to IBB
+ // not IBB; this may require adding back an unconditional branch to IBB
// later, where there wasn't one coming in. E.g.
// Bcc IBB
// fallthrough to QBB
@@ -693,18 +809,19 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// a compile-time infinite loop repeatedly doing and undoing the same
// transformations.)
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ I != E; ++I) {
if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
MachineBasicBlock *IBB = I;
MachineBasicBlock *PredBB = prior(I);
MergePotentials.clear();
- for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
E2 = I->pred_end();
P != E2; ++P) {
- MachineBasicBlock* PBB = *P;
+ MachineBasicBlock *PBB = *P;
        // Skip blocks that loop to themselves; we can't tail merge these.
- if (PBB==IBB)
+ if (PBB == IBB)
continue;
// Visit each predecessor only once.
if (!UniquePreds.insert(PBB))
@@ -715,7 +832,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Failing case: IBB is the target of a cbr, and
// we cannot reverse the branch.
SmallVector<MachineOperand, 4> NewCond(Cond);
- if (!Cond.empty() && TBB==IBB) {
+ if (!Cond.empty() && TBB == IBB) {
if (TII->ReverseBranchCondition(NewCond))
continue;
// This is the QBB case described above
@@ -727,20 +844,20 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// to have a bit in the edge so we didn't have to do all this.
if (IBB->isLandingPad()) {
MachineFunction::iterator IP = PBB; IP++;
- MachineBasicBlock* PredNextBB = NULL;
- if (IP!=MF.end())
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
PredNextBB = IP;
- if (TBB==NULL) {
- if (IBB!=PredNextBB) // fallthrough
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
continue;
} else if (FBB) {
- if (TBB!=IBB && FBB!=IBB) // cbr then ubr
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
continue;
} else if (Cond.empty()) {
- if (TBB!=IBB) // ubr
+ if (TBB != IBB) // ubr
continue;
} else {
- if (TBB!=IBB && IBB!=PredNextBB) // cbr
+ if (TBB != IBB && IBB != PredNextBB) // cbr
continue;
}
}
@@ -749,19 +866,20 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
TII->RemoveBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
- TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond);
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond);
}
- MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U),
+ *P));
}
}
- if (MergePotentials.size() >= 2)
- MadeChange |= TryMergeBlocks(I, PredBB);
- // Reinsert an unconditional branch if needed.
- // The 1 below can occur as a result of removing blocks in TryMergeBlocks.
- PredBB = prior(I); // this may have been changed in TryMergeBlocks
- if (MergePotentials.size()==1 &&
- MergePotentials.begin()->second != PredBB)
- FixTail(MergePotentials.begin()->second, I, TII);
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+ // Reinsert an unconditional branch if needed.
+ // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
}
}
return MadeChange;
@@ -773,14 +891,14 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
bool MadeChange = false;
-
+
// Make sure blocks are numbered in order
MF.RenumberBlocks();
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
MachineBasicBlock *MBB = I++;
MadeChange |= OptimizeBlock(MBB);
-
+
// If it is dead, remove it.
if (MBB->pred_empty()) {
RemoveDeadBlock(MBB);
@@ -801,7 +919,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
///
bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
bool BranchUnAnalyzable,
- MachineBasicBlock *TBB,
+ MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond) {
MachineFunction::iterator Fallthrough = CurBB;
@@ -809,14 +927,22 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
// If FallthroughBlock is off the end of the function, it can't fall through.
if (Fallthrough == CurBB->getParent()->end())
return false;
-
+
// If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
if (!CurBB->isSuccessor(Fallthrough))
return false;
-
- // If we couldn't analyze the branch, assume it could fall through.
- if (BranchUnAnalyzable) return true;
-
+
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicable check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier. This
+ // is over-conservative though, because if an instruction isn't actually
+ // predicated we could still treat it like a barrier.
+ if (BranchUnAnalyzable)
+ return CurBB->empty() || !CurBB->back().getDesc().isBarrier() ||
+ CurBB->back().getDesc().isPredicable();
+
// If there is no branch, control always falls through.
if (TBB == 0) return true;
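
When AnalyzeBranch fails, the block no longer pessimistically reports that it can fall through; it now inspects the final instruction, with the predicable escape hatch described in the comment. Reduced to a predicate (DescFlags is a stand-in for the MCInstrDesc queries above):

    struct DescFlags { bool IsBarrier, IsPredicable; };

    // Fall-through stays possible unless the block provably ends in an
    // unpredicated control barrier.
    static bool mayFallThroughUnanalyzable(bool BlockEmpty, DescFlags Last) {
      return BlockEmpty || !Last.IsBarrier || Last.IsPredicable;
    }
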
@@ -825,11 +951,11 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
if (MachineFunction::iterator(TBB) == Fallthrough ||
MachineFunction::iterator(FBB) == Fallthrough)
return true;
-
- // If it's an unconditional branch to some block not the fall through, it
+
+ // If it's an unconditional branch to some block not the fall through, it
// doesn't fall through.
if (Cond.empty()) return false;
-
+
// Otherwise, if it is conditional and has no explicit false block, it falls
// through.
return FBB == 0;
@@ -853,14 +979,14 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
/// fall-through to MBB1 than to fall through into MBB2. This has to return
/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
/// result in infinite loops.
-static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2) {
// Right now, we use a simple heuristic. If MBB2 ends with a call, and
// MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
// optimize branches that branch to either a return block or an assert block
// into a fallthrough to the return.
if (MBB1->empty() || MBB2->empty()) return false;
-
+
// If there is a clear successor ordering we make sure that one block
// will fall through to the next
if (MBB1->isSuccessor(MBB2)) return true;
@@ -871,14 +997,153 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
}
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through. Tail-duplicate their instructions
+/// into their predecessors to eliminate (dynamic) branches.
+bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+
+ // Only duplicate blocks that end with unconditional branches.
+ if (CanFallThrough(MBB))
+ continue;
+
+ MadeChange |= TailDuplicate(MBB, MF);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB,
+ MachineFunction &MF) {
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB->isSuccessor(TailBB))
+ return false;
+
+ // Set the limit on the number of instructions to duplicate, with a default
+  // of one less than the tail-merge size. When optimizing for size,
+ // duplicate only one, because one branch instruction can be eliminated to
+ // compensate for the duplication.
+ unsigned MaxDuplicateCount =
+ MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ?
+ 1 : TII->TailDuplicationLimit(*TailBB, TailMergeSize - 1);
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned i = 0;
+ bool HasCall = false;
+ for (MachineBasicBlock::iterator I = TailBB->begin();
+ I != TailBB->end(); ++I, ++i) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (I->getDesc().isNotDuplicable()) return false;
+ // Don't duplicate more than the threshold.
+ if (i == MaxDuplicateCount) return false;
+ // Remember if we saw a call.
+ if (I->getDesc().isCall()) HasCall = true;
+ }
+ // Heuristically, don't tail-duplicate calls if it would expand code size,
+ // as it's less likely to be worth the extra cost.
+ if (i > 1 && HasCall)
+ return false;
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ if (PredBB->succ_size() > 1) continue;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() != 1)
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && CanFallThrough(PredBB))
+ continue;
+
+ DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+ // Clone the contents of TailBB into PredBB.
+ for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
+ I != E; ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(I);
+ PredBB->insert(PredBB->end(), NewMI);
+ }
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end(); I != E; ++I)
+ PredBB->addSuccessor(*I);
+
+ Changed = true;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
+ TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(errs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *TailBB);
+ PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end());
+    PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(TailBB);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
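
Stripped of branch analysis and the profitability gates, the core duplication step is: clone the tail's instructions into a predecessor whose only successor is the tail, then rewire that predecessor straight to the tail's successors. A toy-CFG sketch (Block and the index-based CFG are hypothetical):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct Block { std::vector<int> Insts; std::vector<size_t> Preds, Succs; };

    static bool tailDuplicateInto(std::vector<Block> &CFG,
                                  size_t Tail, size_t Pred) {
      Block &T = CFG[Tail], &P = CFG[Pred];
      if (P.Succs.size() != 1 || P.Succs[0] != Tail)
        return false;                                  // sole-successor preds only
      P.Insts.insert(P.Insts.end(), T.Insts.begin(), T.Insts.end()); // clone body
      P.Succs = T.Succs;                               // skip the tail entirely
      for (size_t i = 0, e = T.Succs.size(); i != e; ++i)
        CFG[T.Succs[i]].Preds.push_back(Pred);
      T.Preds.erase(std::remove(T.Preds.begin(), T.Preds.end(), Pred),
                    T.Preds.end());
      return true;                                     // Tail dies once Preds empties
    }

Once every predecessor has been handled this way, TailBB becomes unreachable and the dead-block sweep in TailDuplicateBlocks removes it.
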
/// OptimizeBlock - Analyze and optimize control flow related to the specified
/// block. This is never called on the entry block.
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
+ MachineFunction &MF = *MBB->getParent();
+ReoptimizeBlock:
MachineFunction::iterator FallThrough = MBB;
++FallThrough;
-
+
// If this block is empty, make everyone use its fall-through, not the block
// explicitly. Landing pads should not do this since the landing-pad table
// points to this block. Blocks with their addresses taken shouldn't be
@@ -886,8 +1151,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
if (MBB->empty() && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
// Dead block? Leave for cleanup later.
if (MBB->pred_empty()) return MadeChange;
-
- if (FallThrough == MBB->getParent()->end()) {
+
+ if (FallThrough == MF.end()) {
// TODO: Simplify preds to not branch here if possible!
} else {
// Rewrite all predecessors of the old block to go to the fallthrough
@@ -898,8 +1163,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
// If MBB was the target of a jump table, update jump tables to go to the
// fallthrough instead.
- MBB->getParent()->getJumpTableInfo()->
- ReplaceMBBInJumpTables(MBB, FallThrough);
+ MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, FallThrough);
MadeChange = true;
}
return MadeChange;
@@ -917,29 +1181,49 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// If the CFG for the prior block has extra edges, remove them.
MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
!PriorCond.empty());
-
+
// If the previous branch is conditional and both conditions go to the same
// destination, remove the branch, replacing it with an unconditional one or
// a fall-through.
if (PriorTBB && PriorTBB == PriorFBB) {
TII->RemoveBranch(PrevBB);
- PriorCond.clear();
+ PriorCond.clear();
if (PriorTBB != MBB)
TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
-
+
+ // If the previous block unconditionally falls through to this block and
+ // this block has no other predecessors, move the contents of this block
+ // into the prior block. This doesn't usually happen when SimplifyCFG
+ // has been used, but it can happen if tail merging splits a fall-through
+ // predecessor of a block.
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+ PrevBB.succ_size() == 1 &&
+ !MBB->hasAddressTaken()) {
+ DEBUG(errs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
+ PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+    PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+
// If the previous branch *only* branches to *this* block (conditional or
// not) remove the branch.
if (PriorTBB == MBB && PriorFBB == 0) {
TII->RemoveBranch(PrevBB);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
-
+
// If the prior block branches somewhere else on the condition and here if
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
@@ -947,9 +1231,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
-
+
// If the prior block branches here on true and somewhere else on false, and
// if the branch condition is reversible, reverse the branch to create a
// fall-through.
@@ -960,10 +1244,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
-
+
// If this block has no successors (e.g. it is a return block or ends with
// a call to a no-return function like abort or __cxa_throw) and if the pred
// falls through into this block, and if it would otherwise fall through
@@ -976,13 +1260,13 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator(PriorTBB) == FallThrough &&
!CanFallThrough(MBB)) {
bool DoTransform = true;
-
+
// We have to be careful that the succs of PredBB aren't both no-successor
// blocks. If neither have successors and if PredBB is the second from
// last block in the function, we'd just keep swapping the two blocks for
// last. Only do the swap if one is clearly better to fall through than
// the other.
- if (FallThrough == --MBB->getParent()->end() &&
+ if (FallThrough == --MF.end() &&
!IsBetterFallthrough(PriorTBB, MBB))
DoTransform = false;
@@ -1000,20 +1284,20 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
if (DoTransform && !MBB->succ_empty() &&
(!CanFallThrough(PriorTBB) || PriorTBB->empty()))
DoTransform = false;
-
-
+
+
if (DoTransform) {
// Reverse the branch so we will fall through on the previous true cond.
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->ReverseBranchCondition(NewPriorCond)) {
DEBUG(errs() << "\nMoving MBB: " << *MBB
<< "To make fallthrough to: " << *PriorTBB << "\n");
-
+
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
// Move this block to the end of the function.
- MBB->moveAfter(--MBB->getParent()->end());
+ MBB->moveAfter(--MF.end());
MadeChange = true;
++NumBranchOpts;
return MadeChange;
@@ -1021,7 +1305,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
}
}
-
+
// Analyze the branch in the current block.
MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
SmallVector<MachineOperand, 4> CurCond;
@@ -1030,7 +1314,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// If the CFG for the prior block has extra edges, remove them.
MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
- // If this is a two-way branch, and the FBB branches to this block, reverse
+ // If this is a two-way branch, and the FBB branches to this block, reverse
// the condition so the single-basic-block loop is faster. Instead of:
// Loop: xxx; jcc Out; jmp Loop
// we want:
@@ -1042,14 +1326,13 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
-
-
+
// If this branch is the only thing in its block, see if we can forward
// other blocks across it.
- if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
MBB->begin()->getDesc().isBranch() && CurTBB != MBB &&
!MBB->hasAddressTaken()) {
// This block may contain just an unconditional branch. Because there can
@@ -1068,7 +1351,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
!PrevBB.isSuccessor(MBB)) {
// If the prior block falls through into us, turn it into an
// explicit branch to us to make updates simpler.
- if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
PriorTBB != MBB && PriorFBB != MBB) {
if (PriorTBB == 0) {
assert(PriorCond.empty() && PriorFBB == 0 &&
@@ -1104,18 +1387,17 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
NewCurFBB, NewCurCond, true);
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
TII->RemoveBranch(*PMBB);
- NewCurCond.clear();
+ NewCurCond.clear();
TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
MadeChange = true;
++NumBranchOpts;
- PMBB->CorrectExtraCFGEdges(NewCurTBB, NewCurFBB, false);
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
}
}
}
// Change any jumptables to go to the new MBB.
- MBB->getParent()->getJumpTableInfo()->
- ReplaceMBBInJumpTables(MBB, CurTBB);
+ MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, CurTBB);
if (DidChange) {
++NumBranchOpts;
MadeChange = true;
@@ -1123,7 +1405,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
}
}
-
+
// Add the branch back if the block is more than just an uncond branch.
TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
}
@@ -1134,9 +1416,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// place to move this block where a fall-through will happen.
if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
PriorTBB, PriorFBB, PriorCond)) {
+
// Now we know that there was no fall-through into this block, check to
// see if it has a fall-through into its successor.
- bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
+ bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
CurCond);
if (!MBB->isLandingPad()) {
@@ -1147,12 +1430,15 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Analyze the branch at the end of the pred.
MachineBasicBlock *PredBB = *PI;
MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
- if (PredBB != MBB && !CanFallThrough(PredBB)
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (PredBB != MBB && !CanFallThrough(PredBB) &&
+ !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
&& (!CurFallsThru || !CurTBB || !CurFBB)
&& (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
// If the current block doesn't fall through, just move it.
// If the current block can fall through and does not end with a
- // conditional branch, we need to append an unconditional jump to
+ // conditional branch, we need to append an unconditional jump to
// the (current) next block. To avoid a possible compile-time
// infinite loop, move blocks only backward in this case.
// Also, if there are already 2 branches here, we cannot add a third;
@@ -1167,11 +1453,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
MBB->moveAfter(PredBB);
MadeChange = true;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
}
-
+
if (!CurFallsThru) {
// Check all successors to see if we can move this block before it.
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
@@ -1179,26 +1465,29 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Analyze the branch at the end of the block before the succ.
MachineBasicBlock *SuccBB = *SI;
MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
- std::vector<MachineOperand> SuccPrevCond;
-
+
// If this block doesn't already fall-through to that successor, and if
// the succ doesn't already have a block that can fall through into it,
// and if the successor isn't an EH destination, we can arrange for the
// fallthrough to happen.
- if (SuccBB != MBB && !CanFallThrough(SuccPrev) &&
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !CanFallThrough(SuccPrev) && !CurUnAnalyzable &&
!SuccBB->isLandingPad()) {
MBB->moveBefore(SuccBB);
MadeChange = true;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
-
+
// Okay, there is no really great place to put this block. If, however,
// the block before this one would be a fall-through if this block were
// removed, move this block to the end of the function.
- if (FallThrough != MBB->getParent()->end() &&
+ MachineBasicBlock *PrevTBB, *PrevFBB;
+ SmallVector<MachineOperand, 4> PrevCond;
+ if (FallThrough != MF.end() &&
+ !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
PrevBB.isSuccessor(FallThrough)) {
- MBB->moveAfter(--MBB->getParent()->end());
+ MBB->moveAfter(--MF.end());
MadeChange = true;
return MadeChange;
}
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 9763e33..4920755 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -11,7 +11,6 @@
#define LLVM_CODEGEN_BRANCHFOLDING_HPP
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include <vector>
namespace llvm {
@@ -20,6 +19,7 @@ namespace llvm {
class RegScavenger;
class TargetInstrInfo;
class TargetRegisterInfo;
+ template<typename T> class SmallVectorImpl;
class BranchFolder {
public:
@@ -30,11 +30,58 @@ namespace llvm {
const TargetRegisterInfo *tri,
MachineModuleInfo *mmi);
private:
- typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt;
+ class MergePotentialsElt {
+ unsigned Hash;
+ MachineBasicBlock *Block;
+ public:
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+ : Hash(h), Block(b) {}
+
+ unsigned getHash() const { return Hash; }
+ MachineBasicBlock *getBlock() const { return Block; }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ Block = MBB;
+ }
+
+ bool operator<(const MergePotentialsElt &) const;
+ };
typedef std::vector<MergePotentialsElt>::iterator MPIterator;
std::vector<MergePotentialsElt> MergePotentials;
- typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt;
+ class SameTailElt {
+ MPIterator MPIter;
+ MachineBasicBlock::iterator TailStartPos;
+ public:
+ SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+ : MPIter(mp), TailStartPos(tsp) {}
+
+ MPIterator getMPIter() const {
+ return MPIter;
+ }
+ MergePotentialsElt &getMergePotentialsElt() const {
+ return *getMPIter();
+ }
+ MachineBasicBlock::iterator getTailStartPos() const {
+ return TailStartPos;
+ }
+ unsigned getHash() const {
+ return getMergePotentialsElt().getHash();
+ }
+ MachineBasicBlock *getBlock() const {
+ return getMergePotentialsElt().getBlock();
+ }
+ bool tailIsWholeBlock() const {
+ return TailStartPos == getBlock()->begin();
+ }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ getMergePotentialsElt().setBlock(MBB);
+ }
+ void setTailStartPos(MachineBasicBlock::iterator Pos) {
+ TailStartPos = Pos;
+ }
+ };
std::vector<SameTailElt> SameTails;
bool EnableTailMerge;
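
The two classes carry exactly the payload the old std::pair typedefs did; the gain is call sites that read as intent rather than plumbing, e.g. SameTails[i].getBlock() instead of SameTails[i].first->second. The shape in miniature (Blk is a stand-in):

    struct Blk { int Number; };

    // Old: typedef std::pair<unsigned, Blk*> MergePotentialsElt;
    // New: same data, named accessors, comparator owned by the element.
    class Elt {
      unsigned Hash;
      Blk *Block;
    public:
      Elt(unsigned h, Blk *b) : Hash(h), Block(b) {}
      unsigned getHash() const { return Hash; }
      Blk *getBlock() const { return Block; }
    };
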
@@ -44,18 +91,23 @@ namespace llvm {
RegScavenger *RS;
bool TailMergeBlocks(MachineFunction &MF);
- bool TryMergeBlocks(MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB);
+ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1);
- unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB);
void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
unsigned maxCommonTailLength);
+ bool TailDuplicateBlocks(MachineFunction &MF);
+ bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF);
+
bool OptimizeBranches(MachineFunction &MF);
bool OptimizeBlock(MachineBasicBlock *MBB);
void RemoveDeadBlock(MachineBasicBlock *MBB);
@@ -66,19 +118,6 @@ namespace llvm {
MachineBasicBlock *TBB, MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond);
};
-
-
- /// BranchFolderPass - Wrap branch folder in a machine function pass.
- class BranchFolderPass : public MachineFunctionPass,
- public BranchFolder {
- public:
- static char ID;
- explicit BranchFolderPass(bool defaultEnableTailMerge)
- : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Control Flow Optimizer"; }
- };
}
#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
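
The pair-to-class conversion above only declares operator< for
MergePotentialsElt; its definition lies outside this hunk. A standalone
sketch of the presumed ordering (hash-major, so candidate blocks with
identical tail hashes become adjacent after sorting; the secondary key
is an assumption made here for determinism):

    #include <algorithm>
    #include <vector>

    struct Elt {
      unsigned Hash;
      int BlockNumber;
      // Hash-major ordering groups equal-hash candidates together;
      // the block-number tie-break only makes the sort deterministic.
      bool operator<(const Elt &O) const {
        return Hash < O.Hash || (Hash == O.Hash && BlockNumber < O.BlockNumber);
      }
    };

    void sortCandidates(std::vector<Elt> &V) {
      std::sort(V.begin(), V.end());  // equal hashes now form runs
    }
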
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 6fff12c..e9844d8 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -56,7 +56,6 @@ namespace {
MachineFunction::iterator InsertPt,
MachineFunction::iterator Begin,
MachineFunction::iterator End);
- void UpdateTerminator(MachineBasicBlock *MBB);
bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
MachineLoop *L);
bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
@@ -141,66 +140,9 @@ void CodePlacementOpt::Splice(MachineFunction &MF,
MF.splice(InsertPt, Begin, End);
- UpdateTerminator(prior(Begin));
- UpdateTerminator(OldBeginPrior);
- UpdateTerminator(OldEndPrior);
-}
-
-/// UpdateTerminator - Update the terminator instructions in MBB to account
-/// for changes to the layout. If the block previously used a fallthrough,
-/// it may now need a branch, and if it previously used branching it may now
-/// be able to use a fallthrough.
-///
-void CodePlacementOpt::UpdateTerminator(MachineBasicBlock *MBB) {
- // A block with no successors has no concerns with fall-through edges.
- if (MBB->succ_empty()) return;
-
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- bool B = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
- (void) B;
- assert(!B && "UpdateTerminators requires analyzable predecessors!");
- if (Cond.empty()) {
- if (TBB) {
- // The block has an unconditional branch. If its successor is now
- // its layout successor, delete the branch.
- if (MBB->isLayoutSuccessor(TBB))
- TII->RemoveBranch(*MBB);
- } else {
- // The block has an unconditional fallthrough. If its successor is not
- // its layout successor, insert a branch.
- TBB = *MBB->succ_begin();
- if (!MBB->isLayoutSuccessor(TBB))
- TII->InsertBranch(*MBB, TBB, 0, Cond);
- }
- } else {
- if (FBB) {
- // The block has a non-fallthrough conditional branch. If one of its
- // successors is its layout successor, rewrite it to a fallthrough
- // conditional branch.
- if (MBB->isLayoutSuccessor(TBB)) {
- TII->RemoveBranch(*MBB);
- TII->ReverseBranchCondition(Cond);
- TII->InsertBranch(*MBB, FBB, 0, Cond);
- } else if (MBB->isLayoutSuccessor(FBB)) {
- TII->RemoveBranch(*MBB);
- TII->InsertBranch(*MBB, TBB, 0, Cond);
- }
- } else {
- // The block has a fallthrough conditional branch.
- MachineBasicBlock *MBBA = *MBB->succ_begin();
- MachineBasicBlock *MBBB = *next(MBB->succ_begin());
- if (MBBA == TBB) std::swap(MBBB, MBBA);
- if (MBB->isLayoutSuccessor(TBB)) {
- TII->RemoveBranch(*MBB);
- TII->ReverseBranchCondition(Cond);
- TII->InsertBranch(*MBB, MBBA, 0, Cond);
- } else if (!MBB->isLayoutSuccessor(MBBA)) {
- TII->RemoveBranch(*MBB);
- TII->InsertBranch(*MBB, TBB, MBBA, Cond);
- }
- }
- }
+ prior(Begin)->updateTerminator();
+ OldBeginPrior->updateTerminator();
+ OldEndPrior->updateTerminator();
}
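
The pattern above -- splice first, then refresh the terminators of every
block whose layout successor may have changed -- is the whole contract of
the new MachineBasicBlock::updateTerminator() member (defined later in
this patch). A minimal usage sketch, assuming all affected blocks have
analyzable branches, since updateTerminator() asserts that:

    // Move MBB after Other, then repair fall-through/branches around
    // both the old and the new position.
    MachineBasicBlock *OldPrev = prior(MachineFunction::iterator(MBB));
    MBB->moveAfter(Other);
    MBB->updateTerminator();      // MBB's own layout successor changed
    OldPrev->updateTerminator();  // the block that used to fall into MBB
    Other->updateTerminator();    // the block MBB now follows
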
/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 3e3b28a..8a3bd0b 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -515,6 +515,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
if (CI->getType() != Type::getVoidTy(Context))
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
}
assert(CI->use_empty() &&
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 794ecf7..23dce4a 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -55,7 +55,10 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
SUnit *OnlyAvailablePred = 0;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
SUnit &Pred = *I->getSUnit();
if (!Pred.isScheduled) {
// We found an available, but not scheduled, predecessor. If it's the
@@ -75,7 +78,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
unsigned NumNodesBlocking = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
if (getSingleUnscheduledPred(I->getSUnit()) == SU)
++NumNodesBlocking;
}
@@ -92,7 +98,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
AdjustPriorityOfUnscheduledPreds(I->getSUnit());
}
}
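
The same two-kind test is now repeated in three loops here (and again in
PostRASchedulerList.cpp below). A small helper capturing the intent --
hypothetical, not in the patch -- since both anti- and output-dependences
are register dependences that an anti-dependence breaker can remove by
renaming:

    // Hypothetical predicate: edges the scheduler may ignore when
    // IgnoreAntiDep is set.
    static bool isBreakableDep(const SDep &D) {
      return D.getKind() == SDep::Anti || D.getKind() == SDep::Output;
    }

    // ...with which each loop body reduces to:
    if (IgnoreAntiDep && isBreakableDep(*I))
      continue;
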
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 2a93a35..a60d34f 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -53,7 +53,8 @@ static cl::opt<bool> DisableReMat("disable-rematerialization",
static cl::opt<bool> EnableFastSpilling("fast-spill",
cl::init(false), cl::Hidden);
-static cl::opt<bool> EarlyCoalescing("early-coalescing", cl::init(false));
+static cl::opt<bool> EarlyCoalescing("early-coalescing",
+ cl::init(false), cl::Hidden);
static cl::opt<int> CoalescingLimit("early-coalescing-limit",
cl::init(-1), cl::Hidden);
@@ -646,17 +647,17 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
0, false, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
-
+
interval.addRange(LR);
LR.valno->addKill(end);
DEBUG(errs() << " +" << LR << '\n');
}
-bool
-LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt,
- SmallVector<MachineInstr*,16> &IdentCopies,
- SmallVector<MachineInstr*,16> &OtherCopies) {
- bool HaveConflict = false;
+bool LiveIntervals::
+isSafeAndProfitableToCoalesce(LiveInterval &DstInt,
+ LiveInterval &SrcInt,
+ SmallVector<MachineInstr*,16> &IdentCopies,
+ SmallVector<MachineInstr*,16> &OtherCopies) {
unsigned NumIdent = 0;
for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg),
re = mri_->def_end(); ri != re; ++ri) {
@@ -665,16 +666,16 @@ LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt
if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
return false;
if (SrcReg != DstInt.reg) {
+ // Non-identity copy - we cannot handle overlapping intervals.
+ if (DstInt.liveAt(getInstructionIndex(MI)))
+ return false;
OtherCopies.push_back(MI);
- HaveConflict |= DstInt.liveAt(getInstructionIndex(MI));
} else {
IdentCopies.push_back(MI);
++NumIdent;
}
}
- if (!HaveConflict)
- return false; // Let coalescer handle it
return IdentCopies.size() > OtherCopies.size();
}
@@ -701,19 +702,21 @@ void LiveIntervals::performEarlyCoalescing() {
LiveInterval &SrcInt = getInterval(PHISrc);
SmallVector<MachineInstr*, 16> IdentCopies;
SmallVector<MachineInstr*, 16> OtherCopies;
- if (!isProfitableToCoalesce(DstInt, SrcInt, IdentCopies, OtherCopies))
+ if (!isSafeAndProfitableToCoalesce(DstInt, SrcInt,
+ IdentCopies, OtherCopies))
continue;
DEBUG(errs() << "PHI Join: " << *Join);
assert(DstInt.containsOneValue() && "PHI join should have just one val#!");
+ assert(std::distance(mri_->use_begin(PHISrc), mri_->use_end()) == 1 &&
+ "PHI join src should not be used elsewhere");
VNInfo *VNI = DstInt.getValNumInfo(0);
// Change the non-identity copies to directly target the phi destination.
for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) {
MachineInstr *PHICopy = OtherCopies[i];
- DEBUG(errs() << "Moving: " << *PHICopy);
-
SlotIndex MIIndex = getInstructionIndex(PHICopy);
+ DEBUG(errs() << "Moving: " << MIIndex << ' ' << *PHICopy);
SlotIndex DefIndex = MIIndex.getDefIndex();
LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex);
SlotIndex StartIndex = SLR->start;
@@ -724,8 +727,7 @@ void LiveIntervals::performEarlyCoalescing() {
SrcInt.removeValNo(SLR->valno);
DEBUG(errs() << " added range [" << StartIndex << ','
<< EndIndex << "] to reg" << DstInt.reg << '\n');
- if (DstInt.liveAt(StartIndex))
- DstInt.removeRange(StartIndex, EndIndex);
+ assert (!DstInt.liveAt(StartIndex) && "Cannot coalesce when dst live!");
VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true,
VNInfoAllocator);
NewVNI->setHasPHIKill(true);
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 96c655c..16a79bb 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -50,6 +50,14 @@ void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ if (Kills[i]->getParent() == MBB)
+ return Kills[i];
+ return NULL;
+}
+
void LiveVariables::VarInfo::dump() const {
errs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
@@ -222,8 +230,9 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
/// implicit defs to a machine instruction if there was an earlier def of its
/// super-register.
void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
// If there was a previous use or a "full" def all is well.
- if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) {
+ if (!LastDef && !PhysRegUse[Reg]) {
// Otherwise, the last sub-register def implicitly defines this register.
// e.g.
// AH =
@@ -257,6 +266,11 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
}
}
}
+ else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
+ // Last def defines the super register, add an implicit def of reg.
+ LastDef->addOperand(MachineOperand::CreateReg(Reg,
+ true/*IsDef*/, true/*IsImp*/));
// Remember this use.
PhysRegUse[Reg] = MI;
@@ -641,3 +655,36 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
.push_back(BBI->getOperand(i).getReg());
}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB) {
+ const unsigned NumNew = BB->getNumber();
+ const unsigned NumDom = DomBB->getNumber();
+
+ // Update info for all live variables
+ for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
+ E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
+ VarInfo &VI = getVarInfo(Reg);
+
+ // Anything live through DomBB is also live through BB.
+ if (VI.AliveBlocks.test(NumDom)) {
+ VI.AliveBlocks.set(NumNew);
+ continue;
+ }
+
+ // Variables not defined in DomBB cannot be live out.
+ const MachineInstr *Def = MRI->getVRegDef(Reg);
+ if (!Def || Def->getParent() != DomBB)
+ continue;
+
+ // Killed by DomBB?
+ if (VI.findKill(DomBB))
+ continue;
+
+ // This register is defined in DomBB and live out
+ VI.AliveBlocks.set(NumNew);
+ }
+}
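
addNewBlock() is consumed by the new PHIElimination::SplitCriticalEdge
later in this patch. A hedged usage sketch (all calls taken from that
hunk): after inserting an empty block on an edge out of A, liveness must
be patched before any pass queries it again.

    // Create the new block and register it with LiveVariables:
    // everything live out of A now also passes through NMBB.
    MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
    MF->push_back(NMBB);
    if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>())
      LV->addNewBlock(NMBB, A);
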
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 7fbdb12..cd52825 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/raw_ostream.h"
@@ -242,6 +243,58 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
getParent()->splice(++BBI, this);
}
+void MachineBasicBlock::updateTerminator() {
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ // A block with no successors has no concerns with fall-through edges.
+ if (this->succ_empty()) return;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
+ (void) B;
+ assert(!B && "UpdateTerminators requires analyzable predecessors!");
+ if (Cond.empty()) {
+ if (TBB) {
+ // The block has an unconditional branch. If its successor is now
+ // its layout successor, delete the branch.
+ if (isLayoutSuccessor(TBB))
+ TII->RemoveBranch(*this);
+ } else {
+ // The block has an unconditional fallthrough. If its successor is not
+ // its layout successor, insert a branch.
+ TBB = *succ_begin();
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond);
+ }
+ } else {
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If one of its
+ // successors is its layout successor, rewrite it to a fallthrough
+ // conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ TII->RemoveBranch(*this);
+ TII->ReverseBranchCondition(Cond);
+ TII->InsertBranch(*this, FBB, 0, Cond);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, 0, Cond);
+ }
+ } else {
+ // The block has a fallthrough conditional branch.
+ MachineBasicBlock *MBBA = *succ_begin();
+ MachineBasicBlock *MBBB = *next(succ_begin());
+ if (MBBA == TBB) std::swap(MBBB, MBBA);
+ if (isLayoutSuccessor(TBB)) {
+ TII->RemoveBranch(*this);
+ TII->ReverseBranchCondition(Cond);
+ TII->InsertBranch(*this, MBBA, 0, Cond);
+ } else if (!isLayoutSuccessor(MBBA)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, MBBA, Cond);
+ }
+ }
+ }
+}
void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
Successors.push_back(succ);
@@ -371,10 +424,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
MachineBasicBlock::succ_iterator SI = succ_begin();
MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
while (SI != succ_end()) {
- if (*SI == DestA && DestA == DestB) {
- DestA = DestB = 0;
- ++SI;
- } else if (*SI == DestA) {
+ if (*SI == DestA) {
DestA = 0;
++SI;
} else if (*SI == DestB) {
@@ -397,3 +447,8 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
}
return MadeChange;
}
+
+void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
+ bool t) {
+ OS << "BB#" << MBB->getNumber();
+}
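
The new WriteAsOperand overload gives machine basic blocks the same
one-line printing hook that IR values already have. A usage sketch (the
block names PredMBB and SuccMBB are placeholders):

    // Prints e.g. "critical edge: BB#3 -> BB#7".
    raw_ostream &OS = errs();
    OS << "critical edge: ";
    WriteAsOperand(OS, PredMBB, /*t=*/false);
    OS << " -> ";
    WriteAsOperand(OS, SuccMBB, /*t=*/false);
    OS << '\n';
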
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 5a1d9e6..81d1301 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -441,9 +441,10 @@ DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const {
/// index with a negative value.
///
int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
- bool Immutable) {
+ bool Immutable, bool isSS) {
assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
- Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable));
+ Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable,
+ isSS));
return -++NumFixedObjects;
}
@@ -529,10 +530,6 @@ void MachineFrameInfo::dump(const MachineFunction &MF) const {
unsigned MachineJumpTableInfo::getJumpTableIndex(
const std::vector<MachineBasicBlock*> &DestBBs) {
assert(!DestBBs.empty() && "Cannot create an empty jump table!");
- for (unsigned i = 0, e = JumpTables.size(); i != e; ++i)
- if (JumpTables[i].MBBs == DestBBs)
- return i;
-
JumpTables.push_back(MachineJumpTableEntry(DestBBs));
return JumpTables.size()-1;
}
@@ -544,14 +541,25 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Not making a change?");
bool MadeChange = false;
- for (size_t i = 0, e = JumpTables.size(); i != e; ++i) {
- MachineJumpTableEntry &JTE = JumpTables[i];
- for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
- if (JTE.MBBs[j] == Old) {
- JTE.MBBs[j] = New;
- MadeChange = true;
- }
- }
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+ MadeChange |= ReplaceMBBInJumpTable(i, Old, New);
+ return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool
+MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+ MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ MachineJumpTableEntry &JTE = JumpTables[Idx];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
return MadeChange;
}
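
With the per-table entry point split out, a caller that already knows
which jump table an instruction references can patch just that table.
Hypothetical usage (MJTI, Idx, OldBB and NewBB are placeholders):

    // Rewrite a single table when the index is known...
    bool Changed = MJTI->ReplaceMBBInJumpTable(Idx, OldBB, NewBB);
    // ...or fall back to scanning every table, as before.
    Changed |= MJTI->ReplaceMBBInJumpTables(OldBB, NewBB);
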
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 56294d9..f5febc5 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -24,7 +24,7 @@ X("Machine Function Analysis", "machine-function-analysis",
char MachineFunctionAnalysis::ID = 0;
-MachineFunctionAnalysis::MachineFunctionAnalysis(TargetMachine &tm,
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
CodeGenOpt::Level OL) :
FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) {
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 5744c8a..b250faa 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -189,19 +189,19 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
/// print - Print the specified machine operand.
///
void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // If the instruction is embedded into a basic block, we can find the
+ // target info for the instruction.
+ if (!TM)
+ if (const MachineInstr *MI = getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ TM = &MF->getTarget();
+
switch (getType()) {
case MachineOperand::MO_Register:
if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
OS << "%reg" << getReg();
} else {
- // If the instruction is embedded into a basic block, we can find the
- // target info for the instruction.
- if (TM == 0)
- if (const MachineInstr *MI = getParent())
- if (const MachineBasicBlock *MBB = MI->getParent())
- if (const MachineFunction *MF = MBB->getParent())
- TM = &MF->getTarget();
-
if (TM)
OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
else
@@ -265,7 +265,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << "<jt#" << getIndex() << '>';
break;
case MachineOperand::MO_GlobalAddress:
- OS << "<ga:" << ((Value*)getGlobal())->getName();
+ OS << "<ga:";
+ WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
@@ -375,7 +376,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
/// TID NULL and no operands.
MachineInstr::MachineInstr()
- : TID(0), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -395,7 +396,8 @@ void MachineInstr::addImplicitDefUseOperands() {
/// TargetInstrDesc or the numOperands if it is not zero. (for
/// instructions with variable number of operands).
MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0),
debugLoc(DebugLoc::getUnknownLoc()) {
if (!NoImp && TID->getImplicitDefs())
for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
@@ -413,7 +415,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
/// MachineInstr ctor - As above, but with a DebugLoc.
MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
bool NoImp)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(dl) {
if (!NoImp && TID->getImplicitDefs())
for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
@@ -433,7 +435,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
/// basic block.
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0),
debugLoc(DebugLoc::getUnknownLoc()) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
if (TID->ImplicitDefs)
@@ -453,7 +456,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
if (TID->ImplicitDefs)
@@ -472,7 +475,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : TID(&MI.getDesc()), NumImplicitOps(0),
+ : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0),
MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
@@ -1060,9 +1063,16 @@ void MachineInstr::dump() const {
}
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
- unsigned StartOp = 0, e = getNumOperands();
+ // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+ const MachineFunction *MF = 0;
+ if (const MachineBasicBlock *MBB = getParent()) {
+ MF = MBB->getParent();
+ if (!TM && MF)
+ TM = &MF->getTarget();
+ }
// Print explicitly defined operands on the left of an assignment syntax.
+ unsigned StartOp = 0, e = getNumOperands();
for (; StartOp < e && getOperand(StartOp).isReg() &&
getOperand(StartOp).isDef() &&
!getOperand(StartOp).isImplicit();
@@ -1078,11 +1088,45 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << getDesc().getName();
// Print the rest of the operands.
+ bool OmittedAnyCallClobbers = false;
+ bool FirstOp = true;
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
- if (i != StartOp)
- OS << ",";
+ const MachineOperand &MO = getOperand(i);
+
+ // Omit call-clobbered registers which aren't used anywhere. This makes
+ // call instructions much less noisy on targets where calls clobber lots
+ // of registers. Don't rely on MO.isDead() because we may be called before
+ // LiveVariables is run, or we may be looking at a non-allocatable reg.
+ if (MF && getDesc().isCall() &&
+ MO.isReg() && MO.isImplicit() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
+ bool HasAliasLive = false;
+ for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
+ if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
+ HasAliasLive = true;
+ break;
+ }
+ if (!HasAliasLive) {
+ OmittedAnyCallClobbers = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ if (FirstOp) FirstOp = false; else OS << ",";
OS << " ";
- getOperand(i).print(OS, TM);
+ MO.print(OS, TM);
+ }
+
+ // Briefly indicate whether any call clobbers were omitted.
+ if (OmittedAnyCallClobbers) {
+ if (FirstOp) FirstOp = false; else OS << ",";
+ OS << " ...";
}
bool HaveSemi = false;
@@ -1098,12 +1142,11 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
}
}
- if (!debugLoc.isUnknown()) {
+ if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
// TODO: print InlinedAtLoc information
- const MachineFunction *MF = getParent()->getParent();
DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
DICompileUnit CU(DLT.Scope);
if (!CU.isNull())
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index de3ab27..33b6b82 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -22,6 +22,7 @@
#define DEBUG_TYPE "machine-licm"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -43,6 +44,7 @@ STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
namespace {
class MachineLICM : public MachineFunctionPass {
+ MachineConstantPool *MCP;
const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -111,6 +113,11 @@ namespace {
/// be hoistable.
MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+ /// LookForDuplicate - Find an instruction among PrevMIs that is a
+ /// duplicate of MI. Return this instruction if it is found.
+ const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs);
+
/// EliminateCSE - Given a LICM'ed instruction, look for an instruction in
/// the preheader that computes the same value. If one is found, do a RAUW
/// with the definition of the existing instruction rather than hoisting
@@ -153,6 +160,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "******** Machine LICM ********\n");
Changed = FirstInLoop = false;
+ MCP = MF.getConstantPool();
TM = &MF.getTarget();
TII = TM->getInstrInfo();
TRI = TM->getRegisterInfo();
@@ -234,9 +242,9 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
if (!I.isInvariantLoad(AA))
- // FIXME: we should be able to sink loads with no other side effects if
- // there is nothing that can change memory from here until the end of
- // block. This is a trivial form of alias analysis.
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
return false;
}
@@ -432,32 +440,12 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
}
}
-static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
- std::vector<const MachineInstr*> &PrevMIs,
- MachineRegisterInfo *RegInfo) {
- unsigned NumOps = MI->getNumOperands();
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs) {
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
- unsigned NumOps2 = PrevMI->getNumOperands();
- if (NumOps != NumOps2)
- continue;
- bool IsSame = true;
- for (unsigned j = 0; j != NumOps; ++j) {
- const MachineOperand &MO = MI->getOperand(j);
- if (MO.isReg() && MO.isDef()) {
- if (RegInfo->getRegClass(MO.getReg()) !=
- RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) {
- IsSame = false;
- break;
- }
- continue;
- }
- if (!MO.isIdenticalTo(PrevMI->getOperand(j))) {
- IsSame = false;
- break;
- }
- }
- if (IsSame)
+ if (TII->isIdentical(MI, PrevMI, RegInfo))
return PrevMI;
}
return 0;
@@ -465,18 +453,19 @@ static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
bool MachineLICM::EliminateCSE(MachineInstr *MI,
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
- if (CI != CSEMap.end()) {
- if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second, RegInfo)) {
- DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef())
- RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
- }
- MI->eraseFromParent();
- ++NumCSEed;
- return true;
+ if (CI == CSEMap.end())
+ return false;
+
+ if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
}
+ MI->eraseFromParent();
+ ++NumCSEed;
+ return true;
}
return false;
}
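
The hand-rolled operand walk deleted above now lives behind
TargetInstrInfo::isIdentical, which (per the removed inline version)
compares virtual-register defs by register class rather than by register
number. A sketch of the resulting lookup loop:

    // Scan previously hoisted instructions for one the target deems
    // identical to MI; register defs match when their classes match.
    const MachineInstr *Dup = 0;
    for (unsigned i = 0, e = PrevMIs.size(); i != e && !Dup; ++i)
      if (TII->isIdentical(MI, PrevMIs[i], RegInfo))
        Dup = PrevMIs[i];
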
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index b62803f..4b067a0 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -76,9 +76,7 @@ void MachineModuleInfo::EndFunction() {
FilterEnds.clear();
CallsEHReturn = 0;
CallsUnwindInit = 0;
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
VariableDbgInfo.clear();
-#endif
}
/// AnalyzeModule - Scan the module for global debug information.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 99812e0..be9f68f 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -175,6 +175,10 @@ FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) {
return new MachineVerifier(allowPhysDoubleDefs);
}
+void MachineFunction::verify() const {
+ MachineVerifier().runOnMachineFunction(const_cast<MachineFunction&>(*this));
+}
+
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
raw_ostream *OutFile = 0;
if (OutFileName) {
@@ -287,7 +291,18 @@ void MachineVerifier::visitMachineFunctionBefore() {
markReachable(&MF->front());
}
-void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+// Does iterator point to a and b as the first two elements?
+bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
+ if (*i == a)
+ return *++i == b;
+ if (*i == b)
+ return *++i == a;
+ return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
// Start with minimal CFG sanity checks.
@@ -379,8 +394,7 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
} else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
- } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == MBBI) ||
- (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == MBBI)) {
+ } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
report("MBB exits via conditional branch/fall-through but the CFG "
"successors don't match the actual successors!", MBB);
}
@@ -400,8 +414,7 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/branch but doesn't have "
"exactly two CFG successors!", MBB);
- } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == FBB) ||
- (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == FBB)) {
+ } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
report("MBB exits via conditional branch/branch but the CFG "
"successors don't match the actual successors!", MBB);
}
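
Worth noting: the deleted conditions above indexed succ_end() (one past
the last successor) and reported an error precisely when the successors
*did* match, so matchPair fixes two bugs at once. Its contract is an
unordered-pair comparison:

    // matchPair(MBB->succ_begin(), X, Y) is true iff the first two
    // successors are {X, Y} in either order, so the verifier flags a
    // mismatch like this:
    if (MBB->succ_size() == 2 && !matchPair(MBB->succ_begin(), TBB, FBB))
      report("CFG successors don't match the analyzed branch", MBB);
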
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 8071b0a..cd38dd1 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -15,24 +15,32 @@
#define DEBUG_TYPE "phielim"
#include "PHIElimination.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Function.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include <algorithm>
#include <map>
using namespace llvm;
STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumSplits, "Number of critical edges split on demand");
+
+static cl::opt<bool>
+SplitEdges("split-phi-edges",
+ cl::desc("Split critical edges during phi elimination"),
+ cl::init(false), cl::Hidden);
char PHIElimination::ID = 0;
static RegisterPass<PHIElimination>
@@ -40,11 +48,26 @@ X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
const PassInfo *const llvm::PHIEliminationID = &X;
+namespace llvm { FunctionPass *createLocalRegisterAllocator(); }
+
+// Should we run edge splitting?
+static bool shouldSplitEdges() {
+ // Edge splitting breaks the local register allocator. It cannot tolerate
+ // LiveVariables being run.
+ if (RegisterRegAlloc::getDefault() == createLocalRegisterAllocator)
+ return false;
+ return SplitEdges;
+}
+
void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
AU.addPreserved<LiveVariables>();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
+ AU.addPreserved<MachineDominatorTree>();
+ if (shouldSplitEdges()) {
+ AU.addRequired<LiveVariables>();
+ } else {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ }
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -53,10 +76,16 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
PHIDefs.clear();
PHIKills.clear();
- analyzePHINodes(Fn);
-
bool Changed = false;
+ // Split critical edges to help the coalescer
+ if (shouldSplitEdges())
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(Fn, *I);
+
+ // Populate VRegPHIUseCount
+ analyzePHINodes(Fn);
+
// Eliminate PHI instructions by inserting copies into predecessor blocks.
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
Changed |= EliminatePHINodes(Fn, *I);
@@ -75,7 +104,6 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
return Changed;
}
-
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
///
@@ -107,26 +135,28 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
return true;
}
-// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg.
-// This needs to be after any def or uses of SrcReg, but before any subsequent
-// point where control flow might jump out of the basic block.
+// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
MachineBasicBlock::iterator
llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
+ MachineBasicBlock &SuccMBB,
unsigned SrcReg) {
// Handle the trivial case trivially.
if (MBB.empty())
return MBB.begin();
- // If this basic block does not contain an invoke, then control flow always
- // reaches the end of it, so place the copy there. The logic below works in
- // this case too, but is more expensive.
- if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator()))
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB.isLandingPad())
return MBB.getFirstTerminator();
- // Discover any definition/uses in this basic block.
+ // Discover any defs/uses in this basic block.
SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ++RI) {
+ RE = MRI->reg_end(); RI != RE; ++RI) {
MachineInstr *DefUseMI = &*RI;
if (DefUseMI->getParent() == &MBB)
DefUsesInMBB.insert(DefUseMI);
@@ -134,14 +164,14 @@ llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPoint;
if (DefUsesInMBB.empty()) {
- // No def/uses. Insert the copy at the start of the basic block.
+ // No defs. Insert the copy at the start of the basic block.
InsertPoint = MBB.begin();
} else if (DefUsesInMBB.size() == 1) {
- // Insert the copy immediately after the definition/use.
+ // Insert the copy immediately after the def/use.
InsertPoint = *DefUsesInMBB.begin();
++InsertPoint;
} else {
- // Insert the copy immediately after the last definition/use.
+ // Insert the copy immediately after the last def/use.
InsertPoint = MBB.end();
while (!DefUsesInMBB.count(&*--InsertPoint)) {}
++InsertPoint;
@@ -155,7 +185,7 @@ llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
/// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs. This lowering method is always correct.
-///
+///
void llvm::PHIElimination::LowerAtomicPHINode(
MachineBasicBlock &MBB,
MachineBasicBlock::iterator AfterPHIsIt) {
@@ -186,7 +216,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
}
// Record PHI def.
- assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
+ assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
PHIDefs[DestReg] = &MBB;
// Update live variable information if there is any.
@@ -250,92 +280,35 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// basic block.
if (!MBBsInsertedInto.insert(&opBlock))
continue; // If the copy has already been emitted, we're done.
-
+
// Find a safe location to insert the copy, this may be the first terminator
// in the block (or end()).
- MachineBasicBlock::iterator InsertPos = FindCopyInsertPoint(opBlock, SrcReg);
+ MachineBasicBlock::iterator InsertPos =
+ FindCopyInsertPoint(opBlock, MBB, SrcReg);
// Insert the copy.
TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
// Now update live variable information if we have it. Otherwise we're done
if (!LV) continue;
-
+
// We want to be able to insert a kill of the register if this PHI (aka, the
// copy we just inserted) is the last use of the source value. Live
// variable analysis conservatively handles this by saying that the value is
// live until the end of the block the PHI entry lives in. If the value
// really is dead at the PHI copy, there will be no successor blocks which
// have the value live-in.
- //
- // Check to see if the copy is the last use, and if so, update the live
- // variables information so that it knows the copy source instruction kills
- // the incoming value.
- LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg);
-
- // Loop over all of the successors of the basic block, checking to see if
- // the value is either live in the block, or if it is killed in the block.
+
// Also check to see if this register is in use by another PHI node which
// has not yet been eliminated. If so, it will be killed at an appropriate
// point later.
// Is it used by any PHI instructions in this block?
- bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
-
- std::vector<MachineBasicBlock*> OpSuccBlocks;
-
- // Otherwise, scan successors, including the BB the PHI node lives in.
- for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
- E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
-
- // Is it alive in this successor?
- unsigned SuccIdx = SuccMBB->getNumber();
- if (InRegVI.AliveBlocks.test(SuccIdx)) {
- ValueIsLive = true;
- break;
- }
-
- OpSuccBlocks.push_back(SuccMBB);
- }
-
- // Check to see if this value is live because there is a use in a successor
- // that kills it.
- if (!ValueIsLive) {
- switch (OpSuccBlocks.size()) {
- case 1: {
- MachineBasicBlock *MBB = OpSuccBlocks[0];
- for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
- if (InRegVI.Kills[i]->getParent() == MBB) {
- ValueIsLive = true;
- break;
- }
- break;
- }
- case 2: {
- MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1];
- for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
- if (InRegVI.Kills[i]->getParent() == MBB1 ||
- InRegVI.Kills[i]->getParent() == MBB2) {
- ValueIsLive = true;
- break;
- }
- break;
- }
- default:
- std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
- for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
- if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
- InRegVI.Kills[i]->getParent())) {
- ValueIsLive = true;
- break;
- }
- }
- }
+ bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
// Okay, if we now know that the value is not live out of the block, we can
// add a kill marker in this block saying that it kills the incoming value!
- if (!ValueIsLive) {
+ if (!ValueIsUsed && !isLiveOut(SrcReg, opBlock, *LV)) {
// In our final twist, we have to decide which instruction kills the
// register. In most cases this is the copy, however, the first
// terminator instruction at the end of the block may also use the value.
@@ -346,7 +319,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
if (Term != opBlock.end()) {
if (Term->readsRegister(SrcReg))
KillInst = Term;
-
+
// Check that no other terminators use values.
#ifndef NDEBUG
for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end();
@@ -357,16 +330,16 @@ void llvm::PHIElimination::LowerAtomicPHINode(
}
#endif
}
-
+
// Finally, mark it killed.
LV->addVirtualRegisterKilled(SrcReg, KillInst);
// This vreg no longer lives all of the way through opBlock.
unsigned opBlockNum = opBlock.getNumber();
- InRegVI.AliveBlocks.reset(opBlockNum);
+ LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
}
}
-
+
// Really delete the PHI instruction now!
MF.DeleteMachineInstr(MPhi);
++NumAtomic;
@@ -386,3 +359,134 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) {
++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(),
BBI->getOperand(i).getReg())];
}
+
+bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
+ return false; // Quick exit for basic blocks without PHIs.
+ LiveVariables &LV = getAnalysis<LiveVariables>();
+ for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // We break edges when registers are live out from the predecessor block
+ // (not considering PHI nodes). If the register is live in to this block
+ // anyway, we would gain nothing from splitting.
+ if (isLiveOut(Reg, *PreMBB, LV) && !isLiveIn(Reg, MBB, LV))
+ SplitCriticalEdge(PreMBB, &MBB);
+ }
+ }
+ return true;
+}
+
+bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV) {
+ LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ std::vector<MachineBasicBlock*> OpSuccBlocks;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB)
+ return true;
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB1 ||
+ VI.Kills[i]->getParent() == SuccMBB2)
+ return true;
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ VI.Kills[i]->getParent()))
+ return true;
+ }
+ return false;
+}
+
+bool llvm::PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV) {
+ LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
+
+ if (VI.AliveBlocks.test(MBB.getNumber()))
+ return true;
+
+ // defined in MBB?
+ const MachineInstr *Def = MRI->getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // killed in MBB?
+ return VI.findKill(&MBB);
+}
+
+MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
+ MachineBasicBlock *B) {
+ assert(A && B && "Missing MBB end point");
+
+ MachineFunction *MF = A->getParent();
+
+ // We may need to update A's terminator, but we can't do that if AnalyzeBranch
+ // fails. If A uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*A, TBB, FBB, Cond))
+ return NULL;
+
+ ++NumSplits;
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->push_back(NMBB);
+ DEBUG(errs() << "PHIElimination splitting critical edge:"
+ " BB#" << A->getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << B->getNumber() << '\n');
+
+ A->ReplaceUsesOfBlockWith(B, NMBB);
+ // If A may fall through to B, we may have to insert a branch.
+ if (A->isLayoutSuccessor(B))
+ A->updateTerminator();
+
+ // Insert unconditional "jump B" instruction in NMBB.
+ NMBB->addSuccessor(B);
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond);
+
+ // Fix PHI nodes in B so they refer to NMBB instead of A
+ for (MachineBasicBlock::iterator i = B->begin(), e = B->end();
+ i != e && i->getOpcode() == TargetInstrInfo::PHI; ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == A)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>())
+ LV->addNewBlock(NMBB, A);
+
+ if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->addNewBlock(NMBB, A);
+
+ return NMBB;
+}
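
SplitCriticalEdge never tests whether the edge is actually critical; the
caller (SplitPHIEdges above) gates on liveness instead. For reference, a
standalone sketch of the classic criticality test, which a caller could
use as an additional filter (this helper is not part of the patch):

    // An edge A -> B is critical when A has several successors and B
    // has several predecessors; a copy inserted in A would otherwise
    // execute on paths that never reach B.
    static bool isCriticalEdge(const MachineBasicBlock *A,
                               const MachineBasicBlock *B) {
      return A->succ_size() > 1 && B->pred_size() > 1;
    }
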
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index 3d02dfd..94716ee 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -89,11 +89,33 @@ namespace llvm {
///
void analyzePHINodes(const MachineFunction& Fn);
- // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
- // SrcReg. This needs to be after any def or uses of SrcReg, but before
- // any subsequent point where control flow might jump out of the basic
- // block.
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ /// isLiveOut - Determine if Reg is live out from MBB, when not
+ /// considering PHI nodes. This means that Reg is either killed by
+ /// a successor block or passed through one.
+ bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV);
+
+ /// isLiveIn - Determine if Reg is live in to MBB, not considering PHI
+ /// source registers. This means that Reg is either killed by MBB or passes
+ /// through it.
+ bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV);
+
+ /// SplitCriticalEdge - Split a critical edge from A to B by
+ /// inserting a new MBB. Update branches in A and PHI instructions
+ /// in B. Return the new block.
+ MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A,
+ MachineBasicBlock *B);
+
+ /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
+ MachineBasicBlock &SuccMBB,
unsigned SrcReg);
// SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 3ed61a2..5f1f1f3 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -216,13 +216,14 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Check for explicit enable/disable of post-ra scheduling.
TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
+ SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
return false;
} else {
// Check that post-RA scheduling is enabled for this target.
const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
- if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode))
+ if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
return false;
}
@@ -243,7 +244,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
(ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
AntiDepBreaker *ADB =
((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
- (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn) :
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
(AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
@@ -602,7 +603,9 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge,
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
ReleaseSucc(SU, &*I, IgnoreAntiDep);
}
}
@@ -657,7 +660,7 @@ void SchedulePostRATDList::ListScheduleTopDown(
available = true;
for (SUnit::const_pred_iterator I = SUnits[i].Preds.begin(),
E = SUnits[i].Preds.end(); I != E; ++I) {
- if (I->getKind() != SDep::Anti) {
+ if ((I->getKind() != SDep::Anti) && (I->getKind() != SDep::Output)) {
available = false;
} else {
SUnits[i].NumPredsLeft -= 1;
@@ -736,7 +739,9 @@ void SchedulePostRATDList::ListScheduleTopDown(
AntiDepBreaker::AntiDepRegVector AntiDepRegs;
for (SUnit::const_pred_iterator I = FoundSUnit->Preds.begin(),
E = FoundSUnit->Preds.end(); I != E; ++I) {
- if ((I->getKind() == SDep::Anti) && !I->getSUnit()->isScheduled)
+ if (((I->getKind() == SDep::Anti) ||
+ (I->getKind() == SDep::Output)) &&
+ !I->getSUnit()->isScheduled)
AntiDepRegs.push_back(I->getReg());
}
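
The breaker selection a few hunks up is a nested conditional expression;
an equivalent flattened form may read more easily (a sketch with the same
semantics as the ternary in the patch):

    AntiDepBreaker *ADB = 0;
    switch (AntiDepMode) {
    case TargetSubtarget::ANTIDEP_ALL:
      ADB = new AggressiveAntiDepBreaker(Fn, CriticalPathRCs);
      break;
    case TargetSubtarget::ANTIDEP_CRITICAL:
      ADB = new CriticalAntiDepBreaker(Fn);
      break;
    default:                       // ANTIDEP_NONE
      break;
    }
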
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index cce5ae8..8f62345 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -39,8 +39,10 @@
using namespace llvm;
static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
-static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), cl::Hidden);
-static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1),
+ cl::Hidden);
+static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1),
+ cl::Hidden);
STATISTIC(NumSplits, "Number of intervals split");
STATISTIC(NumRemats, "Number of intervals split by rematerialization");
@@ -131,17 +133,14 @@ namespace {
private:
- MachineBasicBlock::iterator
- findNextEmptySlot(MachineBasicBlock*, MachineInstr*,
- SlotIndex&);
MachineBasicBlock::iterator
findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
- SmallPtrSet<MachineInstr*, 4>&, SlotIndex&);
+ SmallPtrSet<MachineInstr*, 4>&);
MachineBasicBlock::iterator
findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex,
- SmallPtrSet<MachineInstr*, 4>&, SlotIndex&);
+ SmallPtrSet<MachineInstr*, 4>&);
int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
@@ -161,7 +160,6 @@ namespace {
bool Rematerialize(unsigned vreg, VNInfo* ValNo,
MachineInstr* DefMI,
MachineBasicBlock::iterator RestorePt,
- SlotIndex RestoreIdx,
SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
MachineInstr* DefMI,
@@ -208,24 +206,6 @@ X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting");
const PassInfo *const llvm::PreAllocSplittingID = &X;
-
-/// findNextEmptySlot - Find a gap after the given machine instruction in the
-/// instruction index map. If there isn't one, return end().
-MachineBasicBlock::iterator
-PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
- SlotIndex &SpotIndex) {
- MachineBasicBlock::iterator MII = MI;
- if (++MII != MBB->end()) {
- SlotIndex Index =
- LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
- if (Index != SlotIndex()) {
- SpotIndex = Index;
- return MII;
- }
- }
- return MBB->end();
-}
-
/// findSpillPoint - Find a gap as far away from the given MI that's suitable
/// for spilling the current live interval. The index must be before any
/// defs and uses of the live interval register in the mbb. Return begin() if
@@ -233,8 +213,7 @@ PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
MachineBasicBlock::iterator
PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
MachineInstr *DefMI,
- SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
- SlotIndex &SpillIndex) {
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
MachineBasicBlock::iterator Pt = MBB->begin();
MachineBasicBlock::iterator MII = MI;
@@ -247,8 +226,6 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
while (MII != EndPt && !RefsInMBB.count(MII)) {
- SlotIndex Index = LIs->getInstructionIndex(MII);
-
// We can't insert the spill between the barrier (a call), and its
// corresponding call frame setup.
if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) {
@@ -259,9 +236,8 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
}
}
continue;
- } else if (LIs->hasGapBeforeInstr(Index)) {
+ } else {
Pt = MII;
- SpillIndex = LIs->findGapBeforeInstr(Index, true);
}
if (RefsInMBB.count(MII))
@@ -281,8 +257,7 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
MachineBasicBlock::iterator
PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
SlotIndex LastIdx,
- SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
- SlotIndex &RestoreIndex) {
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
// FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
// begin index accordingly.
MachineBasicBlock::iterator Pt = MBB->end();
@@ -306,7 +281,6 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
SlotIndex Index = LIs->getInstructionIndex(MII);
if (Index > LastIdx)
break;
- SlotIndex Gap = LIs->findGapBeforeInstr(Index);
// We can't insert a restore between the barrier (a call) and its
// corresponding call frame teardown.
@@ -315,9 +289,8 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
++MII;
} while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
- } else if (Gap != SlotIndex()) {
+ } else {
Pt = MII;
- RestoreIndex = Gap;
}
if (RefsInMBB.count(MII))
@@ -339,7 +312,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
if (I != IntervalSSMap.end()) {
SS = I->second;
} else {
- SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
IntervalSSMap[Reg] = SS;
}
@@ -364,10 +337,10 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
if (!DefMBB)
return false;
- DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+ DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg);
if (I == IntervalSSMap.end())
return false;
- DenseMap<SlotIndex, SlotIndex>::iterator
+ DenseMap<SlotIndex, SlotIndex>::const_iterator
II = Def2SpillMap.find(DefIndex);
if (II == Def2SpillMap.end())
return false;
@@ -740,7 +713,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
DefIdx = DefIdx.getDefIndex();
assert(DI->getOpcode() != TargetInstrInfo::PHI &&
- "Following NewVN isPHIDef flag incorrect. Fix me!");
+ "PHI instr in code during pre-alloc splitting.");
VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
// If the def is a move, set the copy field.
@@ -896,25 +869,22 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
MachineInstr* DefMI,
MachineBasicBlock::iterator RestorePt,
- SlotIndex RestoreIdx,
SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
MachineBasicBlock& MBB = *RestorePt->getParent();
MachineBasicBlock::iterator KillPt = BarrierMBB->end();
- SlotIndex KillIdx;
if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
- KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
+ KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
else
- KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
+ KillPt = next(MachineBasicBlock::iterator(DefMI));
if (KillPt == DefMI->getParent()->end())
return false;
- TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI);
- LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx);
+ TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI);
+ SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt));
ReconstructLiveInterval(CurrLI);
- SlotIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt));
RematIdx = RematIdx.getDefIndex();
RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx));
@@ -955,7 +925,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
if (I != IntervalSSMap.end()) {
SS = I->second;
} else {
- SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
}
MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
@@ -1086,17 +1056,15 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
}
// Find a point to restore the value after the barrier.
- SlotIndex RestoreIndex;
MachineBasicBlock::iterator RestorePt =
- findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex);
+ findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB);
if (RestorePt == BarrierMBB->end()) {
DEBUG(errs() << "FAILED (could not find a suitable restore point).\n");
return false;
}
if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
- if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt,
- RestoreIndex, RefsInMBB)) {
+ if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) {
DEBUG(errs() << "success (remat).\n");
return true;
}
@@ -1114,7 +1082,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SpillIndex = LIs->getInstructionIndex(SpillMI);
} else {
MachineBasicBlock::iterator SpillPt =
- findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, SpillIndex);
+ findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
if (SpillPt == BarrierMBB->begin()) {
DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
@@ -1124,10 +1092,10 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SS = CreateSpillStackSlot(CurrLI->reg, RC);
TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC);
SpillMI = prior(SpillPt);
- LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
+ SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
}
} else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def,
- RestoreIndex, SpillIndex, SS)) {
+ LIs->getZeroIndex(), SpillIndex, SS)) {
// If it's already split, just restore the value. There is no need to spill
// the def again.
if (!DefMI) {
@@ -1144,13 +1112,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
if (DefMBB == BarrierMBB) {
// Add spill after the def and the last use before the barrier.
SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
- RefsInMBB, SpillIndex);
+ RefsInMBB);
if (SpillPt == DefMBB->begin()) {
DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
}
} else {
- SpillPt = findNextEmptySlot(DefMBB, DefMI, SpillIndex);
+ SpillPt = next(MachineBasicBlock::iterator(DefMI));
if (SpillPt == DefMBB->end()) {
DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
@@ -1160,7 +1128,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SS = CreateSpillStackSlot(CurrLI->reg, RC);
TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC);
SpillMI = prior(SpillPt);
- LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
+ SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
}
}
@@ -1170,6 +1138,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
// Add restore.
bool FoldedRestore = false;
+ SlotIndex RestoreIndex;
if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier,
BarrierMBB, SS, RefsInMBB)) {
RestorePt = LMI;
@@ -1178,7 +1147,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
} else {
TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC);
MachineInstr *LoadMI = prior(RestorePt);
- LIs->InsertMachineInstrInMaps(LoadMI, RestoreIndex);
+ RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI);
}
// Update spill stack slot live interval.
@@ -1398,7 +1367,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
// Otherwise, this is a load-store case, so DCE them.
for (SmallPtrSet<MachineInstr*, 4>::iterator UI =
VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
- UI != UI; ++UI) {
+ UI != UE; ++UI) {
LIs->RemoveMachineInstrFromMaps(*UI);
(*UI)->eraseFromParent();
}
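
The "UI != UE" correction above is the substantive fix in removeDeadSpills: the old condition compared the iterator with itself, so the dead-code-elimination loop never executed and dead load/store pairs were silently kept. A minimal standalone illustration of the corrected loop shape (plain C++ with std::set standing in for SmallPtrSet; these are not the LLVM types):

#include <cstdio>
#include <set>

int main() {
  std::set<int> uses;
  uses.insert(1); uses.insert(2); uses.insert(3);
  // Buggy form: "it != it" is always false, so the body never runs:
  //   for (auto it = uses.begin(), e = uses.end(); it != it; ++it) ...
  // Fixed form: compare against the cached end iterator.
  for (std::set<int>::iterator it = uses.begin(), e = uses.end();
       it != e; ++it)
    std::printf("would erase use %d\n", *it);
  return 0;
}
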
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 48567a0..455964b 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -77,6 +77,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
SmallVector<MachineInstr*, 8> ImpDefMIs;
MachineBasicBlock *Entry = fn.begin();
SmallPtrSet<MachineBasicBlock*,16> Visited;
+ SmallPtrSet<MachineInstr*, 8> ModInsts;
for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
@@ -201,6 +202,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MachineOperand &RMO = UI.getOperand();
MachineInstr *RMI = &*UI;
++UI;
+ if (ModInsts.count(RMI))
+ continue;
MachineBasicBlock *RMBB = RMI->getParent();
if (RMBB == MBB)
continue;
@@ -209,9 +212,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
Reg == SrcReg) {
+ if (RMO.isKill()) {
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+ vi.removeKill(RMI);
+ }
RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j)
RMI->RemoveOperand(j);
+ ModInsts.insert(RMI);
continue;
}
@@ -222,6 +230,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
RMO.setIsKill();
}
}
+ ModInsts.clear();
ImpDefRegs.clear();
ImpDefMIs.clear();
}
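
The new ModInsts set keeps the use-list walk from rewriting the same instruction twice after it has already been turned into an IMPLICIT_DEF. A reduced sketch of that guard pattern, with std::set in place of SmallPtrSet and a toy Instr type (names are illustrative only):

#include <set>
#include <vector>

struct Instr { int id; bool rewritten; };

// Rewrite each instruction at most once even if the traversal
// reaches it repeatedly, by remembering what was already modified.
static void rewriteUses(const std::vector<Instr*> &worklist) {
  std::set<Instr*> modified;   // plays the role of ModInsts
  for (std::vector<Instr*>::const_iterator I = worklist.begin(),
       E = worklist.end(); I != E; ++I) {
    if (modified.count(*I))
      continue;                // already an IMPLICIT_DEF; skip it
    (*I)->rewritten = true;    // stands in for setDesc(IMPLICIT_DEF)
    modified.insert(*I);
  }
  // The pass clears the set per basic block, as the patch does.
}

int main() {
  Instr a = {1, false}, b = {2, false};
  std::vector<Instr*> wl;
  wl.push_back(&a); wl.push_back(&b); wl.push_back(&a); // 'a' twice
  rewriteUses(wl);
  return 0;
}
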
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 230a20c..8905f75 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -264,7 +264,8 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
- FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset);
+ FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
+ true, false);
}
I->setFrameIdx(FrameIdx);
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 5507646..7fb3e6e 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -43,35 +43,14 @@ static const char *const PSVNames[] = {
// Eventually these should be uniqued on LLVMContext rather than in a managed
// static. For now, we can safely use the global context for the time being to
// squeak by.
-PseudoSourceValue::PseudoSourceValue() :
+PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
Value(Type::getInt8PtrTy(getGlobalContext()),
- PseudoSourceValueVal) {}
+ Subclass) {}
void PseudoSourceValue::printCustom(raw_ostream &O) const {
O << PSVNames[this - *PSVs];
}
-namespace {
- /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue
- /// for holding FixedStack values, which must include a frame
- /// index.
- class FixedStackPseudoSourceValue : public PseudoSourceValue {
- const int FI;
- public:
- explicit FixedStackPseudoSourceValue(int fi) : FI(fi) {}
-
- virtual bool isConstant(const MachineFrameInfo *MFI) const;
-
- virtual bool isAliased(const MachineFrameInfo *MFI) const;
-
- virtual bool mayAlias(const MachineFrameInfo *) const;
-
- virtual void printCustom(raw_ostream &OS) const {
- OS << "FixedStack" << FI;
- }
- };
-}
-
static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues;
const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
@@ -130,3 +109,7 @@ bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
// Spill slots will not alias any LLVM IR value.
return !MFI->isSpillSlotObjectIndex(FI);
}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+}
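
Threading the ValueTy subclass tag through the PseudoSourceValue constructor is what makes FixedStackPseudoSourceValue visible to isa<>/dyn_cast<> now that the class has moved out of the anonymous namespace. A compact sketch of the same hand-rolled RTTI scheme, with illustrative names rather than the LLVM ones:

#include <cstdio>

struct Base {
  enum Kind { GenericKind, FixedStackKind };
  explicit Base(Kind K) : TheKind(K) {} // subclass passes its tag up
  Kind getKind() const { return TheKind; }
private:
  Kind TheKind;
};

struct FixedStack : Base {
  int FI;
  explicit FixedStack(int fi) : Base(FixedStackKind), FI(fi) {}
  // Enables dyn_cast-style queries without C++ RTTI.
  static bool classof(const Base *B) {
    return B->getKind() == FixedStackKind;
  }
};

int main() {
  FixedStack FS(3);
  Base *B = &FS;
  if (FixedStack::classof(B))
    std::printf("FixedStack%d\n", static_cast<FixedStack *>(B)->FI);
  return 0;
}
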
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 1957c16..7bb020a 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -261,8 +261,8 @@ int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
return SS; // Already has space allocated?
// Allocate a new stack object for this spill location...
- int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
- RC->getAlignment(),true);
+ int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
// Assign the slot...
StackSlotForVirtReg[VirtReg] = FrameIdx;
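
The switch from CreateStackObject(..., true) to CreateSpillStackObject(...) replaces an easily misread trailing boolean with a named entry point. A generic sketch of that refactoring shape (the FrameInfo type here is a stand-in, not the LLVM MachineFrameInfo API):

struct FrameInfo {
  int NextIdx;
  // General entry point: the trailing bool is opaque at call sites.
  int createStackObject(unsigned Size, unsigned Align, bool IsSpillSlot) {
    (void)Size; (void)Align; (void)IsSpillSlot;
    return NextIdx++;
  }
  // Named wrapper: the intent is visible wherever it is called.
  int createSpillStackObject(unsigned Size, unsigned Align) {
    return createStackObject(Size, Align, /*IsSpillSlot=*/true);
  }
};

int main() {
  FrameInfo FFI = { 0 };
  return FFI.createSpillStackObject(4, 4); // frame index 0
}
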
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 5757e47..c677d34 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -693,6 +693,11 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
}
bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
+
+ // Assert that this is a valid solution to the regalloc problem.
+ assert(solution.getCost() != std::numeric_limits<PBQP::PBQPNum>::infinity() &&
+ "Invalid (infinite cost) solution for PBQP problem.");
+
// Set to true if we have any spills
bool anotherRoundNeeded = false;
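
The added assertion rejects infeasible PBQP solutions before they are mapped back onto registers; an infinite total cost means the solver found no valid assignment. The check itself is just a comparison against IEEE infinity, e.g.:

#include <cassert>
#include <limits>

// A finite cost is the signal that the solver produced a feasible
// register assignment (spill decisions included).
static void checkSolutionCost(double cost) {
  assert(cost != std::numeric_limits<double>::infinity() &&
         "Invalid (infinite cost) solution for PBQP problem.");
  (void)cost; // keep release builds warning-free
}

int main() {
  checkSolutionCost(42.0);
  return 0;
}
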
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index cf90aba..94680ed 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -100,11 +100,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
CalleeSavedRegs.set(CSRegs[i]);
}
- // RS used within emit{Pro,Epi}logue()
- if (mbb != MBB) {
- MBB = mbb;
- initRegState();
- }
+ MBB = mbb;
+ initRegState();
Tracking = false;
}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 1363a92..6b27db2 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -214,7 +214,10 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) {
unsigned MaxPredDepth = 0;
for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
E = Cur->Preds.end(); I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
SUnit *PredSU = I->getSUnit();
if (PredSU->isDepthCurrent)
MaxPredDepth = std::max(MaxPredDepth,
@@ -248,7 +251,10 @@ void SUnit::ComputeHeight(bool IgnoreAntiDep) {
unsigned MaxSuccHeight = 0;
for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
E = Cur->Succs.end(); I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
SUnit *SuccSU = I->getSUnit();
if (SuccSU->isHeightCurrent)
MaxSuccHeight = std::max(MaxSuccHeight,
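
Both ComputeDepth and ComputeHeight now skip output dependences alongside anti dependences when IgnoreAntiDep is set, so neither edge kind contributes to the critical-path estimate. A toy model of the depth rule (recursive for brevity; the real code uses an explicit worklist, and these types are stand-ins for SUnit/SDep):

#include <algorithm>
#include <vector>

enum DepKind { Data, Anti, Output, Order };

struct Node;
struct Edge { Node *Pred; DepKind Kind; };
struct Node { std::vector<Edge> Preds; };

// Depth = longest path from a root, counting only edges that are
// not anti/output deps when those are being ignored.
static unsigned depth(const Node *N, bool IgnoreAntiDep) {
  unsigned MaxPredDepth = 0;
  for (unsigned i = 0, e = N->Preds.size(); i != e; ++i) {
    const Edge &E = N->Preds[i];
    if (IgnoreAntiDep && (E.Kind == Anti || E.Kind == Output))
      continue;
    MaxPredDepth = std::max(MaxPredDepth,
                            depth(E.Pred, IgnoreAntiDep) + 1);
  }
  return MaxPredDepth;
}

int main() {
  Node A, B, C;
  Edge AB = { &A, Data };  B.Preds.push_back(AB);
  Edge BC = { &B, Anti };  C.Preds.push_back(BC); // ignored below
  return depth(&C, true);  // 0: the anti edge adds no depth
}
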
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index f8b219d..56dd533 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -112,12 +112,13 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
V = getUnderlyingObject(V);
if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
- MayAlias = PSV->mayAlias(MFI);
// For now, ignore PseudoSourceValues which may alias LLVM IR values
// because the code that uses this function has no way to cope with
// such aliases.
if (PSV->isAliased(MFI))
return 0;
+
+ MayAlias = PSV->mayAlias(MFI);
return V;
}
@@ -127,23 +128,6 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
return 0;
}
-static bool mayUnderlyingObjectForInstrAlias(const MachineInstr *MI,
- const MachineFrameInfo *MFI) {
- if (!MI->hasOneMemOperand() ||
- !(*MI->memoperands_begin())->getValue() ||
- (*MI->memoperands_begin())->isVolatile())
- return true;
-
- const Value *V = (*MI->memoperands_begin())->getValue();
- if (!V)
- return true;
-
- V = getUnderlyingObject(V);
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
- return PSV->mayAlias(MFI);
- return true;
-}
-
void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
if (MachineLoop *ML = MLI.getLoopFor(BB))
if (BB == ML->getLoopLatch()) {
@@ -163,16 +147,15 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// We build scheduling units by walking a block's instruction list from bottom
// to top.
- // Remember where a generic side-effecting instruction is as we procede. If
- // ChainMMO is null, this is assumed to have arbitrary side-effects. If
- // ChainMMO is non-null, then Chain makes only a single memory reference.
- SUnit *Chain = 0;
- MachineMemOperand *ChainMMO = 0;
+ // Remember where a generic side-effecting instruction is as we proceed.
+ SUnit *BarrierChain = 0, *AliasChain = 0;
- // Memory references to specific known memory locations are tracked so that
- // they can be given more precise dependencies.
- std::map<const Value *, SUnit *> MemDefs;
- std::map<const Value *, std::vector<SUnit *> > MemUses;
+ // Memory references to specific known memory locations are tracked
+ // so that they can be given more precise dependencies. We track
+ // separately the known memory locations that may alias and those
+ // that are known not to alias.
+ std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
@@ -347,114 +330,132 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (TID.isCall() || TID.hasUnmodeledSideEffects()) {
- new_chain:
- // This is the conservative case. Add dependencies on all memory
- // references.
- if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- Chain = SU;
+ if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
+ (MI->hasVolatileMemoryRef() &&
+ (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ // Be conservative with these and add dependencies on all memory
+ // references, even those that are known to not alias.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ }
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
+ // Add SU to the barrier chain.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain = SU;
+
+ // fall-through
+ new_alias_chain:
+ // Chain all possibly aliasing memory references through SU.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
- PendingLoads.clear();
- for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
- E = MemDefs.end(); I != E; ++I) {
+ for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ E = AliasMemDefs.end(); I != E; ++I) {
I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- I->second = SU;
}
for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
- MemUses.begin(), E = MemUses.end(); I != E; ++I) {
+ AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
- I->second.clear();
- I->second.push_back(SU);
}
- // See if it is known to just have a single memory reference.
- MachineInstr *ChainMI = Chain->getInstr();
- const TargetInstrDesc &ChainTID = ChainMI->getDesc();
- if (!ChainTID.isCall() &&
- !ChainTID.hasUnmodeledSideEffects() &&
- ChainMI->hasOneMemOperand() &&
- !(*ChainMI->memoperands_begin())->isVolatile() &&
- (*ChainMI->memoperands_begin())->getValue())
- // We know that the Chain accesses one specific memory location.
- ChainMMO = *ChainMI->memoperands_begin();
- else
- // Unknown memory accesses. Assume the worst.
- ChainMMO = 0;
+ PendingLoads.clear();
+ AliasMemDefs.clear();
+ AliasMemUses.clear();
} else if (TID.mayStore()) {
bool MayAlias = true;
TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
// A store to a specific PseudoSourceValue. Add precise dependencies.
- // Handle the def in MemDefs, if there is one.
- std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
- if (I != MemDefs.end()) {
+ // Record the def in MemDefs, first adding a dep if there is
+ // an existing def.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE) {
I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
/*isNormalMemory=*/true));
I->second = SU;
} else {
- MemDefs[V] = SU;
+ if (MayAlias)
+ AliasMemDefs[V] = SU;
+ else
+ NonAliasMemDefs[V] = SU;
}
// Handle the uses in MemUses, if there are any.
std::map<const Value *, std::vector<SUnit *> >::iterator J =
- MemUses.find(V);
- if (J != MemUses.end()) {
+ ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ std::map<const Value *, std::vector<SUnit *> >::iterator JE =
+ ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
/*Reg=*/0, /*isNormalMemory=*/true));
J->second.clear();
}
if (MayAlias) {
- // Add dependencies from all the PendingLoads, since without
- // memoperands we must assume they alias anything.
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
- // Add a general dependence too, if needed.
- if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
}
+ // Add dependence on barrier chain, if needed.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
} else {
// Treat all other stores conservatively.
- goto new_chain;
+ goto new_alias_chain;
}
} else if (TID.mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
- } else if (const Value *V =
- getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
- // A load from a specific PseudoSourceValue. Add precise dependencies.
- std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
- if (I != MemDefs.end())
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
- /*isNormalMemory=*/true));
- MemUses[V].push_back(SU);
-
- // Add a general dependence too, if needed.
- if (Chain && (!ChainMMO ||
- (ChainMMO->isStore() || ChainMMO->isVolatile())))
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- } else if (MI->hasVolatileMemoryRef()) {
- // Treat volatile loads conservatively. Note that this includes
- // cases where memoperand information is unavailable.
- goto new_chain;
} else {
- // A "MayAlias" load. Depend on the general chain, as well as on
- // all stores. In the absense of MachineMemOperand information,
- // we can't even assume that the load doesn't alias well-behaved
- // memory locations.
- if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
- E = MemDefs.end(); I != E; ++I) {
- SUnit *DefSU = I->second;
- if (mayUnderlyingObjectForInstrAlias(DefSU->getInstr(), MFI))
- DefSU->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (const Value *V =
+ getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ if (MayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
+ } else {
+ // A load with no underlying object. Depend on all
+ // potentially aliasing stores.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+
+ PendingLoads.push_back(SU);
+ MayAlias = true;
}
- PendingLoads.push_back(SU);
- }
+
+ // Add dependencies on alias and barrier chains, if needed.
+ if (MayAlias && AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
}
}
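
The rewritten BuildSchedGraph splits the old single chain into two anchors: BarrierChain orders all memory references (calls, unmodeled side effects, volatile accesses), while AliasChain only orders references that might alias, letting known-non-aliasing accesses such as spill slots bypass it. A stripped-down sketch of the bookkeeping for stores, with toy types in place of SUnit/SDep and strings as underlying objects (assumptions, not the LLVM API):

#include <map>
#include <string>
#include <vector>

struct SU { std::vector<SU *> Preds; };

// Mirrors X->addPred(SDep(S, ...)): X is the later-visited node in
// the bottom-up walk and gains an ordering edge on S.
static void addPred(SU *X, SU *S) { X->Preds.push_back(S); }

struct DepState {
  SU *BarrierChain; // orders *all* memory references
  SU *AliasChain;   // orders may-alias references only
  std::map<std::string, SU *> AliasMemDefs, NonAliasMemDefs;
  DepState() : BarrierChain(0), AliasChain(0) {}

  void visitStore(SU *S, const std::string &Obj, bool MayAlias) {
    std::map<std::string, SU *> &Defs =
        MayAlias ? AliasMemDefs : NonAliasMemDefs;
    std::map<std::string, SU *>::iterator I = Defs.find(Obj);
    if (I != Defs.end()) {
      addPred(I->second, S); // precise dep on the previous def
      I->second = S;
    } else {
      Defs[Obj] = S;
    }
    if (MayAlias && AliasChain)
      addPred(AliasChain, S); // may-alias stores join the alias chain
    if (BarrierChain)
      addPred(BarrierChain, S); // everything respects the barrier
  }
};

int main() {
  DepState D;
  SU S1, S2;
  D.visitStore(&S1, "slot0", /*MayAlias=*/false);
  D.visitStore(&S2, "slot0", /*MayAlias=*/false);
  return (int)S1.Preds.size(); // 1: second store ordered on the first
}
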
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
index fbe40b6..38839c4 100644
--- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -77,6 +77,21 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
}
}
+/// CheckReturn - Analyze the return values of a function, returning true if
+/// the return can be performed without sret-demotion, and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = OutTys.size(); i != e; ++i) {
+ EVT VT = OutTys[i];
+ ISD::ArgFlagsTy ArgFlags = ArgsFlags[i];
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ return false;
+ }
+ return true;
+}
+
/// AnalyzeReturn - Analyze the returned values of a return,
/// incorporating info about the result values into this state.
void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
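
CheckReturn dry-runs the calling-convention assignment over the return types: if any part fails to get a location, the caller knows up front that the return must be demoted to sret. A schematic of that use, with a toy assignment predicate in place of a real CCAssignFn (illustrative only):

#include <vector>

// The predicate returns true when a value can NOT be assigned,
// matching how CCAssignFn reports failure in the patch above.
typedef bool (*AssignFn)(unsigned ValNo, int BitWidth);

static bool checkReturn(const std::vector<int> &OutTys, AssignFn Fn) {
  for (unsigned i = 0, e = OutTys.size(); i != e; ++i)
    if (Fn(i, OutTys[i]))
      return false; // some part does not fit: demote via sret
  return true;      // everything fits in return registers
}

static bool toyAssign(unsigned, int BitWidth) { return BitWidth > 64; }

int main() {
  std::vector<int> small, big;
  small.push_back(32); small.push_back(32);
  big.push_back(128);
  bool a = checkReturn(small, toyAssign); // true: normal return
  bool b = checkReturn(big, toyAssign);   // false: sret demotion
  return (a && !b) ? 0 : 1;
}
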
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5f70cb8..06ffdd6 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <set>
using namespace llvm;
STATISTIC(NodesCombined , "Number of dag nodes combined");
@@ -4443,14 +4442,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- // never taken branch, fold to chain
- if (N1C && N1C->isNullValue())
- return Chain;
- // unconditional branch
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2);
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
@@ -4517,22 +4515,18 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
// Use SimplifySetCC to simplify SETCC's.
SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
false);
if (Simp.getNode()) AddToWorkList(Simp.getNode());
- ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode());
-
- // fold br_cc true, dest -> br dest (unconditional branch)
- if (SCCC && !SCCC->isNullValue())
- return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other,
- N->getOperand(0), N->getOperand(4));
- // fold br_cc false, dest -> unconditional fall through
- if (SCCC && SCCC->isNullValue())
- return N->getOperand(0);
-
// fold to a simpler setcc
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 8e955af..7dbc136 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -43,6 +43,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -324,82 +325,12 @@ bool FastISel::SelectCall(User *I) {
unsigned IID = F->getIntrinsicID();
switch (IID) {
default: break;
- case Intrinsic::dbg_stoppoint: {
- DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
- if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::None))
- setCurDebugLoc(ExtractDebugLocation(*SPI, MF.getDebugLocInfo()));
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::dbg_region_start:
+ case Intrinsic::dbg_region_end:
+ case Intrinsic::dbg_func_start:
+ // FIXME - Remove these instructions once the dust settles.
return true;
- }
- case Intrinsic::dbg_region_start: {
- DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I);
- if (isValidDebugInfoIntrinsic(*RSI, CodeGenOpt::None) && DW
- && DW->ShouldEmitDwarfDebug()) {
- unsigned ID =
- DW->RecordRegionStart(RSI->getContext());
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- BuildMI(MBB, DL, II).addImm(ID);
- }
- return true;
- }
- case Intrinsic::dbg_region_end: {
- DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I);
- if (isValidDebugInfoIntrinsic(*REI, CodeGenOpt::None) && DW
- && DW->ShouldEmitDwarfDebug()) {
- unsigned ID = 0;
- DISubprogram Subprogram(REI->getContext());
- if (isInlinedFnEnd(*REI, MF.getFunction())) {
- // This is end of an inlined function.
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- ID = DW->RecordInlinedFnEnd(Subprogram);
- if (ID)
- // Returned ID is 0 if this is unbalanced "end of inlined
- // scope". This could happen if optimizer eats dbg intrinsics
- // or "beginning of inlined scope" is not recoginized due to
- // missing location info. In such cases, ignore this region.end.
- BuildMI(MBB, DL, II).addImm(ID);
- } else {
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- ID = DW->RecordRegionEnd(REI->getContext());
- BuildMI(MBB, DL, II).addImm(ID);
- }
- }
- return true;
- }
- case Intrinsic::dbg_func_start: {
- DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
- if (!isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::None) || !DW
- || !DW->ShouldEmitDwarfDebug())
- return true;
-
- if (isInlinedFnStart(*FSI, MF.getFunction())) {
- // This is a beginning of an inlined function.
-
- // If llvm.dbg.func.start is seen in a new block before any
- // llvm.dbg.stoppoint intrinsic then the location info is unknown.
- // FIXME : Why DebugLoc is reset at the beginning of each block ?
- DebugLoc PrevLoc = DL;
- if (PrevLoc.isUnknown())
- return true;
- // Record the source line.
- setCurDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo()));
-
- DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
- DISubprogram SP(FSI->getSubprogram());
- unsigned LabelID =
- DW->RecordInlinedFnStart(SP,DICompileUnit(PrevLocTpl.Scope),
- PrevLocTpl.Line, PrevLocTpl.Col);
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- BuildMI(MBB, DL, II).addImm(LabelID);
- return true;
- }
-
- // This is a beginning of a new function.
- MF.setDefaultDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo()));
-
- // llvm.dbg.func_start also defines beginning of function scope.
- DW->RecordRegionStart(FSI->getSubprogram());
- return true;
- }
case Intrinsic::dbg_declare: {
DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
if (!isValidDebugInfoIntrinsic(*DI, CodeGenOpt::None) || !DW
@@ -416,11 +347,13 @@ bool FastISel::SelectCall(User *I) {
StaticAllocaMap.find(AI);
if (SI == StaticAllocaMap.end()) break; // VLAs.
int FI = SI->second;
- if (MMI)
- MMI->setVariableDbgInfo(DI->getVariable(), FI);
-#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN
- DW->RecordVariable(DI->getVariable(), FI);
-#endif
+ if (MMI) {
+ MetadataContext &TheMetadata =
+ DI->getParent()->getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+ MDNode *Dbg = TheMetadata.getMD(MDDbgKind, DI);
+ MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg);
+ }
return true;
}
case Intrinsic::eh_exception: {
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index da311ed..52b0832 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -497,7 +497,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
assert(isNew && "Node emitted out of order - early");
}
-/// EmitNode - Generate machine code for an node and needed dependencies.
+/// EmitNode - Generate machine code for a node and needed dependencies.
///
void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index bb4634d..91817e4 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -97,7 +97,7 @@ public:
/// MachineInstr.
static unsigned CountOperands(SDNode *Node);
- /// EmitNode - Generate machine code for an node and needed dependencies.
+ /// EmitNode - Generate machine code for a node and needed dependencies.
///
void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f389f7f..4f0a229 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -148,8 +148,11 @@ private:
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_PPCF128);
- SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16,
- RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
@@ -1810,10 +1813,19 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
} else if (ConstantSDNode *V =
dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
- CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ if (OpVT == EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
} else {
assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
- const Type *OpNTy = OpVT.getTypeForEVT(*DAG.getContext());
+ const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
CV.push_back(UndefValue::get(OpNTy));
}
}
@@ -1909,6 +1921,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
}
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
@@ -1916,9 +1929,10 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::i16: LC = Call_I16; break;
- case MVT::i32: LC = Call_I32; break;
- case MVT::i64: LC = Call_I64; break;
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
case MVT::i128: LC = Call_I128; break;
}
return ExpandLibCall(LC, Node, isSigned);
@@ -2624,10 +2638,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
} else if (isSigned) {
- Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32,
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
RTLIB::SREM_I64, RTLIB::SREM_I128);
} else {
- Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32,
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
RTLIB::UREM_I64, RTLIB::UREM_I128);
}
Results.push_back(Tmp1);
@@ -2643,10 +2661,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
else if (isSigned)
- Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
RTLIB::SDIV_I64, RTLIB::SDIV_I128);
else
- Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
RTLIB::UDIV_I64, RTLIB::UDIV_I128);
Results.push_back(Tmp1);
break;
@@ -2691,7 +2713,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Node->getOperand(1)));
break;
}
- Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32,
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
RTLIB::MUL_I64, RTLIB::MUL_I128);
Results.push_back(Tmp1);
break;
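
With Call_I8 threaded through every ExpandIntLibCall site, i8 division, remainder, and multiplication can now be legalized into library calls instead of hitting the "Unexpected request" path. The selection is a plain width switch; a freestanding sketch of its shape:

#include <cstdio>
#include <stdexcept>

enum Libcall { SREM_I8, SREM_I16, SREM_I32, SREM_I64, SREM_I128 };

// Pick the signed-remainder libcall for an integer bit width,
// mirroring the switch in ExpandIntLibCall (i8 newly handled).
static Libcall pickSRem(unsigned Bits) {
  switch (Bits) {
  case 8:   return SREM_I8;
  case 16:  return SREM_I16;
  case 32:  return SREM_I32;
  case 64:  return SREM_I64;
  case 128: return SREM_I128;
  default:  throw std::runtime_error("Unexpected request for libcall!");
  }
}

int main() {
  std::printf("%d\n", pickSRem(8)); // prints 0 (SREM_I8)
  return 0;
}
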
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 98e7317..4530ffc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1270,11 +1270,12 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
return Val;
FoldingSetNodeID ID;
+ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
- SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -1378,7 +1379,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
unsigned StackAlign =
std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
- int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
return getFrameIndex(FrameIdx, TLI.getPointerTy());
}
@@ -1394,7 +1395,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
TD->getPrefTypeAlignment(Ty2));
MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align);
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
return getFrameIndex(FrameIdx, TLI.getPointerTy());
}
@@ -5814,9 +5815,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
print_types(OS, G);
- OS << " ";
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (i) OS << ", ";
+ if (i) OS << ", "; else OS << " ";
OS << (void*)getOperand(i).getNode();
if (unsigned RN = getOperand(i).getResNo())
OS << ":" << RN;
@@ -5916,7 +5916,8 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
APInt &SplatUndef,
unsigned &SplatBitSize,
bool &HasAnyUndefs,
- unsigned MinSplatBits) {
+ unsigned MinSplatBits,
+ bool isBigEndian) {
EVT VT = getValueType(0);
assert(VT.isVector() && "Expected a vector type");
unsigned sz = VT.getSizeInBits();
@@ -5933,12 +5934,14 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
unsigned int nOps = getNumOperands();
assert(nOps > 0 && "isConstantSplat has 0-size build vector");
unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
- for (unsigned i = 0; i < nOps; ++i) {
+
+ for (unsigned j = 0; j < nOps; ++j) {
+ unsigned i = isBigEndian ? nOps-1-j : j;
SDValue OpVal = getOperand(i);
- unsigned BitPos = i * EltBitSize;
+ unsigned BitPos = j * EltBitSize;
if (OpVal.getOpcode() == ISD::UNDEF)
- SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos +EltBitSize);
+ SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
zextOrTrunc(sz) << BitPos);
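
The new isBigEndian parameter changes only the order in which build-vector operands are consumed: bits still land at position j * EltBitSize, but on big-endian targets operand nOps-1-j supplies them. An integer-only model of the packing loop, with uint64_t in place of APInt (so the total width is capped at 64 bits here):

#include <cstdint>
#include <cstdio>
#include <vector>

// Pack element constants into one splat value, low bits first,
// walking operands in endian order as the patched loop does.
static uint64_t packSplat(const std::vector<uint64_t> &Elts,
                          unsigned EltBits, bool IsBigEndian) {
  uint64_t Splat = 0;
  unsigned n = (unsigned)Elts.size();
  for (unsigned j = 0; j != n; ++j) {
    unsigned i = IsBigEndian ? n - 1 - j : j; // operand index
    uint64_t Mask = (EltBits >= 64) ? ~0ULL : ((1ULL << EltBits) - 1);
    Splat |= (Elts[i] & Mask) << (j * EltBits);
  }
  return Splat;
}

int main() {
  std::vector<uint64_t> v; // a 4 x i8 build vector: 11 22 33 44
  v.push_back(0x11); v.push_back(0x22);
  v.push_back(0x33); v.push_back(0x44);
  std::printf("%llx %llx\n",
              (unsigned long long)packSplat(v, 8, false),  // 44332211
              (unsigned long long)packSplat(v, 8, true));  // 11223344
  return 0;
}
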
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index c0d2a4d..90fd95e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -26,6 +26,7 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/GCStrategy.h"
@@ -304,7 +305,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
}
for (; BB != EB; ++BB)
@@ -334,25 +335,6 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
DebugLoc DL;
for (BasicBlock::iterator
I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (Function *F = CI->getCalledFunction()) {
- switch (F->getIntrinsicID()) {
- default: break;
- case Intrinsic::dbg_stoppoint: {
- DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
- if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::Default))
- DL = ExtractDebugLocation(*SPI, MF->getDebugLocInfo());
- break;
- }
- case Intrinsic::dbg_func_start: {
- DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
- if (isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::Default))
- DL = ExtractDebugLocation(*FSI, MF->getDebugLocInfo());
- break;
- }
- }
- }
- }
PN = dyn_cast<PHINode>(I);
if (!PN || PN->use_empty()) continue;
@@ -947,58 +929,143 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
}
+/// Get the EVTs and ArgFlags collections that represent the return type
+/// of the given function. This does not require a DAG or a return value, and
+/// is suitable for use before any DAGs for the function are constructed.
+static void getReturnInfo(const Type* ReturnType,
+ Attributes attr, SmallVectorImpl<EVT> &OutVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
+ TargetLowering &TLI,
+ SmallVectorImpl<uint64_t> *Offsets = 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr & Attribute::SExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr & Attribute::ZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr & Attribute::InReg)
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr & Attribute::SExt)
+ Flags.setSExt();
+ else if (attr & Attribute::ZExt)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ OutVTs.push_back(PartVT);
+ OutFlags.push_back(Flags);
+ }
+ }
+}
void SelectionDAGLowering::visitRet(ReturnInst &I) {
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+ if (!FLI.CanLowerReturn) {
+ unsigned DemoteReg = FLI.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
+ PtrValueVTs);
+
+ SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
- if (NumValues == 0) continue;
-
- SDValue RetOp = getValue(I.getOperand(i));
- for (unsigned j = 0, f = NumValues; j != f; ++j) {
- EVT VT = ValueVTs[j];
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
- const Function *F = I.getParent()->getParent();
- if (F->paramHasAttr(0, Attribute::SExt))
- ExtendKind = ISD::SIGN_EXTEND;
- else if (F->paramHasAttr(0, Attribute::ZExt))
- ExtendKind = ISD::ZERO_EXTEND;
+ SmallVector<SDValue, 4> Chains(NumValues);
+ EVT PtrVT = PtrValueVTs[0];
+ for (unsigned i = 0; i != NumValues; ++i)
+ Chains[i] = DAG.getStore(Chain, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ NULL, Offsets[i], false, 0);
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ } else {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) continue;
+
+ SDValue RetOp = getValue(I.getOperand(i));
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ const Function *F = I.getParent()->getParent();
+ if (F->paramHasAttr(0, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
- // FIXME: C calling convention requires the return type to be promoted to
- // at least 32-bit. But this is not necessary for non-C calling
- // conventions. The frontend should mark functions whose return values
- // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
- if (VT.bitsLT(MinVT))
- VT = MinVT;
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (F->paramHasAttr(0, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (F->paramHasAttr(0, Attribute::SExt))
+ Flags.setSExt();
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
}
-
- unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
- EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
- SmallVector<SDValue, 4> Parts(NumParts);
- getCopyToParts(DAG, getCurDebugLoc(),
- SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, ExtendKind);
-
- // 'inreg' on function refers to return value
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (F->paramHasAttr(0, Attribute::InReg))
- Flags.setInReg();
-
- // Propagate extension type if any
- if (F->paramHasAttr(0, Attribute::SExt))
- Flags.setSExt();
- else if (F->paramHasAttr(0, Attribute::ZExt))
- Flags.setZExt();
-
- for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
}
}
@@ -1691,19 +1758,19 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
- const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
- size_t TSize = 0;
+ APInt TSize(First.getBitWidth(), 0);
for (CaseItr I = CR.Range.first, E = CR.Range.second;
I!=E; ++I)
TSize += I->size();
- if (!areJTsAllowed(TLI) || TSize <= 3)
+ if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
return false;
APInt Range = ComputeRange(First, Last);
- double Density = (double)TSize / Range.roundToDouble();
+ double Density = TSize.roundToDouble() / Range.roundToDouble();
if (Density < 0.4)
return false;
@@ -1797,32 +1864,34 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
// Size is the number of Cases represented by this range.
unsigned Size = CR.Range.second - CR.Range.first;
- const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
double FMetric = 0;
CaseItr Pivot = CR.Range.first + Size/2;
// Select optimal pivot, maximizing sum density of LHS and RHS. This will
// (heuristically) allow us to emit JumpTable's later.
- size_t TSize = 0;
+ APInt TSize(First.getBitWidth(), 0);
for (CaseItr I = CR.Range.first, E = CR.Range.second;
I!=E; ++I)
TSize += I->size();
- size_t LSize = FrontCase.size();
- size_t RSize = TSize-LSize;
+ APInt LSize = FrontCase.size();
+ APInt RSize = TSize-LSize;
DEBUG(errs() << "Selecting best pivot: \n"
<< "First: " << First << ", Last: " << Last <<'\n'
<< "LSize: " << LSize << ", RSize: " << RSize << '\n');
for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
J!=E; ++I, ++J) {
- const APInt& LEnd = cast<ConstantInt>(I->High)->getValue();
- const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue();
+ const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+ const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
APInt Range = ComputeRange(LEnd, RBegin);
assert((Range - 2ULL).isNonNegative() &&
"Invalid case distance");
- double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble();
- double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble();
+ double LDensity = (double)LSize.roundToDouble() /
+ (LEnd - First + 1ULL).roundToDouble();
+ double RDensity = (double)RSize.roundToDouble() /
+ (Last - RBegin + 1ULL).roundToDouble();
double Metric = Range.logBase2()*(LDensity+RDensity);
// Should always split in some non-trivial place
DEBUG(errs() <<"=>Step\n"
@@ -3842,112 +3911,12 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
I.getOperand(1), 0, I.getOperand(2), 0));
return 0;
}
- case Intrinsic::dbg_stoppoint: {
- DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
- if (isValidDebugInfoIntrinsic(SPI, CodeGenOpt::Default)) {
- MachineFunction &MF = DAG.getMachineFunction();
- DebugLoc Loc = ExtractDebugLocation(SPI, MF.getDebugLocInfo());
- setCurDebugLoc(Loc);
-
- if (OptLevel == CodeGenOpt::None)
- DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(),
- SPI.getLine(),
- SPI.getColumn(),
- SPI.getContext()));
- }
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::dbg_region_start:
+ case Intrinsic::dbg_region_end:
+ case Intrinsic::dbg_func_start:
+ // FIXME - Remove these instructions once the dust settles.
return 0;
- }
- case Intrinsic::dbg_region_start: {
- DwarfWriter *DW = DAG.getDwarfWriter();
- DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I);
- if (isValidDebugInfoIntrinsic(RSI, OptLevel) && DW
- && DW->ShouldEmitDwarfDebug()) {
- unsigned LabelID =
- DW->RecordRegionStart(RSI.getContext());
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), LabelID));
- }
- return 0;
- }
- case Intrinsic::dbg_region_end: {
- DwarfWriter *DW = DAG.getDwarfWriter();
- DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I);
-
- if (!isValidDebugInfoIntrinsic(REI, OptLevel) || !DW
- || !DW->ShouldEmitDwarfDebug())
- return 0;
-
- MachineFunction &MF = DAG.getMachineFunction();
- DISubprogram Subprogram(REI.getContext());
-
- if (isInlinedFnEnd(REI, MF.getFunction())) {
- // This is end of inlined function. Debugging information for inlined
- // function is not handled yet (only supported by FastISel).
- if (OptLevel == CodeGenOpt::None) {
- unsigned ID = DW->RecordInlinedFnEnd(Subprogram);
- if (ID != 0)
- // Returned ID is 0 if this is unbalanced "end of inlined
- // scope". This could happen if optimizer eats dbg intrinsics or
- // "beginning of inlined scope" is not recoginized due to missing
- // location info. In such cases, do ignore this region.end.
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), ID));
- }
- return 0;
- }
-
- unsigned LabelID =
- DW->RecordRegionEnd(REI.getContext());
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), LabelID));
- return 0;
- }
- case Intrinsic::dbg_func_start: {
- DwarfWriter *DW = DAG.getDwarfWriter();
- DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
- if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None))
- return 0;
-
- MachineFunction &MF = DAG.getMachineFunction();
- // This is a beginning of an inlined function.
- if (isInlinedFnStart(FSI, MF.getFunction())) {
- if (OptLevel != CodeGenOpt::None)
- // FIXME: Debugging informaation for inlined function is only
- // supported at CodeGenOpt::Node.
- return 0;
-
- DebugLoc PrevLoc = CurDebugLoc;
- // If llvm.dbg.func.start is seen in a new block before any
- // llvm.dbg.stoppoint intrinsic then the location info is unknown.
- // FIXME : Why DebugLoc is reset at the beginning of each block ?
- if (PrevLoc.isUnknown())
- return 0;
-
- // Record the source line.
- setCurDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo()));
-
- if (!DW || !DW->ShouldEmitDwarfDebug())
- return 0;
- DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
- DISubprogram SP(FSI.getSubprogram());
- DICompileUnit CU(PrevLocTpl.Scope);
- unsigned LabelID = DW->RecordInlinedFnStart(SP, CU,
- PrevLocTpl.Line,
- PrevLocTpl.Col);
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), LabelID));
- return 0;
- }
-
- // This is a beginning of a new function.
- MF.setDefaultDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo()));
-
- if (!DW || !DW->ShouldEmitDwarfDebug())
- return 0;
- // llvm.dbg.func_start also defines beginning of function scope.
- DW->RecordRegionStart(FSI.getSubprogram());
- return 0;
- }
case Intrinsic::dbg_declare: {
if (OptLevel != CodeGenOpt::None)
// FIXME: Variable debug info is not supported here.
@@ -3972,13 +3941,15 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
if (SI == FuncInfo.StaticAllocaMap.end())
return 0; // VLAs.
int FI = SI->second;
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+
MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
- if (MMI)
- MMI->setVariableDbgInfo(Variable, FI);
-#else
- DW->RecordVariable(Variable, FI);
-#endif
+ if (MMI) {
+ MetadataContext &TheMetadata =
+ DI.getParent()->getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+ MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &DI);
+ MMI->setVariableDbgInfo(Variable, FI, Dbg);
+ }
return 0;
}
case Intrinsic::eh_exception: {
@@ -4233,7 +4204,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
EVT Ty = Arg.getValueType();
if (CI->getZExtValue() < 2)
- setValue(&I, DAG.getConstant(-1U, Ty));
+ setValue(&I, DAG.getConstant(-1ULL, Ty));
else
setValue(&I, DAG.getConstant(0, Ty));
return 0;
@@ -4355,6 +4326,16 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
case Intrinsic::atomic_swap:
return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return 0;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ return 0;
}
}
@@ -4368,7 +4349,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
/// TargetLowering::IsEligibleForTailCallOptimization.
///
static bool
-isInTailCallPosition(const Instruction *I, Attributes RetAttr,
+isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr,
const TargetLowering &TLI) {
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -4395,9 +4376,14 @@ isInTailCallPosition(const Instruction *I, Attributes RetAttr,
// what the call's return type is.
if (!Ret || Ret->getNumOperands() == 0) return true;
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
// Conservatively require the attributes of the call to match those of
- // the return.
- if (F->getAttributes().getRetAttributes() != RetAttr)
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
return false;
// Otherwise, make sure the unmodified return value of I is the return value.
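
A quick worked example of the (CalleeRetAttr ^ CallerRetAttr) & ~NoAlias test
above: XOR leaves set exactly the bits on which the two attribute sets differ,
and masking off NoAlias discards the one difference the check is willing to
ignore. The bit values below are made up for illustration; the real constants
live in llvm/Attributes.h.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t SExt = 1u << 0, ZExt = 1u << 1, NoAlias = 1u << 2; // hypothetical
  uint32_t Caller  = SExt | NoAlias; // caller's return attributes
  uint32_t CalleeA = SExt;           // differs only in NoAlias -> compatible
  uint32_t CalleeB = ZExt;           // differs in SExt/ZExt    -> incompatible
  assert(((CalleeA ^ Caller) & ~NoAlias) == 0);
  assert(((CalleeB ^ Caller) & ~NoAlias) != 0);
  return 0;
}
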
@@ -4431,15 +4417,52 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
MachineBasicBlock *LandingPad) {
const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ const Type *RetTy = FTy->getReturnType();
MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
unsigned BeginLabel = 0, EndLabel = 0;
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
- unsigned j = 1;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<EVT, 4> OutVTs;
+ SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+ SmallVector<uint64_t, 4> Offsets;
+ getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+ OutVTs, OutsFlags, TLI, &Offsets);
+
+
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ FTy->isVarArg(), OutVTs, OutsFlags, DAG);
+
+ SDValue DemoteStackSlot;
+
+ if (!CanLowerReturn) {
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+ FTy->getReturnType());
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
+ FTy->getReturnType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+ DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.Alignment = Align;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(FTy->getContext());
+ }
+
for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i, ++j) {
+ i != e; ++i) {
SDValue ArgNode = getValue(*i);
Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
@@ -4475,7 +4498,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
isTailCall = false;
std::pair<SDValue,SDValue> Result =
- TLI.LowerCallTo(getRoot(), CS.getType(),
+ TLI.LowerCallTo(getRoot(), RetTy,
CS.paramHasAttr(0, Attribute::SExt),
CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
@@ -4489,6 +4512,35 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
"Null value expected with tail call!");
if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
+ else if (!CanLowerReturn && Result.second.getNode()) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+ ComputeValueVTs(TLI, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+ unsigned NumValues = OutVTs.size();
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
+ DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ NULL, Offsets[i], false, 1);
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ PendingLoads.push_back(Chain);
+
+ setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES,
+ getCurDebugLoc(), DAG.getVTList(&OutVTs[0], NumValues),
+ &Values[0], NumValues));
+ }
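
For readers unfamiliar with sret demotion, the transformation the two new blocks
implement can be sketched at the C level (a conceptual illustration only, not
LLVM code): a return value the target cannot pass back in registers becomes a
hidden pointer parameter, the caller allocates the slot, and the result is
loaded back out afterwards, just as the DAG loads from DemoteStackSlot above.

#include <cstdio>

struct Big { long v[8]; };                 // too big for return registers

// Source-level view the frontend sees:   Big make_big(void);
// Demoted view after lowering: the caller passes a hidden sret pointer
// (the DemoteStackSlot / sret argument in the patch) and the callee
// stores the result through it.
static void make_big_sret(Big *sret) {     // hidden first parameter
  for (int i = 0; i < 8; ++i) sret->v[i] = i;
}

int main() {
  Big slot;                                // caller-owned stack slot
  make_big_sret(&slot);                    // call with hidden sret pointer
  std::printf("%ld\n", slot.v[7]);         // load back, as the DAG does above
  return 0;
}
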
// As a special case, a null chain means that a tail call has
// been emitted and the DAG root is already updated.
if (Result.second.getNode())
@@ -5229,7 +5281,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
Chain = DAG.getStore(Chain, getCurDebugLoc(),
OpInfo.CallOperand, StackSlot, NULL, 0);
@@ -5757,9 +5809,32 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
SDValue OldRoot = DAG.getRoot();
DebugLoc dl = SDL->getCurDebugLoc();
const TargetData *TD = TLI.getTargetData();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<EVT, 4> OutVTs;
+ SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+ getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ OutVTs, OutsFlags, TLI);
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+ FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(),
+ OutVTs, OutsFlags, DAG);
+ if (!FLI.CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+ // NOTE: Assuming that a pointer will never break down to more than one VT
+ // or one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, true);
+ Ins.push_back(RetArg);
+ }
// Set up the incoming argument description vector.
- SmallVector<ISD::InputArg, 16> Ins;
unsigned Idx = 1;
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
I != E; ++I, ++Idx) {
@@ -5837,6 +5912,28 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
// Set up the argument values.
unsigned i = 0;
Idx = 1;
+ if (!FLI.CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ EVT VT = ValueVTs[0];
+ EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT,
+ VT, AssertOp);
+
+ MachineFunction& MF = SDL->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+ FLI.DemoteRegister = SRetReg;
+ NewRoot = SDL->DAG.getCopyToReg(NewRoot, SDL->getCurDebugLoc(), SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ // Idx indexes LLVM arguments. Don't touch it.
+ ++i;
+ }
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
++I, ++Idx) {
SmallVector<SDValue, 4> ArgValues;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
index a0ec7aa..10f256c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -90,6 +90,14 @@ public:
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
+ /// CanLowerReturn - true iff the function's return value can be lowered to
+ /// registers.
+ bool CanLowerReturn;
+
+ /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
+ /// allocated to hold a pointer to the hidden sret parameter.
+ unsigned DemoteRegister;
+
explicit FunctionLoweringInfo(TargetLowering &TLI);
/// set - Initialize this FunctionLoweringInfo with the given Function
@@ -193,9 +201,9 @@ class SelectionDAGLowering {
Case() : Low(0), High(0), BB(0) { }
Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
Low(low), High(high), BB(bb) { }
- uint64_t size() const {
- uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue();
- uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue();
+ APInt size() const {
+ const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+ const APInt &rLow = cast<ConstantInt>(Low)->getValue();
return (rHigh - rLow + 1ULL);
}
};
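
The switch of Case::size() from uint64_t to APInt matters because
getSExtValue() only works for values that fit in 64 bits (it asserts for wider
integer types), and the 64-bit subtraction can silently wrap. A quick
demonstration of the wraparound in plain C++:

#include <cstdint>
#include <cstdio>

int main() {
  // A case range covering all of i64: [INT64_MIN, INT64_MAX].
  uint64_t rLow  = (uint64_t)INT64_MIN;
  uint64_t rHigh = (uint64_t)INT64_MAX;
  uint64_t size  = rHigh - rLow + 1ULL;   // true size is 2^64
  std::printf("wrapped size = %llu\n",    // prints 0: wrapped around
              (unsigned long long)size);
  return 0;
}

APInt carries the arithmetic at the constant's full bit width, so ranges over
types wider than i64 are representable at all.
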
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index b63d5bb..ab5f21e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -68,7 +68,7 @@ static cl::opt<bool>
EnableFastISelAbort("fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction fails"));
static cl::opt<bool>
-SchedLiveInCopies("schedule-livein-copies",
+SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
cl::desc("Schedule copies of livein registers"),
cl::init(false));
@@ -387,13 +387,14 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
if (MDDbgKind) {
// Update DebugLoc if debug information is attached with this
// instruction.
- if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
- DILocation DILoc(Dbg);
- DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
- SDL->setCurDebugLoc(Loc);
- if (MF->getDefaultDebugLoc().isUnknown())
- MF->setDefaultDebugLoc(Loc);
- }
+ if (!isa<DbgInfoIntrinsic>(I))
+ if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
+ SDL->setCurDebugLoc(Loc);
+ if (MF->getDefaultDebugLoc().isUnknown())
+ MF->setDefaultDebugLoc(Loc);
+ }
}
if (!isa<TerminatorInst>(I))
SDL->visit(*I);
@@ -750,14 +751,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
if (MDDbgKind) {
// Update DebugLoc if debug information is attached with this
// instruction.
- if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) {
- DILocation DILoc(Dbg);
- DebugLoc Loc = ExtractDebugLocation(DILoc,
- MF.getDebugLocInfo());
- FastIS->setCurDebugLoc(Loc);
- if (MF.getDefaultDebugLoc().isUnknown())
- MF.setDefaultDebugLoc(Loc);
- }
+ if (!isa<DbgInfoIntrinsic>(BI))
+ if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) {
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc,
+ MF.getDebugLocInfo());
+ FastIS->setCurDebugLoc(Loc);
+ if (MF.getDefaultDebugLoc().isUnknown())
+ MF.setDefaultDebugLoc(Loc);
+ }
}
// Just before the terminator instruction, insert instructions to
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9f36b67..2ca52a4 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -22,7 +22,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -65,22 +64,27 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::SRA_I32] = "__ashrsi3";
Names[RTLIB::SRA_I64] = "__ashrdi3";
Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
Names[RTLIB::MUL_I16] = "__mulhi3";
Names[RTLIB::MUL_I32] = "__mulsi3";
Names[RTLIB::MUL_I64] = "__muldi3";
Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
Names[RTLIB::SDIV_I16] = "__divhi3";
Names[RTLIB::SDIV_I32] = "__divsi3";
Names[RTLIB::SDIV_I64] = "__divdi3";
Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
Names[RTLIB::UDIV_I16] = "__udivhi3";
Names[RTLIB::UDIV_I32] = "__udivsi3";
Names[RTLIB::UDIV_I64] = "__udivdi3";
Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
Names[RTLIB::SREM_I16] = "__modhi3";
Names[RTLIB::SREM_I32] = "__modsi3";
Names[RTLIB::SREM_I64] = "__moddi3";
Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
Names[RTLIB::UREM_I16] = "__umodhi3";
Names[RTLIB::UREM_I32] = "__umodsi3";
Names[RTLIB::UREM_I64] = "__umoddi3";
@@ -2360,7 +2364,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
- std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
// Figure out which register class contains this reg.
const TargetRegisterInfo *RI = TM.getRegisterInfo();
@@ -2383,7 +2387,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
- if (StringsEqualNoCase(RegName, RI->getName(*I)))
+ if (RegName.equals_lower(RI->getName(*I)))
return std::make_pair(*I, RC);
}
}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index b5d6b47..3909c56 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -709,7 +709,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
}
MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
- tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI);
+ tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_);
MachineInstr *NewMI = prior(MII);
if (checkForDeadDef) {
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index f3ad0d1..f85384b 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -13,15 +13,43 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
-std::auto_ptr<IndexListEntry> IndexListEntry::emptyKeyEntry,
- IndexListEntry::tombstoneKeyEntry;
+
+// Yep - these are thread safe. See the header for details.
+namespace {
+
+
+ class EmptyIndexListEntry : public IndexListEntry {
+ public:
+ EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {}
+ };
+
+ class TombstoneIndexListEntry : public IndexListEntry {
+ public:
+ TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {}
+ };
+
+ // The following statics are thread safe. They're read only, and you
+ // can't step from them to any other list entries.
+ ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey;
+ ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey;
+}
char SlotIndexes::ID = 0;
static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering");
+IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
+ return &*IndexListEntryEmptyKey;
+}
+
+IndexListEntry* IndexListEntry::getTombstoneKeyEntry() {
+ return &*IndexListEntryTombstoneKey;
+}
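
The move from eagerly constructed auto_ptr statics to ManagedStatic means the
sentinel entries are built lazily on first use and registered for teardown in
llvm_shutdown(). A minimal analogue of that pattern using function-local
statics (the shutdown registration is omitted, and IndexListEntry here is a
stand-in, not the real class):

#include <cstdio>

struct IndexListEntry {
  enum Kind { EMPTY_KEY, TOMBSTONE_KEY };
  explicit IndexListEntry(Kind K) : K_(K) { std::printf("built %d\n", K_); }
  Kind K_;
};

IndexListEntry *getEmptyKeyEntry() {
  static IndexListEntry Empty(IndexListEntry::EMPTY_KEY);     // first call only
  return &Empty;
}

IndexListEntry *getTombstoneKeyEntry() {
  static IndexListEntry Tombstone(IndexListEntry::TOMBSTONE_KEY);
  return &Tombstone;
}

int main() {
  // Nothing is constructed until the accessors run, mirroring &*ManagedStatic.
  IndexListEntry *E = getEmptyKeyEntry();
  IndexListEntry *T = getTombstoneKeyEntry();
  return (E == getEmptyKeyEntry() && T != E) ? 0 : 1;
}
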
+
+
void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(au);
@@ -51,8 +79,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
mf = &fn;
initList();
- const unsigned gap = 1;
-
  // Check that the list contains only the sentinel.
assert(indexListHead->getNext() == 0 &&
"Index list non-empty at initial numbering?");
@@ -64,14 +90,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
"MachineInstr -> Index mapping non-empty at initial numbering?");
functionSize = 0;
- /*
- for (unsigned s = 0; s < SlotIndex::NUM; ++s) {
- indexList.push_back(createEntry(0, s));
- }
-
- unsigned index = gap * SlotIndex::NUM;
- */
-
unsigned index = 0;
  // Iterate over the function.
@@ -83,7 +101,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(0, index));
SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
- index += gap * SlotIndex::NUM;
+ index += SlotIndex::NUM;
for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
miItr != miEnd; ++miItr) {
@@ -93,7 +111,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(0, index));
terminatorGaps.insert(
std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += gap * SlotIndex::NUM;
+ index += SlotIndex::NUM;
}
// Insert a store index for the instr.
@@ -109,14 +127,14 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
if (Slots == 0)
Slots = 1;
- index += (Slots + 1) * gap * SlotIndex::NUM;
+ index += (Slots + 1) * SlotIndex::NUM;
}
if (mbb->getFirstTerminator() == mbb->end()) {
push_back(createEntry(0, index));
terminatorGaps.insert(
std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += gap * SlotIndex::NUM;
+ index += SlotIndex::NUM;
}
SlotIndex blockEndIndex(back(), SlotIndex::STORE);
@@ -138,21 +156,36 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
return false;
}
-void SlotIndexes::renumber() {
-  assert(false && "SlotIndexes::renumber is not fully implemented yet.");
+void SlotIndexes::renumberIndexes() {
- // Compute numbering as follows:
- // Grab an iterator to the start of the index list.
- // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
- // iterator in lock-step (though skipping it over indexes which have
- // null pointers in the instruction field).
- // At each iteration assert that the instruction pointed to in the index
- // is the same one pointed to by the MI iterator. This
+ // Renumber updates the index of every element of the index list.
+ // If all instrs in the function have been allocated an index (which has been
+ // placed in the index list in the order of instruction iteration) then the
+ // resulting numbering will match what would have been generated by the
+ // pass during the initial numbering of the function if the new instructions
+ // had been present.
- // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
- // only need to be set up once - when the first numbering is computed.
+ functionSize = 0;
+ unsigned index = 0;
+
+ for (IndexListEntry *curEntry = front(); curEntry != getTail();
+ curEntry = curEntry->getNext()) {
- assert(false && "Renumbering not supported yet.");
+ curEntry->setIndex(index);
+
+ if (curEntry->getInstr() == 0) {
+      // MBB start entry or terminator gap. Step the index by one slot
+      // group (SlotIndex::NUM).
+ index += SlotIndex::NUM;
+ }
+ else {
+ ++functionSize;
+ unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs();
+ if (Slots == 0)
+ Slots = 1;
+
+ index += (Slots + 1) * SlotIndex::NUM;
+ }
+ }
}
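
A toy rerun of the numbering loop above, to show the spacing it produces.
SlotIndex::NUM is assumed to be 4 here (one index per LOAD/USE/DEF/STORE slot
in this era of the code); entries with no instruction (block starts, terminator
gaps) take one slot group, an instruction takes (Defs + 1) groups.

#include <cstdio>

int main() {
  const unsigned NUM = 4;                       // assumed SlotIndex::NUM
  struct Entry { const char *What; unsigned Defs; bool IsInstr; };
  Entry List[] = {
    {"block start", 0, false},
    {"instr, 1 def", 1, true},
    {"instr, 0 defs (treated as 1)", 0, true},
    {"terminator gap", 0, false},
  };
  unsigned index = 0;
  for (const Entry &E : List) {
    std::printf("%-30s -> index %u\n", E.What, index);
    if (!E.IsInstr) {
      index += NUM;                             // one slot group
    } else {
      unsigned Slots = E.Defs ? E.Defs : 1;     // same clamp as the pass
      index += (Slots + 1) * NUM;
    }
  }
  std::printf("next free index: %u\n", index);  // 0, 4, 12, 20 -> 24
  return 0;
}
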
void SlotIndexes::dump() const {
@@ -167,7 +200,7 @@ void SlotIndexes::dump() const {
}
}
- for (MBB2IdxMap::iterator itr = mbb2IdxMap.begin();
+ for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin();
itr != mbb2IdxMap.end(); ++itr) {
errs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
<< itr->second.first << ", " << itr->second.second << "]\n";
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 95e85be..9107325 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -52,16 +52,16 @@ protected:
  /// Ensures there is space before the given machine instruction and returns
  /// the instruction's new number.
SlotIndex makeSpaceBefore(MachineInstr *mi) {
- if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
+ //if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
// FIXME: Should be updated to use rewrite-in-place methods when they're
// introduced. Currently broken.
//lis->scaleNumbering(2);
//ls->scaleNumbering(2);
- }
+ //}
SlotIndex miIdx = lis->getInstructionIndex(mi);
- assert(lis->hasGapBeforeInstr(miIdx));
+ //assert(lis->hasGapBeforeInstr(miIdx));
return miIdx;
}
@@ -69,16 +69,16 @@ protected:
  /// Ensures there is space after the given machine instruction and returns
  /// the instruction's new number.
SlotIndex makeSpaceAfter(MachineInstr *mi) {
- if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
+ //if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
// FIXME: Should be updated to use rewrite-in-place methods when they're
// introduced. Currently broken.
// lis->scaleNumbering(2);
// ls->scaleNumbering(2);
- }
+ //}
SlotIndex miIdx = lis->getInstructionIndex(mi);
- assert(lis->hasGapAfterInstr(miIdx));
+ //assert(lis->hasGapAfterInstr(miIdx));
return miIdx;
}
@@ -99,14 +99,8 @@ protected:
true, ss, trc);
MachineBasicBlock::iterator storeInstItr(next(mi));
MachineInstr *storeInst = &*storeInstItr;
- SlotIndex storeInstIdx = miIdx.getNextIndex();
-
- assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
- "Store inst index already in use.");
- lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
-
- return storeInstIdx;
+ return lis->InsertMachineInstrInMaps(storeInst);
}
/// Insert a store of the given vreg to the given stack slot immediately
@@ -120,14 +114,8 @@ protected:
tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc);
MachineBasicBlock::iterator storeInstItr(prior(mi));
MachineInstr *storeInst = &*storeInstItr;
- SlotIndex storeInstIdx = miIdx.getPrevIndex();
-
- assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
- "Store inst index already in use.");
- lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
-
- return storeInstIdx;
+ return lis->InsertMachineInstrInMaps(storeInst);
}
void insertStoreAfterInstOnInterval(LiveInterval *li,
@@ -164,14 +152,8 @@ protected:
tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(next(mi));
MachineInstr *loadInst = &*loadInstItr;
- SlotIndex loadInstIdx = miIdx.getNextIndex();
-
- assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
- "Store inst index already in use.");
- lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
-
- return loadInstIdx;
+ return lis->InsertMachineInstrInMaps(loadInst);
}
/// Insert a load of the given vreg from the given stack slot immediately
@@ -186,14 +168,8 @@ protected:
tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(prior(mi));
MachineInstr *loadInst = &*loadInstItr;
- SlotIndex loadInstIdx = miIdx.getPrevIndex();
-
- assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
- "Load inst index already in use.");
-
- lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
- return loadInstIdx;
+ return lis->InsertMachineInstrInMaps(loadInst);
}
void insertLoadBeforeInstOnInterval(LiveInterval *li,
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index c646869..102e2a3 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -135,14 +135,52 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg,
unsigned SubIdx,
- const MachineInstr *Orig) const {
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MachineOperand &MO = MI->getOperand(0);
- MO.setReg(DestReg);
- MO.setSubReg(SubIdx);
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ MO.setReg(DestReg);
+ MO.setSubReg(SubIdx);
+ } else if (SubIdx) {
+ MO.setReg(TRI->getSubReg(DestReg, SubIdx));
+ } else {
+ MO.setReg(DestReg);
+ }
MBB.insert(I, MI);
}
+bool
+TargetInstrInfoImpl::isIdentical(const MachineInstr *MI,
+ const MachineInstr *Other,
+ const MachineRegisterInfo *MRI) const {
+ if (MI->getOpcode() != Other->getOpcode() ||
+ MI->getNumOperands() != Other->getNumOperands())
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ const MachineOperand &OMO = Other->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ assert(OMO.isReg() && OMO.isDef());
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Reg != OMO.getReg())
+ return false;
+ } else if (MRI->getRegClass(MO.getReg()) !=
+ MRI->getRegClass(OMO.getReg()))
+ return false;
+
+ continue;
+ }
+
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ }
+
+ return true;
+}
+
unsigned
TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
unsigned FnSize = 0;
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0a6a0d7..84467ed 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1033,7 +1033,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
- TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI);
+ TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
ReMatRegs.set(regB);
++NumReMats;
} else {
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index ce3eed1..c8c5d86 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -117,8 +117,8 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
- int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
- RC->getAlignment(), /*isSS*/true);
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
if (LowSpillSlot == NO_STACK_SLOT)
LowSpillSlot = SS;
if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
@@ -161,8 +161,8 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
EmergencySpillSlots.find(RC);
if (I != EmergencySpillSlots.end())
return I->second;
- int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
- RC->getAlignment(), /*isSS*/true);
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
if (LowSpillSlot == NO_STACK_SLOT)
LowSpillSlot = SS;
if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index fd80f46..ec0abd1 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -483,19 +483,20 @@ static void InvalidateKills(MachineInstr &MI,
}
/// InvalidateRegDef - If the def operand of the specified def MI is now dead
-/// (since it's spill instruction is removed), mark it isDead. Also checks if
+/// (since its spill instruction is removed), mark it isDead. Also checks if
/// the def MI has other definition operands that are not dead. Returns it by
/// reference.
static bool InvalidateRegDef(MachineBasicBlock::iterator I,
MachineInstr &NewDef, unsigned Reg,
- bool &HasLiveDef) {
+ bool &HasLiveDef,
+ const TargetRegisterInfo *TRI) {
// Due to remat, it's possible this reg isn't being reused. That is,
// the def of this reg (by prev MI) is now dead.
MachineInstr *DefMI = I;
MachineOperand *DefOp = NULL;
for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = DefMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
+ if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
continue;
if (MO.getReg() == Reg)
DefOp = &MO;
@@ -512,7 +513,8 @@ static bool InvalidateRegDef(MachineBasicBlock::iterator I,
MachineInstr *NMI = I;
for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
MachineOperand &MO = NMI->getOperand(j);
- if (!MO.isReg() || MO.getReg() != Reg)
+ if (!MO.isReg() || MO.getReg() == 0 ||
+ (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
continue;
if (MO.isUse())
FoundUse = true;
@@ -556,11 +558,30 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
KillOps[*SR] = NULL;
RegKills.reset(*SR);
}
-
- if (!MI.isRegTiedToDefOperand(i))
- // Unless it's a two-address operand, this is the new kill.
- MO.setIsKill();
+ } else {
+ // Check for subreg kills as well.
+ // d4 =
+ // store d4, fi#0
+ // ...
+ // = s8<kill>
+ // ...
+ // = d4 <avoiding reload>
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ unsigned SReg = *SR;
+ if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) {
+ KillOps[SReg]->setIsKill(false);
+ unsigned KReg = KillOps[SReg]->getReg();
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+
+ for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) {
+ KillOps[*SSR] = NULL;
+ RegKills.reset(*SSR);
+ }
+ }
+ }
}
+
if (MO.isKill()) {
RegKills.set(Reg);
KillOps[Reg] = &MO;
@@ -573,7 +594,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isDef())
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
RegKills.reset(Reg);
@@ -583,6 +604,10 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
RegKills.reset(*SR);
KillOps[*SR] = NULL;
}
+ for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
}
}
@@ -601,7 +626,7 @@ static void ReMaterialize(MachineBasicBlock &MBB,
"Don't know how to remat instructions that define > 1 values!");
#endif
TII->reMaterialize(MBB, MII, DestReg,
- ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI);
+ ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI);
MachineInstr *NewMI = prior(MII);
for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
@@ -816,11 +841,8 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
"A reuse cannot be a virtual register");
if (PRRU != RealPhysRegUsed) {
// What was the sub-register index?
- unsigned SubReg;
- for (SubIdx = 1; (SubReg = TRI->getSubReg(PRRU, SubIdx)); SubIdx++)
- if (SubReg == RealPhysRegUsed)
- break;
- assert(SubReg == RealPhysRegUsed &&
+ SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed);
+ assert(SubIdx &&
"Operand physreg is not a sub-register of PhysRegUsed");
}
@@ -1454,7 +1476,7 @@ private:
// being reused.
for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
bool HasOtherDef = false;
- if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) {
+ if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
MachineInstr *DeadDef = PrevMII;
if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
// FIXME: This assumes a remat def does not have side effects.
@@ -1704,6 +1726,7 @@ private:
      // Mark it as killed.
MachineInstr *CopyMI = prior(InsertLoc);
+ CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse);
MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
KillOpnd->setIsKill();
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
@@ -1984,6 +2007,7 @@ private:
TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
MachineInstr *CopyMI = prior(InsertLoc);
+ CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse);
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
// This invalidates DesignatedReg.
@@ -2112,6 +2136,7 @@ private:
// virtual or needing to clobber any values if it's physical).
NextMII = &MI;
--NextMII; // backtrack to the copy.
+ NextMII->setAsmPrinterFlag(AsmPrinter::ReloadReuse);
// Propagate the sub-register index over.
if (SubIdx) {
DefMO = NextMII->findRegisterDefOperand(DestReg);
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index 5fd63ee..7bcd30a 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -13,9 +13,13 @@
#include "llvm/CompilerDriver/Action.h"
#include "llvm/CompilerDriver/BuiltinOptions.h"
+
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Program.h"
+#include "llvm/System/TimeValue.h"
+
#include <stdexcept>
+#include <string>
using namespace llvm;
using namespace llvmc;
@@ -60,14 +64,31 @@ namespace {
}
}
+namespace llvmc {
+ void AppendToGlobalTimeLog(const std::string& cmd, double time);
+}
+
int llvmc::Action::Execute() const {
if (DryRun || VerboseMode) {
errs() << Command_ << " ";
std::for_each(Args_.begin(), Args_.end(), print_string);
errs() << '\n';
}
- if (DryRun)
- return 0;
- else
- return ExecuteProgram(Command_, Args_);
+ if (!DryRun) {
+ if (Time) {
+ sys::TimeValue now = sys::TimeValue::now();
+ int ret = ExecuteProgram(Command_, Args_);
+ sys::TimeValue now2 = sys::TimeValue::now();
+ now2 -= now;
+ double elapsed = now2.seconds() + now2.microseconds() / 1000000.0;
+ AppendToGlobalTimeLog(Command_, elapsed);
+
+ return ret;
+ }
+ else {
+ return ExecuteProgram(Command_, Args_);
+ }
+ }
+
+ return 0;
}
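
The -time support above brackets each command with sys::TimeValue::now() and
converts the delta to fractional seconds. A standalone analogue using
std::chrono in place of llvm::sys::TimeValue (so the sketch compiles on its
own), writing the same "# <cmd> <seconds>" log format as AppendToGlobalTimeLog:

#include <chrono>
#include <cstdio>
#include <sstream>
#include <string>
#include <thread>

static std::ostringstream GlobalTimeLog;   // the driver uses a std::stringstream

static void AppendToGlobalTimeLog(const std::string &cmd, double time) {
  GlobalTimeLog << "# " << cmd << ' ' << time << '\n';
}

static int RunTimed(const std::string &cmd) {
  auto now = std::chrono::steady_clock::now();
  std::this_thread::sleep_for(std::chrono::milliseconds(50)); // stands in for ExecuteProgram
  auto now2 = std::chrono::steady_clock::now();
  double elapsed = std::chrono::duration<double>(now2 - now).count();
  AppendToGlobalTimeLog(cmd, elapsed);
  return 0;
}

int main() {
  GlobalTimeLog.precision(2);              // mirrors Main.cpp's precision(2)
  RunTimed("llvm-gcc -c foo.c");
  std::fputs(GlobalTimeLog.str().c_str(), stderr); // dumped at exit, as in Main()
  return 0;
}
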
diff --git a/lib/CompilerDriver/BuiltinOptions.cpp b/lib/CompilerDriver/BuiltinOptions.cpp
index d90c50d..d1ac8c9 100644
--- a/lib/CompilerDriver/BuiltinOptions.cpp
+++ b/lib/CompilerDriver/BuiltinOptions.cpp
@@ -30,8 +30,10 @@ cl::opt<std::string> TempDirname("temp-dir", cl::desc("Temp dir name"),
cl::list<std::string> Languages("x",
cl::desc("Specify the language of the following input files"),
cl::ZeroOrMore);
+
cl::opt<bool> DryRun("dry-run",
cl::desc("Only pretend to run commands"));
+cl::opt<bool> Time("time", cl::desc("Time individual commands"));
cl::opt<bool> VerboseMode("v",
cl::desc("Enable verbose mode"));
diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp
index c581809..3a3487a 100644
--- a/lib/CompilerDriver/Main.cpp
+++ b/lib/CompilerDriver/Main.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
+#include <sstream>
#include <stdexcept>
#include <string>
@@ -28,6 +29,8 @@ using namespace llvmc;
namespace {
+ std::stringstream* GlobalTimeLog;
+
sys::Path getTempDir() {
sys::Path tempDir;
@@ -81,6 +84,11 @@ namespace {
namespace llvmc {
+// Used to implement -time option. External linkage is intentional.
+void AppendToGlobalTimeLog(const std::string& cmd, double time) {
+ *GlobalTimeLog << "# " << cmd << ' ' << time << '\n';
+}
+
// Sometimes plugins want to condition on the value in argv[0].
const char* ProgramName;
@@ -122,7 +130,19 @@ int Main(int argc, char** argv) {
throw std::runtime_error("no input files");
}
- return BuildTargets(graph, langMap);
+ if (Time) {
+ GlobalTimeLog = new std::stringstream;
+ GlobalTimeLog->precision(2);
+ }
+
+ int ret = BuildTargets(graph, langMap);
+
+ if (Time) {
+ llvm::errs() << GlobalTimeLog->str();
+ delete GlobalTimeLog;
+ }
+
+ return ret;
}
catch(llvmc::error_code& ec) {
return ec.code();
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
index 5a32fd3..9f4ab49 100644
--- a/lib/CompilerDriver/Tool.cpp
+++ b/lib/CompilerDriver/Tool.cpp
@@ -20,11 +20,6 @@
using namespace llvm;
using namespace llvmc;
-// SplitString is used by derived Tool classes.
-typedef void (*SplitStringFunPtr)(const std::string&,
- std::vector<std::string>&, const char*);
-SplitStringFunPtr ForceLinkageSplitString = &llvm::SplitString;
-
namespace {
sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName,
const std::string& Suffix) {
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 21499e5..cb30748 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -40,7 +40,8 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)(ModuleProvider *MP,
std::string *ErrorStr,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
- bool GVsWithCode) = 0;
+ bool GVsWithCode,
+ CodeModel::Model CMM) = 0;
ExecutionEngine *(*ExecutionEngine::InterpCtor)(ModuleProvider *MP,
std::string *ErrorStr) = 0;
ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0;
@@ -52,7 +53,6 @@ ExecutionEngine::ExecutionEngine(ModuleProvider *P)
CompilingLazily = false;
GVCompilationDisabled = false;
SymbolSearchingDisabled = false;
- DlsymStubsEnabled = false;
Modules.push_back(P);
assert(P && "ModuleProvider is null?");
}
@@ -445,7 +445,7 @@ ExecutionEngine *EngineBuilder::create() {
if (ExecutionEngine::JITCtor) {
ExecutionEngine *EE =
ExecutionEngine::JITCtor(MP, ErrorStr, JMM, OptLevel,
- AllocateGVsWithCode);
+ AllocateGVsWithCode, CMModel);
if (EE) return EE;
}
}
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 01bd2c7..b59cfd1 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -882,16 +882,6 @@ void Interpreter::visitCallSite(CallSite CS) {
e = SF.Caller.arg_end(); i != e; ++i, ++pNum) {
Value *V = *i;
ArgVals.push_back(getOperandValue(V, SF));
- // Promote all integral types whose size is < sizeof(i32) into i32.
- // We do this by zero or sign extending the value as appropriate
- // according to the parameter attributes
- const Type *Ty = V->getType();
- if (Ty->isInteger() && (ArgVals.back().IntVal.getBitWidth() < 32)) {
- if (CS.paramHasAttr(pNum, Attribute::ZExt))
- ArgVals.back().IntVal = ArgVals.back().IntVal.zext(32);
- else if (CS.paramHasAttr(pNum, Attribute::SExt))
- ArgVals.back().IntVal = ArgVals.back().IntVal.sext(32);
- }
}
// To handle indirect calls, we must get the pointer value from the argument
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 8c45a36..c02d84f 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -158,7 +158,7 @@ static void *ffiValueFor(const Type *Ty, const GenericValue &AV,
}
case Type::FloatTyID: {
float *FloatPtr = (float *) ArgDataPtr;
- *FloatPtr = AV.DoubleVal;
+ *FloatPtr = AV.FloatVal;
return ArgDataPtr;
}
case Type::DoubleTyID: {
@@ -284,6 +284,9 @@ GenericValue Interpreter::callExternalFunction(Function *F,
else
llvm_report_error("Tried to execute an unknown external function: " +
F->getType()->getDescription() + " " +F->getName());
+#ifndef USE_LIBFFI
+ errs() << "Recompiling LLVM with --enable-libffi might help.\n";
+#endif
return GenericValue();
}
@@ -419,83 +422,6 @@ GenericValue lle_X_printf(const FunctionType *FT,
return GV;
}
-static void ByteswapSCANFResults(LLVMContext &C,
- const char *Fmt, void *Arg0, void *Arg1,
- void *Arg2, void *Arg3, void *Arg4, void *Arg5,
- void *Arg6, void *Arg7, void *Arg8) {
- void *Args[] = { Arg0, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, 0 };
-
- // Loop over the format string, munging read values as appropriate (performs
- // byteswaps as necessary).
- unsigned ArgNo = 0;
- while (*Fmt) {
- if (*Fmt++ == '%') {
- // Read any flag characters that may be present...
- bool Suppress = false;
- bool Half = false;
- bool Long = false;
- bool LongLong = false; // long long or long double
-
- while (1) {
- switch (*Fmt++) {
- case '*': Suppress = true; break;
- case 'a': /*Allocate = true;*/ break; // We don't need to track this
- case 'h': Half = true; break;
- case 'l': Long = true; break;
- case 'q':
- case 'L': LongLong = true; break;
- default:
- if (Fmt[-1] > '9' || Fmt[-1] < '0') // Ignore field width specs
- goto Out;
- }
- }
- Out:
-
- // Read the conversion character
- if (!Suppress && Fmt[-1] != '%') { // Nothing to do?
- unsigned Size = 0;
- const Type *Ty = 0;
-
- switch (Fmt[-1]) {
- case 'i': case 'o': case 'u': case 'x': case 'X': case 'n': case 'p':
- case 'd':
- if (Long || LongLong) {
- Size = 8; Ty = Type::getInt64Ty(C);
- } else if (Half) {
- Size = 4; Ty = Type::getInt16Ty(C);
- } else {
- Size = 4; Ty = Type::getInt32Ty(C);
- }
- break;
-
- case 'e': case 'g': case 'E':
- case 'f':
- if (Long || LongLong) {
- Size = 8; Ty = Type::getDoubleTy(C);
- } else {
- Size = 4; Ty = Type::getFloatTy(C);
- }
- break;
-
- case 's': case 'c': case '[': // No byteswap needed
- Size = 1;
- Ty = Type::getInt8Ty(C);
- break;
-
- default: break;
- }
-
- if (Size) {
- GenericValue GV;
- void *Arg = Args[ArgNo++];
- memcpy(&GV, Arg, Size);
- TheInterpreter->StoreValueToMemory(GV, (GenericValue*)Arg, Ty);
- }
- }
- }
- }
-}
-
// int sscanf(const char *format, ...);
GenericValue lle_X_sscanf(const FunctionType *FT,
const std::vector<GenericValue> &args) {
@@ -508,9 +434,6 @@ GenericValue lle_X_sscanf(const FunctionType *FT,
GenericValue GV;
GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4],
Args[5], Args[6], Args[7], Args[8], Args[9]));
- ByteswapSCANFResults(FT->getContext(),
- Args[1], Args[2], Args[3], Args[4],
- Args[5], Args[6], Args[7], Args[8], Args[9], 0);
return GV;
}
@@ -526,9 +449,6 @@ GenericValue lle_X_scanf(const FunctionType *FT,
GenericValue GV;
GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4],
Args[5], Args[6], Args[7], Args[8], Args[9]));
- ByteswapSCANFResults(FT->getContext(),
- Args[0], Args[1], Args[2], Args[3], Args[4],
- Args[5], Args[6], Args[7], Args[8], Args[9]);
return GV;
}
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index e21d760..6d781c7 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -198,15 +198,17 @@ ExecutionEngine *ExecutionEngine::createJIT(ModuleProvider *MP,
std::string *ErrorStr,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
- bool GVsWithCode) {
- return JIT::createJIT(MP, ErrorStr, JMM, OptLevel, GVsWithCode);
+ bool GVsWithCode,
+ CodeModel::Model CMM) {
+ return JIT::createJIT(MP, ErrorStr, JMM, OptLevel, GVsWithCode, CMM);
}
ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
std::string *ErrorStr,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
- bool GVsWithCode) {
+ bool GVsWithCode,
+ CodeModel::Model CMM) {
// Make sure we can resolve symbols in the program as well. The zero arg
// to the function tells DynamicLibrary to load the program, not a library.
if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
@@ -215,6 +217,7 @@ ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
// Pick a target either via -march or by guessing the native arch.
TargetMachine *TM = JIT::selectTarget(MP, ErrorStr);
if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+ TM->setCodeModel(CMM);
  // If the target supports JIT code generation, create the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo()) {
@@ -613,11 +616,6 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
// the stub with real address of the function.
updateFunctionStub(PF);
}
-
- // If the JIT is configured to emit info so that dlsym can be used to
- // rewrite stubs to external globals, do so now.
- if (areDlsymStubsEnabled() && !isCompilingLazily())
- updateDlsymStubTable();
}
/// getPointerToFunction - This method is used to get the address of the
@@ -660,8 +658,7 @@ void *JIT::getPointerToFunction(Function *F) {
}
if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
- bool AbortOnFailure =
- !areDlsymStubsEnabled() && !F->hasExternalWeakLinkage();
+ bool AbortOnFailure = !F->hasExternalWeakLinkage();
void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
addGlobalMapping(F, Addr);
return Addr;
@@ -690,7 +687,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
return (void*)&__dso_handle;
#endif
Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
- if (Ptr == 0 && !areDlsymStubsEnabled()) {
+ if (Ptr == 0) {
llvm_report_error("Could not resolve external global address: "
+GV->getName());
}
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index fb3cb24..f165bd6 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -85,8 +85,10 @@ public:
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel =
CodeGenOpt::Default,
- bool GVsWithCode = true) {
- return ExecutionEngine::createJIT(MP, Err, JMM, OptLevel, GVsWithCode);
+ bool GVsWithCode = true,
+ CodeModel::Model CMM = CodeModel::Default) {
+ return ExecutionEngine::createJIT(MP, Err, JMM, OptLevel, GVsWithCode,
+ CMM);
}
virtual void addModuleProvider(ModuleProvider *MP);
@@ -175,7 +177,8 @@ public:
std::string *ErrorStr,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
- bool GVsWithCode);
+ bool GVsWithCode,
+ CodeModel::Model CMM);
// Run the JIT on F and return information about the generated code
void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
@@ -195,7 +198,6 @@ private:
TargetMachine &tm);
void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
void updateFunctionStub(Function *F);
- void updateDlsymStubTable();
protected:
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 49faf64..565509c 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -35,7 +35,7 @@ namespace llvm {
extern "C" {
  // Debuggers put a breakpoint in this function.
- void DISABLE_INLINE __jit_debug_register_code() { }
+ DISABLE_INLINE void __jit_debug_register_code() { }
// We put information about the JITed function in this global, which the
// debugger reads. Make sure to specify the version statically, because the
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 79f1eb4..5f195ee 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -63,6 +63,7 @@ static JIT *TheJIT = 0;
// JIT lazy compilation code.
//
namespace {
+ class JITEmitter;
class JITResolverState;
template<typename ValueTy>
@@ -213,16 +214,18 @@ namespace {
std::map<void*, unsigned> revGOTMap;
unsigned nextGOTIndex;
+ JITEmitter &JE;
+
static JITResolver *TheJITResolver;
public:
- explicit JITResolver(JIT &jit) : nextGOTIndex(0) {
+ explicit JITResolver(JIT &jit, JITEmitter &je) : nextGOTIndex(0), JE(je) {
TheJIT = &jit;
LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn);
assert(TheJITResolver == 0 && "Multiple JIT resolvers?");
TheJITResolver = this;
}
-
+
~JITResolver() {
TheJITResolver = 0;
}
@@ -244,19 +247,9 @@ namespace {
/// specified GV address.
void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
- /// AddCallbackAtLocation - If the target is capable of rewriting an
- /// instruction without the use of a stub, record the location of the use so
- /// we know which function is being used at the location.
- void *AddCallbackAtLocation(Function *F, void *Location) {
- MutexGuard locked(TheJIT->lock);
- /// Get the target-specific JIT resolver function.
- state.AddCallSite(locked, Location, F);
- return (void*)(intptr_t)LazyResolverFn;
- }
-
void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
SmallVectorImpl<void*> &Ptrs);
-
+
GlobalValue *invalidateStub(void *Stub);
/// getGOTIndexForAddress - Return a new or existing index in the GOT for
@@ -269,6 +262,225 @@ namespace {
/// been compiled, this function compiles it first.
static void *JITCompilerFn(void *Stub);
};
+
+ /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
+ /// used to output functions to memory for execution.
+ class JITEmitter : public JITCodeEmitter {
+ JITMemoryManager *MemMgr;
+
+ // When outputting a function stub in the context of some other function, we
+ // save BufferBegin/BufferEnd/CurBufferPtr here.
+ uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+
+ // When reattempting to JIT a function after running out of space, we store
+ // the estimated size of the function we're trying to JIT here, so we can
+ // ask the memory manager for at least this much space. When we
+ // successfully emit the function, we reset this back to zero.
+ uintptr_t SizeEstimate;
+
+ /// Relocations - These are the relocations that the function needs, as
+ /// emitted.
+ std::vector<MachineRelocation> Relocations;
+
+    /// MBBLocations - This vector is a mapping from MBB IDs to their addresses.
+ /// It is filled in by the StartMachineBasicBlock callback and queried by
+ /// the getMachineBasicBlockAddress callback.
+ std::vector<uintptr_t> MBBLocations;
+
+ /// ConstantPool - The constant pool for the current function.
+ ///
+ MachineConstantPool *ConstantPool;
+
+ /// ConstantPoolBase - A pointer to the first entry in the constant pool.
+ ///
+ void *ConstantPoolBase;
+
+ /// ConstPoolAddresses - Addresses of individual constant pool entries.
+ ///
+ SmallVector<uintptr_t, 8> ConstPoolAddresses;
+
+ /// JumpTable - The jump tables for the current function.
+ ///
+ MachineJumpTableInfo *JumpTable;
+
+ /// JumpTableBase - A pointer to the first entry in the jump table.
+ ///
+ void *JumpTableBase;
+
+ /// Resolver - This contains info about the currently resolved functions.
+ JITResolver Resolver;
+
+ /// DE - The dwarf emitter for the jit.
+ OwningPtr<JITDwarfEmitter> DE;
+
+ /// DR - The debug registerer for the jit.
+ OwningPtr<JITDebugRegisterer> DR;
+
+    /// LabelLocations - This vector is a mapping from Label IDs to their
+    /// addresses.
+ std::vector<uintptr_t> LabelLocations;
+
+    /// MMI - Machine module info for exception information.
+ MachineModuleInfo* MMI;
+
+ // GVSet - a set to keep track of which globals have been seen
+ SmallPtrSet<const GlobalVariable*, 8> GVSet;
+
+ // CurFn - The llvm function being emitted. Only valid during
+ // finishFunction().
+ const Function *CurFn;
+
+ /// Information about emitted code, which is passed to the
+ /// JITEventListeners. This is reset in startFunction and used in
+ /// finishFunction.
+ JITEvent_EmittedFunctionDetails EmissionDetails;
+
+ struct EmittedCode {
+ void *FunctionBody; // Beginning of the function's allocation.
+ void *Code; // The address the function's code actually starts at.
+ void *ExceptionTable;
+ EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {}
+ };
+ struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
+ typedef JITEmitter *ExtraData;
+ static void onDelete(JITEmitter *, const Function*);
+ static void onRAUW(JITEmitter *, const Function*, const Function*);
+ };
+ ValueMap<const Function *, EmittedCode,
+ EmittedFunctionConfig> EmittedFunctions;
+
+ // CurFnStubUses - For a given Function, a vector of stubs that it
+ // references. This facilitates the JIT detecting that a stub is no
+ // longer used, so that it may be deallocated.
+ DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses;
+
+ // StubFnRefs - For a given pointer to a stub, a set of Functions which
+ // reference the stub. When the count of a stub's references drops to zero,
+ // the stub is unused.
+ DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs;
+
+ DebugLocTuple PrevDLT;
+
+ public:
+ JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
+ : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
+ EmittedFunctions(this) {
+ MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
+ if (jit.getJITInfo().needsGOT()) {
+ MemMgr->AllocateGOT();
+ DEBUG(errs() << "JIT is managing a GOT\n");
+ }
+
+ if (DwarfExceptionHandling || JITEmitDebugInfo) {
+ DE.reset(new JITDwarfEmitter(jit));
+ }
+ if (JITEmitDebugInfo) {
+ DR.reset(new JITDebugRegisterer(TM));
+ }
+ }
+ ~JITEmitter() {
+ delete MemMgr;
+ }
+
+ /// classof - Methods for support type inquiry through isa, cast, and
+ /// dyn_cast:
+ ///
+ static inline bool classof(const JITEmitter*) { return true; }
+ static inline bool classof(const MachineCodeEmitter*) { return true; }
+
+ JITResolver &getJITResolver() { return Resolver; }
+
+ virtual void startFunction(MachineFunction &F);
+ virtual bool finishFunction(MachineFunction &F);
+
+ void emitConstantPool(MachineConstantPool *MCP);
+ void initJumpTableInfo(MachineJumpTableInfo *MJTI);
+ void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
+
+ virtual void startGVStub(const GlobalValue* GV, unsigned StubSize,
+ unsigned Alignment = 1);
+ virtual void startGVStub(const GlobalValue* GV, void *Buffer,
+ unsigned StubSize);
+ virtual void* finishGVStub(const GlobalValue *GV);
+
+    /// allocateSpace - Reserves space in the current block if any, or
+    /// allocates a new one of the given size.
+ virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
+
+ /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace,
+ /// this method does not allocate memory in the current output buffer,
+ /// because a global may live longer than the current function.
+ virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment);
+
+ virtual void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCValue();
+ DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+ << (void*) getCurrentPCValue() << "]\n");
+ }
+
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
+
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+ }
+
+ /// retryWithMoreMemory - Log a retry and deallocate all memory for the
+ /// given function. Increase the minimum allocation size so that we get
+ /// more memory next time.
+ void retryWithMoreMemory(MachineFunction &F);
+
+ /// deallocateMemForFunction - Deallocate all memory for the specified
+ /// function body.
+ void deallocateMemForFunction(const Function *F);
+
+ /// AddStubToCurrentFunction - Mark the current function being JIT'd as
+ /// using the stub at the specified address. Allows
+ /// deallocateMemForFunction to also remove stubs no longer referenced.
+ void AddStubToCurrentFunction(void *Stub);
+
+ virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
+
+ virtual void emitLabel(uint64_t LabelID) {
+ if (LabelLocations.size() <= LabelID)
+ LabelLocations.resize((LabelID+1)*2);
+ LabelLocations[LabelID] = getCurrentPCValue();
+ }
+
+ virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
+ assert(LabelLocations.size() > (unsigned)LabelID &&
+ LabelLocations[LabelID] && "Label not emitted!");
+ return LabelLocations[LabelID];
+ }
+
+ virtual void setModuleInfo(MachineModuleInfo* Info) {
+ MMI = Info;
+ if (DE.get()) DE->setModuleInfo(Info);
+ }
+
+ void setMemoryExecutable() {
+ MemMgr->setMemoryExecutable();
+ }
+
+ JITMemoryManager *getMemMgr() const { return MemMgr; }
+
+ private:
+ void *getPointerToGlobal(GlobalValue *GV, void *Reference,
+ bool MayNeedFarStub);
+ void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
+ unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size);
+ unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size);
+ unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size);
+ unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF);
+ };
}
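
The EmittedFunctions member above uses ValueMap with a custom config so the
emitter hears when a Function is deleted or replaced (RAUW) and can free its
machine code. A self-contained sketch of that callback-config idea; this is a
simplified analogue (the real llvm::ValueMap hooks into Value's use lists
automatically, and the deletion hook is invoked for you):

#include <cstdio>
#include <map>

struct Function { const char *Name; };

template <typename Config, typename ValueT>
class CallbackMap {
  std::map<const Function *, ValueT> Map;
  typename Config::ExtraData Data;
public:
  explicit CallbackMap(typename Config::ExtraData D) : Data(D) {}
  ValueT &operator[](const Function *F) { return Map[F]; }
  // Called by whoever destroys a Function, mirroring ValueMap's onDelete hook.
  void keyDeleted(const Function *F) {
    Config::onDelete(Data, F);
    Map.erase(F);
  }
};

struct Emitter;
struct EmittedFunctionConfig {
  typedef Emitter *ExtraData;
  static void onDelete(Emitter *E, const Function *F);
};

struct Emitter {
  CallbackMap<EmittedFunctionConfig, int> EmittedFunctions;
  Emitter() : EmittedFunctions(this) {}
};

void EmittedFunctionConfig::onDelete(Emitter *, const Function *F) {
  std::printf("deallocating code for %s\n", F->Name); // e.g. free machine code
}

int main() {
  Emitter E;
  Function Foo{"foo"};
  E.EmittedFunctions[&Foo] = 42;       // record emitted code for foo
  E.EmittedFunctions.keyDeleted(&Foo); // Function destroyed -> hook fires
  return 0;
}
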
JITResolver *JITResolver::TheJITResolver = 0;
@@ -306,16 +518,13 @@ void *JITResolver::getFunctionStub(Function *F) {
Actual = TheJIT->getPointerToFunction(F);
// If we resolved the symbol to a null address (eg. a weak external)
- // don't emit a stub. Return a null pointer to the application. If dlsym
- // stubs are enabled, not being able to resolve the address is not
- // meaningful.
- if (!Actual && !TheJIT->areDlsymStubsEnabled()) return 0;
+ // don't emit a stub. Return a null pointer to the application.
+ if (!Actual) return 0;
}
// Codegen a new stub, calling the lazy resolver or the actual address of the
// external function, if it was resolved.
- Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual,
- *TheJIT->getCodeEmitter());
+ Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE);
if (Actual != (void*)(intptr_t)LazyResolverFn) {
// If we are getting the stub for an external function, we really want the
@@ -352,9 +561,9 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
// Otherwise, codegen a new indirect symbol.
IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
- *TheJIT->getCodeEmitter());
+ JE);
- DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym
+ DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym
<< "] for GV '" << GV->getName() << "'\n");
return IndirectSym;
@@ -367,8 +576,7 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) {
void *&Stub = ExternalFnToStubMap[FnAddr];
if (Stub) return Stub;
- Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr,
- *TheJIT->getCodeEmitter());
+ Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE);
DEBUG(errs() << "JIT: Stub emitted at [" << Stub
<< "] for external function at '" << FnAddr << "'\n");
@@ -389,10 +597,10 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) {
void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
SmallVectorImpl<void*> &Ptrs) {
MutexGuard locked(TheJIT->lock);
-
+
const FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked);
GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked);
-
+
for (FunctionToStubMapTy::const_iterator i = FM.begin(), e = FM.end();
i != e; ++i){
Function *F = i->first;
@@ -428,7 +636,7 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) {
GM.erase(i);
return GV;
}
-
+
// Lastly, check to see if it's in the ExternalFnToStubMap.
for (std::map<void *, void *>::iterator i = ExternalFnToStubMap.begin(),
e = ExternalFnToStubMap.end(); i != e; ++i) {
@@ -437,7 +645,7 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) {
ExternalFnToStubMap.erase(i);
break;
}
-
+
return 0;
}
@@ -446,7 +654,7 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) {
/// it if necessary, then returns the resultant function pointer.
void *JITResolver::JITCompilerFn(void *Stub) {
JITResolver &JR = *TheJITResolver;
-
+
Function* F = 0;
void* ActualPtr = 0;
@@ -466,16 +674,16 @@ void *JITResolver::JITCompilerFn(void *Stub) {
// If we have already code generated the function, just return the address.
void *Result = TheJIT->getPointerToGlobalIfAvailable(F);
-
+
if (!Result) {
// Otherwise we don't have it, do lazy compilation now.
-
+
// If lazy compilation is disabled, emit a useful error message and abort.
if (!TheJIT->isCompilingLazily()) {
llvm_report_error("LLVM JIT requested to do lazy compilation of function '"
+ F->getName() + "' when lazy compiles are disabled!");
}
-
+
DEBUG(errs() << "JIT: Lazily resolving function '" << F->getName()
<< "' In stub ptr = " << Stub << " actual ptr = "
<< ActualPtr << "\n");
@@ -508,237 +716,8 @@ void *JITResolver::JITCompilerFn(void *Stub) {
//===----------------------------------------------------------------------===//
// JITEmitter code.
//
-namespace {
- /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
- /// used to output functions to memory for execution.
- class JITEmitter : public JITCodeEmitter {
- JITMemoryManager *MemMgr;
-
- // When outputting a function stub in the context of some other function, we
- // save BufferBegin/BufferEnd/CurBufferPtr here.
- uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
-
- // When reattempting to JIT a function after running out of space, we store
- // the estimated size of the function we're trying to JIT here, so we can
- // ask the memory manager for at least this much space. When we
- // successfully emit the function, we reset this back to zero.
- uintptr_t SizeEstimate;
-
- /// Relocations - These are the relocations that the function needs, as
- /// emitted.
- std::vector<MachineRelocation> Relocations;
-
- /// MBBLocations - This vector is a mapping from MBB ID's to their address.
- /// It is filled in by the StartMachineBasicBlock callback and queried by
- /// the getMachineBasicBlockAddress callback.
- std::vector<uintptr_t> MBBLocations;
-
- /// ConstantPool - The constant pool for the current function.
- ///
- MachineConstantPool *ConstantPool;
-
- /// ConstantPoolBase - A pointer to the first entry in the constant pool.
- ///
- void *ConstantPoolBase;
-
- /// ConstPoolAddresses - Addresses of individual constant pool entries.
- ///
- SmallVector<uintptr_t, 8> ConstPoolAddresses;
-
- /// JumpTable - The jump tables for the current function.
- ///
- MachineJumpTableInfo *JumpTable;
-
- /// JumpTableBase - A pointer to the first entry in the jump table.
- ///
- void *JumpTableBase;
-
- /// Resolver - This contains info about the currently resolved functions.
- JITResolver Resolver;
-
- /// DE - The dwarf emitter for the jit.
- OwningPtr<JITDwarfEmitter> DE;
-
- /// DR - The debug registerer for the jit.
- OwningPtr<JITDebugRegisterer> DR;
-
- /// LabelLocations - This vector is a mapping from Label ID's to their
- /// address.
- std::vector<uintptr_t> LabelLocations;
-
- /// MMI - Machine module info for exception informations
- MachineModuleInfo* MMI;
-
- // GVSet - a set to keep track of which globals have been seen
- SmallPtrSet<const GlobalVariable*, 8> GVSet;
-
- // CurFn - The llvm function being emitted. Only valid during
- // finishFunction().
- const Function *CurFn;
-
- /// Information about emitted code, which is passed to the
- /// JITEventListeners. This is reset in startFunction and used in
- /// finishFunction.
- JITEvent_EmittedFunctionDetails EmissionDetails;
-
- struct EmittedCode {
- void *FunctionBody; // Beginning of the function's allocation.
- void *Code; // The address the function's code actually starts at.
- void *ExceptionTable;
- EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {}
- };
- struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
- typedef JITEmitter *ExtraData;
- static void onDelete(JITEmitter *, const Function*);
- static void onRAUW(JITEmitter *, const Function*, const Function*);
- };
- ValueMap<const Function *, EmittedCode,
- EmittedFunctionConfig> EmittedFunctions;
-
- // CurFnStubUses - For a given Function, a vector of stubs that it
- // references. This facilitates the JIT detecting that a stub is no
- // longer used, so that it may be deallocated.
- DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses;
-
- // StubFnRefs - For a given pointer to a stub, a set of Functions which
- // reference the stub. When the count of a stub's references drops to zero,
- // the stub is unused.
- DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs;
-
- // ExtFnStubs - A map of external function names to stubs which have entries
- // in the JITResolver's ExternalFnToStubMap.
- StringMap<void *> ExtFnStubs;
-
- DebugLocTuple PrevDLT;
-
- public:
- JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
- : SizeEstimate(0), Resolver(jit), MMI(0), CurFn(0),
- EmittedFunctions(this) {
- MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
- if (jit.getJITInfo().needsGOT()) {
- MemMgr->AllocateGOT();
- DEBUG(errs() << "JIT is managing a GOT\n");
- }
-
- if (DwarfExceptionHandling || JITEmitDebugInfo) {
- DE.reset(new JITDwarfEmitter(jit));
- }
- if (JITEmitDebugInfo) {
- DR.reset(new JITDebugRegisterer(TM));
- }
- }
- ~JITEmitter() {
- delete MemMgr;
- }
-
- /// classof - Methods for support type inquiry through isa, cast, and
- /// dyn_cast:
- ///
- static inline bool classof(const JITEmitter*) { return true; }
- static inline bool classof(const MachineCodeEmitter*) { return true; }
-
- JITResolver &getJITResolver() { return Resolver; }
-
- virtual void startFunction(MachineFunction &F);
- virtual bool finishFunction(MachineFunction &F);
-
- void emitConstantPool(MachineConstantPool *MCP);
- void initJumpTableInfo(MachineJumpTableInfo *MJTI);
- void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
-
- virtual void startGVStub(const GlobalValue* GV, unsigned StubSize,
- unsigned Alignment = 1);
- virtual void startGVStub(const GlobalValue* GV, void *Buffer,
- unsigned StubSize);
- virtual void* finishGVStub(const GlobalValue *GV);
-
- /// allocateSpace - Reserves space in the current block if any, or
- /// allocate a new one of the given size.
- virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
-
- /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace,
- /// this method does not allocate memory in the current output buffer,
- /// because a global may live longer than the current function.
- virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment);
-
- virtual void addRelocation(const MachineRelocation &MR) {
- Relocations.push_back(MR);
- }
-
- virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
- if (MBBLocations.size() <= (unsigned)MBB->getNumber())
- MBBLocations.resize((MBB->getNumber()+1)*2);
- MBBLocations[MBB->getNumber()] = getCurrentPCValue();
- DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
- << (void*) getCurrentPCValue() << "]\n");
- }
-
- virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
- virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
-
- virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
- assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
- MBBLocations[MBB->getNumber()] && "MBB not emitted!");
- return MBBLocations[MBB->getNumber()];
- }
-
- /// retryWithMoreMemory - Log a retry and deallocate all memory for the
- /// given function. Increase the minimum allocation size so that we get
- /// more memory next time.
- void retryWithMoreMemory(MachineFunction &F);
-
- /// deallocateMemForFunction - Deallocate all memory for the specified
- /// function body.
- void deallocateMemForFunction(const Function *F);
-
- /// AddStubToCurrentFunction - Mark the current function being JIT'd as
- /// using the stub at the specified address. Allows
- /// deallocateMemForFunction to also remove stubs no longer referenced.
- void AddStubToCurrentFunction(void *Stub);
-
- /// getExternalFnStubs - Accessor for the JIT to find stubs emitted for
- /// MachineRelocations that reference external functions by name.
- const StringMap<void*> &getExternalFnStubs() const { return ExtFnStubs; }
-
- virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
-
- virtual void emitLabel(uint64_t LabelID) {
- if (LabelLocations.size() <= LabelID)
- LabelLocations.resize((LabelID+1)*2);
- LabelLocations[LabelID] = getCurrentPCValue();
- }
-
- virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
- assert(LabelLocations.size() > (unsigned)LabelID &&
- LabelLocations[LabelID] && "Label not emitted!");
- return LabelLocations[LabelID];
- }
-
- virtual void setModuleInfo(MachineModuleInfo* Info) {
- MMI = Info;
- if (DE.get()) DE->setModuleInfo(Info);
- }
-
- void setMemoryExecutable() {
- MemMgr->setMemoryExecutable();
- }
-
- JITMemoryManager *getMemMgr() const { return MemMgr; }
-
- private:
- void *getPointerToGlobal(GlobalValue *GV, void *Reference, bool NoNeedStub);
- void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
- bool NoNeedStub);
- unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size);
- unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size);
- unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size);
- unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF);
- };
-}
-
void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
- bool DoesntNeedStub) {
+ bool MayNeedFarStub) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return TheJIT->getOrEmitGlobalVariable(GV);
@@ -747,31 +726,26 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
// If we have already compiled the function, return a pointer to its body.
Function *F = cast<Function>(V);
- void *ResultPtr;
- if (!DoesntNeedStub) {
- // Return the function stub if it's already created.
- ResultPtr = Resolver.getFunctionStubIfAvailable(F);
- if (ResultPtr)
- AddStubToCurrentFunction(ResultPtr);
- } else {
- ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
+
+ void *FnStub = Resolver.getFunctionStubIfAvailable(F);
+ if (FnStub) {
+ // Return the function stub if it's already created. We do this first
+ // so that we're returning the same address for the function as any
+ // previous call.
+ AddStubToCurrentFunction(FnStub);
+ return FnStub;
}
+
+  // Otherwise, if we have code, go ahead and return that.
+ void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
if (ResultPtr) return ResultPtr;
// If this is an external function pointer, we can force the JIT to
- // 'compile' it, which really just adds it to the map. In dlsym mode,
- // external functions are forced through a stub, regardless of reloc type.
+ // 'compile' it, which really just adds it to the map.
if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode() &&
- DoesntNeedStub && !TheJIT->areDlsymStubsEnabled())
+ !MayNeedFarStub)
return TheJIT->getPointerToFunction(F);
- // Okay, the function has not been compiled yet, if the target callback
- // mechanism is capable of rewriting the instruction directly, prefer to do
- // that instead of emitting a stub. This uses the lazy resolver, so is not
- // legal if lazy compilation is disabled.
- if (DoesntNeedStub && TheJIT->isCompilingLazily())
- return Resolver.AddCallbackAtLocation(F, Reference);
-
// Otherwise, we have to emit a stub.
void *StubAddr = Resolver.getFunctionStub(F);
@@ -785,17 +759,16 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
return StubAddr;
}
-void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
- bool NoNeedStub) {
+void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
// Make sure GV is emitted first, and create a stub containing the fully
// resolved address.
- void *GVAddress = getPointerToGlobal(V, Reference, true);
+ void *GVAddress = getPointerToGlobal(V, Reference, false);
void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
-
+
// Add the stub to the current function's list of referenced stubs, so we can
// deallocate them if the current function is ever freed.
AddStubToCurrentFunction(StubAddr);
-
+
return StubAddr;
}
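
The rewritten getPointerToGlobal above replaces the old DoesntNeedStub flag with MayNeedFarStub and fixes the lookup order: an existing stub wins (so a function keeps a single stable address), then already-emitted code, then a direct pointer for resolvable externals, and only then a fresh stub. A hypothetical standalone sketch of that priority, with booleans standing in for the real lookups:

    #include <cassert>

    enum Source { ExistingStub, EmittedCode, DirectExternal, NewStub };

    // Decision order of the rewritten getPointerToGlobal (illustrative only).
    static Source resolve(bool HaveStub, bool HaveCode, bool IsExternal,
                          bool MayNeedFarStub) {
      if (HaveStub)                      return ExistingStub;  // stable address
      if (HaveCode)                      return EmittedCode;
      if (IsExternal && !MayNeedFarStub) return DirectExternal;
      return NewStub;
    }

    int main() {
      assert(resolve(true, true, false, false) == ExistingStub);
      assert(resolve(false, false, true, true) == NewStub);
      return 0;
    }
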
@@ -820,7 +793,7 @@ void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
NextLine.Loc = DL;
EmissionDetails.LineStarts.push_back(NextLine);
}
-
+
PrevDLT = CurDLT;
}
}
@@ -845,7 +818,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return 0;
-
+
unsigned NumEntries = 0;
for (unsigned i = 0, e = JT.size(); i != e; ++i)
NumEntries += JT[i].MBBs.size();
@@ -857,7 +830,7 @@ static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) {
static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
if (Alignment == 0) Alignment = 1;
- // Since we do not know where the buffer will be allocated, be pessimistic.
+ // Since we do not know where the buffer will be allocated, be pessimistic.
return Size + Alignment;
}
@@ -867,7 +840,7 @@ static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
const Type *ElTy = GV->getType()->getElementType();
size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy);
- size_t GVAlign =
+ size_t GVAlign =
(size_t)TheJIT->getTargetData()->getPreferredAlignment(GV);
DEBUG(errs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign);
DEBUG(GV->dump());
@@ -884,7 +857,7 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
/// but are referenced from the constant; put them in GVSet and add their
/// size into the running total Size.
-unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
+unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
unsigned Size) {
  // If it's undefined, return the garbage.
if (isa<UndefValue>(C))
@@ -947,7 +920,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
/// addSizeOfGLobalsInInitializer - handle any globals that we haven't seen yet
/// but are referenced from the given initializer.
-unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init,
+unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init,
unsigned Size) {
if (!isa<UndefValue>(Init) &&
!isa<ConstantVector>(Init) &&
@@ -968,7 +941,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
unsigned Size = 0;
GVSet.clear();
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
@@ -1000,7 +973,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
DEBUG(errs() << "JIT: About to look through initializers\n");
// Look for more globals that are referenced only from initializers.
// GVSet.end is computed each time because the set can grow as we go.
- for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin();
+ for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin();
I != GVSet.end(); I++) {
const GlobalVariable* GV = *I;
if (GV->hasInitializer())
@@ -1022,10 +995,10 @@ void JITEmitter::startFunction(MachineFunction &F) {
const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
MachineJumpTableInfo *MJTI = F.getJumpTableInfo();
MachineConstantPool *MCP = F.getConstantPool();
-
+
  // Ensure the constant pool/jump table info is at least 16-byte aligned.
ActualSize = RoundUpToAlign(ActualSize, 16);
-
+
// Add the alignment of the constant pool
ActualSize = RoundUpToAlign(ActualSize, MCP->getConstantPoolAlignment());
@@ -1037,7 +1010,7 @@ void JITEmitter::startFunction(MachineFunction &F) {
// Add the jump table size
ActualSize += GetJumpTableSizeInBytes(MJTI);
-
+
// Add the alignment for the function
ActualSize = RoundUpToAlign(ActualSize,
std::max(F.getFunction()->getAlignment(), 8U));
@@ -1110,29 +1083,19 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(),
false);
DEBUG(errs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
- << ResultPtr << "]\n");
+ << ResultPtr << "]\n");
// If the target REALLY wants a stub for this function, emit it now.
- if (!MR.doesntNeedStub()) {
- if (!TheJIT->areDlsymStubsEnabled()) {
- ResultPtr = Resolver.getExternalFunctionStub(ResultPtr);
- } else {
- void *&Stub = ExtFnStubs[MR.getExternalSymbol()];
- if (!Stub) {
- Stub = Resolver.getExternalFunctionStub((void *)&Stub);
- AddStubToCurrentFunction(Stub);
- }
- ResultPtr = Stub;
- }
+ if (MR.mayNeedFarStub()) {
+ ResultPtr = Resolver.getExternalFunctionStub(ResultPtr);
}
} else if (MR.isGlobalValue()) {
ResultPtr = getPointerToGlobal(MR.getGlobalValue(),
BufferBegin+MR.getMachineCodeOffset(),
- MR.doesntNeedStub());
+ MR.mayNeedFarStub());
} else if (MR.isIndirectSymbol()) {
- ResultPtr = getPointerToGVIndirectSym(MR.getGlobalValue(),
- BufferBegin+MR.getMachineCodeOffset(),
- MR.doesntNeedStub());
+ ResultPtr = getPointerToGVIndirectSym(
+ MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset());
} else if (MR.isBasicBlock()) {
ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock());
} else if (MR.isConstantPoolIndex()) {
@@ -1278,7 +1241,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
if (MMI)
MMI->EndFunction();
-
+
return false;
}
@@ -1316,20 +1279,20 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
// If the function did not reference any stubs, return.
if (CurFnStubUses.find(F) == CurFnStubUses.end())
return;
-
+
// For each referenced stub, erase the reference to this function, and then
// erase the list of referenced stubs.
SmallVectorImpl<void *> &StubList = CurFnStubUses[F];
for (unsigned i = 0, e = StubList.size(); i != e; ++i) {
void *Stub = StubList[i];
-
+
// If we already invalidated this stub for this function, continue.
if (StubFnRefs.count(Stub) == 0)
continue;
-
+
SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[Stub];
FnRefs.erase(F);
-
+
// If this function was the last reference to the stub, invalidate the stub
// in the JITResolver. Were there a memory manager deallocateStub routine,
// we could call that at this point too.
@@ -1338,19 +1301,10 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
StubFnRefs.erase(Stub);
// Invalidate the stub. If it is a GV stub, update the JIT's global
- // mapping for that GV to zero, otherwise, search the string map of
- // external function names to stubs and remove the entry for this stub.
+ // mapping for that GV to zero.
GlobalValue *GV = Resolver.invalidateStub(Stub);
if (GV) {
TheJIT->updateGlobalMapping(GV, 0);
- } else {
- for (StringMapIterator<void*> i = ExtFnStubs.begin(),
- e = ExtFnStubs.end(); i != e; ++i) {
- if (i->second == Stub) {
- ExtFnStubs.erase(i);
- break;
- }
- }
}
}
}
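
deallocateMemForFunction above keeps two-way bookkeeping between functions and the stubs they reference (CurFnStubUses and StubFnRefs); when the last referencing function is freed, the stub itself becomes invalidatable. A minimal sketch of that bookkeeping, with plain STL containers standing in for the DenseMap/SmallPtrSet members (names illustrative):

    #include <cassert>
    #include <map>
    #include <set>
    #include <vector>

    typedef std::map<int, std::vector<void*> > FnToStubs;  // CurFnStubUses
    typedef std::map<void*, std::set<int> >    StubToFns;  // StubFnRefs

    // Drop F's reference from every stub it used; count stubs that died.
    static unsigned releaseStubs(int F, FnToStubs &Uses, StubToFns &Refs) {
      unsigned Dead = 0;
      std::vector<void*> &Stubs = Uses[F];
      for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
        std::set<int> &FnRefs = Refs[Stubs[i]];
        FnRefs.erase(F);
        if (FnRefs.empty()) {  // last user gone: stub may be invalidated
          Refs.erase(Stubs[i]);
          ++Dead;
        }
      }
      Uses.erase(F);
      return Dead;
    }

    int main() {
      FnToStubs Uses; StubToFns Refs;
      static int stub;
      Uses[1].push_back(&stub); Refs[&stub].insert(1);
      assert(releaseStubs(1, Uses, Refs) == 1);
      return 0;
    }
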
@@ -1421,7 +1375,7 @@ void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;
-
+
unsigned NumEntries = 0;
for (unsigned i = 0, e = JT.size(); i != e; ++i)
NumEntries += JT[i].MBBs.size();
@@ -1441,7 +1395,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty() || JumpTableBase == 0) return;
-
+
if (TargetMachine::getRelocationModel() == Reloc::PIC_) {
assert(MJTI->getEntrySize() == 4 && "Cross JIT'ing?");
// For each jump table, place the offset from the beginning of the table
@@ -1460,8 +1414,8 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
}
} else {
assert(MJTI->getEntrySize() == sizeof(void*) && "Cross JIT'ing?");
-
- // For each jump table, map each target in the jump table to the address of
+
+ // For each jump table, map each target in the jump table to the address of
// an emitted MachineBasicBlock.
intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
@@ -1480,7 +1434,7 @@ void JITEmitter::startGVStub(const GlobalValue* GV, unsigned StubSize,
SavedBufferBegin = BufferBegin;
SavedBufferEnd = BufferEnd;
SavedCurBufferPtr = CurBufferPtr;
-
+
BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment);
BufferEnd = BufferBegin+StubSize+1;
}
@@ -1490,7 +1444,7 @@ void JITEmitter::startGVStub(const GlobalValue* GV, void *Buffer,
SavedBufferBegin = BufferBegin;
SavedBufferEnd = BufferEnd;
SavedCurBufferPtr = CurBufferPtr;
-
+
BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
BufferEnd = BufferBegin+StubSize+1;
}
@@ -1519,15 +1473,15 @@ uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const {
uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables();
assert(Index < JT.size() && "Invalid jump table index!");
-
+
unsigned Offset = 0;
unsigned EntrySize = JumpTable->getEntrySize();
-
+
for (unsigned i = 0; i < Index; ++i)
Offset += JT[i].MBBs.size();
-
+
Offset *= EntrySize;
-
+
return (uintptr_t)((char *)JumpTableBase + Offset);
}
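
getJumpTableEntryAddress above finds table Index by summing the entry counts of every preceding table and scaling by the per-entry size. A worked sketch with hypothetical sizes (two tables of 3 and 5 targets, 4-byte entries):

    #include <cassert>
    #include <cstddef>

    // Offset computation from getJumpTableEntryAddress; numbers made up.
    static std::size_t jumpTableOffset(const unsigned NumMBBs[], unsigned Index,
                                       unsigned EntrySize) {
      std::size_t Offset = 0;
      for (unsigned i = 0; i < Index; ++i)
        Offset += NumMBBs[i];     // entries in all preceding tables
      return Offset * EntrySize;  // scale to bytes
    }

    int main() {
      const unsigned NumMBBs[] = { 3, 5 };
      assert(jumpTableOffset(NumMBBs, 1, 4) == 12);  // table 1 starts 12 bytes in
      return 0;
    }
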
@@ -1572,7 +1526,7 @@ void *JIT::getPointerToFunctionOrStub(Function *F) {
// If we have already code generated the function, just return the address.
if (void *Addr = getPointerToGlobalIfAvailable(F))
return Addr;
-
+
// Get a stub if the target supports it.
assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
@@ -1591,92 +1545,6 @@ void JIT::updateFunctionStub(Function *F) {
getJITInfo().emitFunctionStubAtAddr(F, Addr, Stub, *getCodeEmitter());
}
-/// updateDlsymStubTable - Emit the data necessary to relocate the stubs
-/// that were emitted during code generation.
-///
-void JIT::updateDlsymStubTable() {
- assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
- JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
-
- SmallVector<GlobalValue*, 8> GVs;
- SmallVector<void*, 8> Ptrs;
- const StringMap<void *> &ExtFns = JE->getExternalFnStubs();
-
- JE->getJITResolver().getRelocatableGVs(GVs, Ptrs);
-
- unsigned nStubs = GVs.size() + ExtFns.size();
-
- // If there are no relocatable stubs, return.
- if (nStubs == 0)
- return;
-
- // If there are no new relocatable stubs, return.
- void *CurTable = JE->getMemMgr()->getDlsymTable();
- if (CurTable && (*(unsigned *)CurTable == nStubs))
- return;
-
- // Calculate the size of the stub info
- unsigned offset = 4 + 4 * nStubs + sizeof(intptr_t) * nStubs;
-
- SmallVector<unsigned, 8> Offsets;
- for (unsigned i = 0; i != GVs.size(); ++i) {
- Offsets.push_back(offset);
- offset += GVs[i]->getName().size() + 1;
- }
- for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
- i != e; ++i) {
- Offsets.push_back(offset);
- offset += strlen(i->first()) + 1;
- }
-
- // Allocate space for the new "stub", which contains the dlsym table.
- JE->startGVStub(0, offset, 4);
-
- // Emit the number of records
- JE->emitInt32(nStubs);
-
- // Emit the string offsets
- for (unsigned i = 0; i != nStubs; ++i)
- JE->emitInt32(Offsets[i]);
-
- // Emit the pointers. Verify that they are at least 2-byte aligned, and set
- // the low bit to 0 == GV, 1 == Function, so that the client code doing the
- // relocation can write the relocated pointer at the appropriate place in
- // the stub.
- for (unsigned i = 0; i != GVs.size(); ++i) {
- intptr_t Ptr = (intptr_t)Ptrs[i];
- assert((Ptr & 1) == 0 && "Stub pointers must be at least 2-byte aligned!");
-
- if (isa<Function>(GVs[i]))
- Ptr |= (intptr_t)1;
-
- if (sizeof(Ptr) == 8)
- JE->emitInt64(Ptr);
- else
- JE->emitInt32(Ptr);
- }
- for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
- i != e; ++i) {
- intptr_t Ptr = (intptr_t)i->second | 1;
-
- if (sizeof(Ptr) == 8)
- JE->emitInt64(Ptr);
- else
- JE->emitInt32(Ptr);
- }
-
- // Emit the strings.
- for (unsigned i = 0; i != GVs.size(); ++i)
- JE->emitString(GVs[i]->getName());
- for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
- i != e; ++i)
- JE->emitString(i->first());
-
- // Tell the JIT memory manager where it is. The JIT Memory Manager will
- // deallocate space for the old one, if one existed.
- JE->getMemMgr()->SetDlsymTable(JE->finishGVStub(0));
-}
-
/// freeMachineCodeForFunction - release machine code memory for given Function.
///
void JIT::freeMachineCodeForFunction(Function *F) {
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 3796624..80cb999 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -49,23 +49,23 @@ namespace {
/// ThisAllocated - This is true if this block is currently allocated. If
/// not, this can be converted to a FreeRangeHeader.
unsigned ThisAllocated : 1;
-
+
/// PrevAllocated - Keep track of whether the block immediately before us is
/// allocated. If not, the word immediately before this header is the size
/// of the previous block.
unsigned PrevAllocated : 1;
-
+
/// BlockSize - This is the size in bytes of this memory block,
/// including this header.
uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2);
-
+
/// getBlockAfter - Return the memory block immediately after this one.
///
MemoryRangeHeader &getBlockAfter() const {
return *(MemoryRangeHeader*)((char*)this+BlockSize);
}
-
+
/// getFreeBlockBefore - If the block before this one is free, return it,
/// otherwise return null.
FreeRangeHeader *getFreeBlockBefore() const {
@@ -73,15 +73,15 @@ namespace {
intptr_t PrevSize = ((intptr_t *)this)[-1];
return (FreeRangeHeader*)((char*)this-PrevSize);
}
-
+
/// FreeBlock - Turn an allocated block into a free block, adjusting
/// bits in the object headers, and adding an end of region memory block.
FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList);
-
+
/// TrimAllocationToSize - If this allocated block is significantly larger
/// than NewSize, split it into two pieces (where the former is NewSize
/// bytes, including the header), and add the new block to the free list.
- FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList,
+ FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList,
uint64_t NewSize);
};
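
MemoryRangeHeader above packs the two allocation flags and the block size into a single word, so the boundary-tag overhead is one pointer-sized slot per block. A minimal sketch of the packing; uintptr_t is used for every bit-field so that, on common ABIs, the struct occupies exactly one word (the standard does not guarantee bit-field layout):

    #include <cassert>
    #include <climits>
    #include <stdint.h>

    struct PackedHeader {
      uintptr_t ThisAllocated : 1;  // is this block in use?
      uintptr_t PrevAllocated : 1;  // is the previous block in use?
      uintptr_t BlockSize : sizeof(intptr_t) * CHAR_BIT - 2;  // incl. header
    };

    int main() {
      assert(sizeof(PackedHeader) == sizeof(uintptr_t));  // common ABIs
      PackedHeader H = { 1, 0, 4096 };
      assert(H.BlockSize == 4096);
      return 0;
    }
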
@@ -91,13 +91,13 @@ namespace {
struct FreeRangeHeader : public MemoryRangeHeader {
FreeRangeHeader *Prev;
FreeRangeHeader *Next;
-
+
/// getMinBlockSize - Get the minimum size for a memory block. Blocks
/// smaller than this size cannot be created.
static unsigned getMinBlockSize() {
return sizeof(FreeRangeHeader)+sizeof(intptr_t);
}
-
+
/// SetEndOfBlockSizeMarker - The word at the end of every free block is
/// known to be the size of the free block. Set it for this block.
void SetEndOfBlockSizeMarker() {
@@ -110,7 +110,7 @@ namespace {
Next->Prev = Prev;
return Prev->Next = Next;
}
-
+
void AddToFreeList(FreeRangeHeader *FreeList) {
Next = FreeList;
Prev = FreeList->Prev;
@@ -121,7 +121,7 @@ namespace {
/// GrowBlock - The block after this block just got deallocated. Merge it
/// into the current block.
void GrowBlock(uintptr_t NewSize);
-
+
/// AllocateBlock - Mark this entire block allocated, updating freelists
/// etc. This returns a pointer to the circular free-list.
FreeRangeHeader *AllocateBlock();
@@ -137,7 +137,7 @@ FreeRangeHeader *FreeRangeHeader::AllocateBlock() {
// Mark this block allocated.
ThisAllocated = 1;
getBlockAfter().PrevAllocated = 1;
-
+
// Remove it from the free list.
return RemoveFromFreeList();
}
@@ -150,9 +150,9 @@ FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
MemoryRangeHeader *FollowingBlock = &getBlockAfter();
assert(ThisAllocated && "This block is already free!");
assert(FollowingBlock->PrevAllocated && "Flags out of sync!");
-
+
FreeRangeHeader *FreeListToReturn = FreeList;
-
+
// If the block after this one is free, merge it into this block.
if (!FollowingBlock->ThisAllocated) {
FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock;
@@ -164,18 +164,18 @@ FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
assert(&FollowingFreeBlock != FreeList && "No tombstone block?");
}
FollowingFreeBlock.RemoveFromFreeList();
-
+
// Include the following block into this one.
BlockSize += FollowingFreeBlock.BlockSize;
FollowingBlock = &FollowingFreeBlock.getBlockAfter();
-
+
// Tell the block after the block we are coalescing that this block is
// allocated.
FollowingBlock->PrevAllocated = 1;
}
-
+
assert(FollowingBlock->ThisAllocated && "Missed coalescing?");
-
+
if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) {
PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize);
return FreeListToReturn ? FreeListToReturn : PrevFreeBlock;
@@ -218,24 +218,24 @@ TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) {
// Round up size for alignment of header.
unsigned HeaderAlign = __alignof(FreeRangeHeader);
NewSize = (NewSize+ (HeaderAlign-1)) & ~(HeaderAlign-1);
-
+
// Size is now the size of the block we will remove from the start of the
// current block.
assert(NewSize <= BlockSize &&
"Allocating more space from this block than exists!");
-
+
// If splitting this block will cause the remainder to be too small, do not
// split the block.
if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize())
return FreeList;
-
+
// Otherwise, we splice the required number of bytes out of this block, form
// a new block immediately after it, then mark this block allocated.
MemoryRangeHeader &FormerNextBlock = getBlockAfter();
-
+
// Change the size of this block.
BlockSize = NewSize;
-
+
// Get the new block we just sliced out and turn it into a free block.
FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter();
NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock;
@@ -283,7 +283,7 @@ namespace {
sys::MemoryBlock LastSlab;
// Memory slabs allocated by the JIT. We refer to them as slabs so we don't
- // confuse them with the blocks of memory descibed above.
+ // confuse them with the blocks of memory described above.
std::vector<sys::MemoryBlock> CodeSlabs;
JITSlabAllocator BumpSlabAllocator;
BumpPtrAllocator StubAllocator;
@@ -296,7 +296,6 @@ namespace {
MemoryRangeHeader *CurBlock;
uint8_t *GOTBase; // Target Specific reserved memory
- void *DlsymTable; // Stub external symbol information
public:
DefaultJITMemoryManager();
~DefaultJITMemoryManager();
@@ -318,7 +317,6 @@ namespace {
static const size_t DefaultSizeThreshold;
void AllocateGOT();
- void SetDlsymTable(void *);
// Testing methods.
virtual bool CheckInvariants(std::string &ErrorStr);
@@ -349,7 +347,7 @@ namespace {
}
largest = largest - sizeof(MemoryRangeHeader);
-
+
// If this block isn't big enough for the allocation desired, allocate
// another block of memory and add it to the free list.
if (largest < ActualSize ||
@@ -445,34 +443,30 @@ namespace {
return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
}
- /// startExceptionTable - Use startFunctionBody to allocate memory for the
+ /// startExceptionTable - Use startFunctionBody to allocate memory for the
/// function's exception table.
uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
return startFunctionBody(F, ActualSize);
}
- /// endExceptionTable - The exception table of F is now allocated,
+ /// endExceptionTable - The exception table of F is now allocated,
/// and takes the memory in the range [TableStart,TableEnd).
void endExceptionTable(const Function *F, uint8_t *TableStart,
uint8_t *TableEnd, uint8_t* FrameRegister) {
assert(TableEnd > TableStart);
assert(TableStart == (uint8_t *)(CurBlock+1) &&
"Mismatched table start/end!");
-
+
uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock;
// Release the memory at the end of this block that isn't needed.
FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
}
-
+
uint8_t *getGOTBase() const {
return GOTBase;
}
-
- void *getDlsymTable() const {
- return DlsymTable;
- }
-
+
void deallocateBlock(void *Block) {
// Find the block that is allocated for this function.
MemoryRangeHeader *MemRange = static_cast<MemoryRangeHeader*>(Block) - 1;
@@ -561,16 +555,16 @@ DefaultJITMemoryManager::DefaultJITMemoryManager()
// END ]
//
// The last three blocks are never deallocated or touched.
-
+
// Add MemoryRangeHeader to the end of the memory region, indicating that
// the space after the block of memory is allocated. This is block #3.
MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1;
Mem3->ThisAllocated = 1;
Mem3->PrevAllocated = 0;
Mem3->BlockSize = sizeof(MemoryRangeHeader);
-
+
/// Add a tiny free region so that the free list always has one entry.
- FreeRangeHeader *Mem2 =
+ FreeRangeHeader *Mem2 =
(FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize());
Mem2->ThisAllocated = 0;
Mem2->PrevAllocated = 1;
@@ -584,7 +578,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager()
Mem1->ThisAllocated = 1;
Mem1->PrevAllocated = 0;
Mem1->BlockSize = sizeof(MemoryRangeHeader);
-
+
// Add a FreeRangeHeader to the start of the function body region, indicating
// that the space is free. Mark the previous block allocated so we never look
// at it.
@@ -594,12 +588,11 @@ DefaultJITMemoryManager::DefaultJITMemoryManager()
Mem0->BlockSize = (char*)Mem1-(char*)Mem0;
Mem0->SetEndOfBlockSizeMarker();
Mem0->AddToFreeList(Mem2);
-
+
// Start out with the freelist pointing to Mem0.
FreeMemoryList = Mem0;
GOTBase = NULL;
- DlsymTable = NULL;
}
void DefaultJITMemoryManager::AllocateGOT() {
@@ -608,10 +601,6 @@ void DefaultJITMemoryManager::AllocateGOT() {
HasGOT = true;
}
-void DefaultJITMemoryManager::SetDlsymTable(void *ptr) {
- DlsymTable = ptr;
-}
-
DefaultJITMemoryManager::~DefaultJITMemoryManager() {
for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
sys::Memory::ReleaseRWX(CodeSlabs[i]);
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
index 76d81c2..365ec05 100644
--- a/lib/Linker/LinkArchives.cpp
+++ b/lib/Linker/LinkArchives.cpp
@@ -172,10 +172,9 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
verbose(" Linking in module: " + aModule->getModuleIdentifier());
// Link it in
- if (LinkInModule(aModule, &moduleErrorMsg)) {
+ if (LinkInModule(aModule, &moduleErrorMsg))
return error("Cannot link in module '" +
aModule->getModuleIdentifier() + "': " + moduleErrorMsg);
- }
}
}
diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp
index 61f3c26..2c22550 100644
--- a/lib/Linker/LinkItems.cpp
+++ b/lib/Linker/LinkItems.cpp
@@ -70,7 +70,7 @@ Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) {
/// LinkInLibrary - links one library into the HeadModule.
///
-bool Linker::LinkInLibrary(const StringRef &Lib, bool& is_native) {
+bool Linker::LinkInLibrary(StringRef Lib, bool& is_native) {
is_native = false;
// Determine where this library lives.
sys::Path Pathname = FindLib(Lib);
@@ -160,14 +160,17 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
// Check for a file of name "-", which means "read standard input"
if (File.str() == "-") {
std::auto_ptr<Module> M;
- if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN()) {
+ MemoryBuffer *Buffer = MemoryBuffer::getSTDIN();
+ if (!Buffer->getBufferSize()) {
+ delete Buffer;
+ Error = "standard input is empty";
+ } else {
M.reset(ParseBitcodeFile(Buffer, Context, &Error));
delete Buffer;
if (M.get())
if (!LinkInModule(M.get(), &Error))
return false;
- } else
- Error = "standard input is empty";
+ }
return error("Cannot link stdin: " + Error);
}
@@ -187,7 +190,6 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
case sys::Archive_FileType:
// A user may specify an ar archive without -l, perhaps because it
// is not installed as a library. Detect that and link the archive.
- verbose("Linking archive file '" + File.str() + "'");
if (LinkInArchive(File, is_native))
return true;
break;
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index aef79d0..32aa0f9 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -20,8 +20,8 @@
#include "llvm/Config/config.h"
using namespace llvm;
-Linker::Linker(const StringRef &progname, const StringRef &modname,
- LLVMContext& C, unsigned flags):
+Linker::Linker(StringRef progname, StringRef modname,
+ LLVMContext& C, unsigned flags):
Context(C),
Composite(new Module(modname, C)),
LibPaths(),
@@ -29,7 +29,7 @@ Linker::Linker(const StringRef &progname, const StringRef &modname,
Error(),
ProgramName(progname) { }
-Linker::Linker(const StringRef &progname, Module* aModule, unsigned flags) :
+Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
Context(aModule->getContext()),
Composite(aModule),
LibPaths(),
@@ -42,7 +42,7 @@ Linker::~Linker() {
}
bool
-Linker::error(const StringRef &message) {
+Linker::error(StringRef message) {
Error = message;
if (!(Flags&QuietErrors))
errs() << ProgramName << ": error: " << message << "\n";
@@ -50,7 +50,7 @@ Linker::error(const StringRef &message) {
}
bool
-Linker::warning(const StringRef &message) {
+Linker::warning(StringRef message) {
Error = message;
if (!(Flags&QuietWarnings))
errs() << ProgramName << ": warning: " << message << "\n";
@@ -58,7 +58,7 @@ Linker::warning(const StringRef &message) {
}
void
-Linker::verbose(const StringRef &message) {
+Linker::verbose(StringRef message) {
if (Flags&Verbose)
errs() << " " << message << "\n";
}
@@ -114,7 +114,7 @@ Linker::LoadObject(const sys::Path &FN) {
// IsLibrary - Determine if "Name" is a library in "Directory". Return
// a non-empty sys::Path if its found, an empty one otherwise.
-static inline sys::Path IsLibrary(const StringRef &Name,
+static inline sys::Path IsLibrary(StringRef Name,
const sys::Path &Directory) {
sys::Path FullPath(Directory);
@@ -153,7 +153,7 @@ static inline sys::Path IsLibrary(const StringRef &Name,
/// Path if no matching file can be found.
///
sys::Path
-Linker::FindLib(const StringRef &Filename) {
+Linker::FindLib(StringRef Filename) {
// Determine if the pathname can be found as it stands.
sys::Path FilePath(Filename);
if (FilePath.canRead() &&
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index e939f37..b6ebb1a 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -58,7 +58,7 @@ public:
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
- virtual void EmitBytes(const StringRef &Data);
+ virtual void EmitBytes(StringRef Data);
virtual void EmitValue(const MCExpr *Value, unsigned Size);
@@ -186,7 +186,7 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
OS << '\n';
}
-void MCAsmStreamer::EmitBytes(const StringRef &Data) {
+void MCAsmStreamer::EmitBytes(StringRef Data) {
assert(CurSection && "Cannot emit contents before setting section!");
for (unsigned i = 0, e = Data.size(); i != e; ++i)
OS << ".byte " << (unsigned) (unsigned char) Data[i] << '\n';
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 4f39f1e..1f5b6f1 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -180,7 +180,7 @@ public:
OS << StringRef(Zeros, N % 16);
}
- void WriteString(const StringRef &Str, unsigned ZeroFillSize = 0) {
+ void WriteString(StringRef Str, unsigned ZeroFillSize = 0) {
OS << Str;
if (ZeroFillSize)
WriteZeros(ZeroFillSize - Str.size());
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 09479c5..45d2c02 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -23,7 +23,7 @@ MCContext::~MCContext() {
// we don't need to free them here.
}
-MCSymbol *MCContext::CreateSymbol(const StringRef &Name) {
+MCSymbol *MCContext::CreateSymbol(StringRef Name) {
assert(Name[0] != '\0' && "Normal symbols cannot be unnamed!");
// Create and bind the symbol, and ensure that names are unique.
@@ -32,7 +32,7 @@ MCSymbol *MCContext::CreateSymbol(const StringRef &Name) {
return Entry = new (*this) MCSymbol(Name, false);
}
-MCSymbol *MCContext::GetOrCreateSymbol(const StringRef &Name) {
+MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
MCSymbol *&Entry = Symbols[Name];
if (Entry) return Entry;
@@ -46,7 +46,7 @@ MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
}
-MCSymbol *MCContext::CreateTemporarySymbol(const StringRef &Name) {
+MCSymbol *MCContext::CreateTemporarySymbol(StringRef Name) {
// If unnamed, just create a symbol.
if (Name.empty())
new (*this) MCSymbol("", true);
@@ -57,6 +57,6 @@ MCSymbol *MCContext::CreateTemporarySymbol(const StringRef &Name) {
return Entry = new (*this) MCSymbol(Name, true);
}
-MCSymbol *MCContext::LookupSymbol(const StringRef &Name) const {
+MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
return Symbols.lookup(Name);
}
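
The recurring signature change in these Linker and MC hunks, from const StringRef & to plain StringRef, reflects that a StringRef is only a pointer/length pair: copying it costs the same two words as passing a reference, and the by-value form avoids an extra indirection. A sketch of the layout assumption (a hypothetical mirror of the real class, not LLVM code):

    #include <cassert>
    #include <cstddef>

    // Two words, trivially copyable: by-value is as cheap as by-reference.
    struct StringRefLayout {
      const char *Data;
      std::size_t Length;
    };

    int main() {
      assert(sizeof(StringRefLayout) == 2 * sizeof(void*));  // common platforms
      return 0;
    }
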
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index c950ff2..a5a2256 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -133,8 +133,7 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
return new (Ctx) MCSymbolRefExpr(Sym);
}
-const MCSymbolRefExpr *MCSymbolRefExpr::Create(const StringRef &Name,
- MCContext &Ctx) {
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, MCContext &Ctx) {
return Create(Ctx.GetOrCreateSymbol(Name), Ctx);
}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 189f072..828b92a 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -134,7 +134,7 @@ public:
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
- virtual void EmitBytes(const StringRef &Data);
+ virtual void EmitBytes(StringRef Data);
virtual void EmitValue(const MCExpr *Value, unsigned Size);
@@ -315,7 +315,7 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
SectData.setAlignment(ByteAlignment);
}
-void MCMachOStreamer::EmitBytes(const StringRef &Data) {
+void MCMachOStreamer::EmitBytes(StringRef Data) {
MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
if (!DF)
DF = new MCDataFragment(CurSectionData);
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 3cd22ca..ddc4e69 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -45,7 +45,7 @@ namespace {
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0) {}
- virtual void EmitBytes(const StringRef &Data) {}
+ virtual void EmitBytes(StringRef Data) {}
virtual void EmitValue(const MCExpr *Value, unsigned Size) {}
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index 333a471..24c89ef 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -25,7 +25,7 @@ MCSection::~MCSection() {
//===----------------------------------------------------------------------===//
MCSectionCOFF *MCSectionCOFF::
-Create(const StringRef &Name, bool IsDirective, SectionKind K, MCContext &Ctx) {
+Create(StringRef Name, bool IsDirective, SectionKind K, MCContext &Ctx) {
return new (Ctx) MCSectionCOFF(Name, IsDirective, K);
}
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index 660a8c9..c6812ed 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -15,7 +15,7 @@
using namespace llvm;
MCSectionELF *MCSectionELF::
-Create(const StringRef &Section, unsigned Type, unsigned Flags,
+Create(StringRef Section, unsigned Type, unsigned Flags,
SectionKind K, bool isExplicit, MCContext &Ctx) {
return new (Ctx) MCSectionELF(Section, Type, Flags, K, isExplicit);
}
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index b3aeb9c..6cc67a2 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -66,7 +66,7 @@ ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC)
MCSectionMachO *MCSectionMachO::
-Create(const StringRef &Segment, const StringRef &Section,
+Create(StringRef Segment, StringRef Section,
unsigned TypeAndAttributes, unsigned Reserved2,
SectionKind K, MCContext &Ctx) {
// S_SYMBOL_STUBS must be set for Reserved2 to be non-zero.
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 86ff3f3..b145d07 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -35,7 +35,7 @@ static void MangleLetter(raw_ostream &OS, unsigned char C) {
/// NameNeedsEscaping - Return true if the identifier \arg Str needs quotes
/// for this assembler.
-static bool NameNeedsEscaping(const StringRef &Str, const MCAsmInfo &MAI) {
+static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
assert(!Str.empty() && "Cannot create an empty MCSymbol");
// If the first character is a number and the target does not allow this, we
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 626daa2..59340d4 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ManagedStatic.h"
@@ -765,6 +766,11 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
free(*i);
}
+ DEBUG(errs() << "\nArgs: ";
+ for (int i = 0; i < argc; ++i)
+ errs() << argv[i] << ' ';
+ );
+
// If we had an error processing our arguments, don't let the program execute
if (ErrorParsing) exit(1);
}
@@ -1147,9 +1153,12 @@ public:
#ifndef NDEBUG
OS << " with assertions";
#endif
+ std::string CPU = sys::getHostCPUName();
+ if (CPU == "generic") CPU = "(unknown)";
OS << ".\n"
<< " Built " << __DATE__ << " (" << __TIME__ << ").\n"
<< " Host: " << sys::getHostTriple() << '\n'
+ << " Host CPU: " << CPU << '\n'
<< '\n'
<< " Registered Targets:\n";
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index 423e90d..e427f82 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -492,6 +492,30 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
return ConstantRange(L, U);
}
+/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The
+/// value is zero extended, truncated, or left alone to make it that width.
+ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ if (SrcTySize > DstTySize)
+ return truncate(DstTySize);
+ else if (SrcTySize < DstTySize)
+ return zeroExtend(DstTySize);
+ else
+ return *this;
+}
+
+/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The
+/// value is sign extended, truncated, or left alone to make it that width.
+ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ if (SrcTySize > DstTySize)
+ return truncate(DstTySize);
+ else if (SrcTySize < DstTySize)
+ return signExtend(DstTySize);
+ else
+ return *this;
+}
+
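
The new zextOrTrunc/sextOrTrunc helpers above fold the three width cases (wider, narrower, equal) into a single call, mirroring the APInt methods of the same names. A small usage sketch against this revision's headers:

    #include "llvm/Support/ConstantRange.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      ConstantRange CR(APInt(16, 1), APInt(16, 100));  // i16 range [1, 100)
      assert(CR.zextOrTrunc(32).getBitWidth() == 32);  // zero extended
      assert(CR.sextOrTrunc(8).getBitWidth() == 8);    // truncated
      assert(CR.zextOrTrunc(16).getBitWidth() == 16);  // unchanged
      return 0;
    }
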
ConstantRange
ConstantRange::add(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
@@ -585,6 +609,43 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
return ConstantRange(Lower, Upper);
}
+ConstantRange
+ConstantRange::shl(const ConstantRange &Amount) const {
+ if (isEmptySet())
+ return *this;
+
+ APInt min = getUnsignedMin() << Amount.getUnsignedMin();
+ APInt max = getUnsignedMax() << Amount.getUnsignedMax();
+
+  // There is no overflow if the leading zeros cover the largest shift amount.
+ APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros());
+ if (Zeros.uge(Amount.getUnsignedMax()))
+ return ConstantRange(min, max);
+
+ // FIXME: implement the other tricky cases
+ return ConstantRange(getBitWidth());
+}
+
+ConstantRange
+ConstantRange::ashr(const ConstantRange &Amount) const {
+ if (isEmptySet())
+ return *this;
+
+ APInt min = getUnsignedMax().ashr(Amount.getUnsignedMin());
+ APInt max = getUnsignedMin().ashr(Amount.getUnsignedMax());
+ return ConstantRange(min, max);
+}
+
+ConstantRange
+ConstantRange::lshr(const ConstantRange &Amount) const {
+ if (isEmptySet())
+ return *this;
+
+ APInt min = getUnsignedMax().lshr(Amount.getUnsignedMin());
+ APInt max = getUnsignedMin().lshr(Amount.getUnsignedMax());
+ return ConstantRange(min, max);
+}
+
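
The shl above is exact only when the leading-zero check proves no set bit can be shifted out; otherwise it conservatively returns the full set (the FIXME covers the remaining cases). A small sketch of the conservative path against this revision's headers:

    #include "llvm/Support/ConstantRange.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      // i8 values in [0, 128): the unsigned max 127 has one leading zero,
      // but the shift amount may be 2, so a set bit could be lost.
      ConstantRange Val(APInt(8, 0), APInt(8, 128));
      ConstantRange Amt(APInt(8, 1), APInt(8, 3));  // shift by 1 or 2
      assert(Val.shl(Amt).isFullSet());             // conservative fallback
      return 0;
    }
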
/// print - Print out the bounds to a stream...
///
void ConstantRange::print(raw_ostream &OS) const {
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index d4954b6..50abe01 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -62,7 +62,7 @@ bool llvm::isCurrentDebugType(const char *DebugType) {
/// option were specified. Note that DebugFlag also needs to be set to true for
/// debug output to be produced.
///
-void SetCurrentDebugType(const char *Type) {
+void llvm::SetCurrentDebugType(const char *Type) {
CurrentDebugType = Type;
}
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 88e2050..b04864a 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -70,7 +70,7 @@ namespace {
class MemoryBufferMem : public MemoryBuffer {
std::string FileID;
public:
- MemoryBufferMem(const char *Start, const char *End, const char *FID,
+ MemoryBufferMem(const char *Start, const char *End, StringRef FID,
bool Copy = false)
: FileID(FID) {
if (!Copy)
@@ -107,7 +107,7 @@ MemoryBuffer *MemoryBuffer::getMemBufferCopy(const char *StartPtr,
/// initialize the memory allocated by this method. The memory is owned by
/// the MemoryBuffer object.
MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
- const char *BufferName) {
+ StringRef BufferName) {
char *Buf = (char *)malloc((Size+1) * sizeof(char));
if (!Buf) return 0;
Buf[Size] = 0;
@@ -134,17 +134,12 @@ MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size,
/// if the Filename is "-". If an error occurs, this returns null and fills
/// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN)
/// returns an empty buffer.
-MemoryBuffer *MemoryBuffer::getFileOrSTDIN(const char *Filename,
+MemoryBuffer *MemoryBuffer::getFileOrSTDIN(StringRef Filename,
std::string *ErrStr,
int64_t FileSize) {
- if (Filename[0] != '-' || Filename[1] != 0)
- return getFile(Filename, ErrStr, FileSize);
- MemoryBuffer *M = getSTDIN();
- if (M) return M;
-
- // If stdin was empty, M is null. Cons up an empty memory buffer now.
- const char *EmptyStr = "";
- return MemoryBuffer::getMemBuffer(EmptyStr, EmptyStr, "<stdin>");
+ if (Filename == "-")
+ return getSTDIN();
+ return getFile(Filename, ErrStr, FileSize);
}
//===----------------------------------------------------------------------===//
@@ -158,7 +153,7 @@ namespace {
class MemoryBufferMMapFile : public MemoryBuffer {
std::string Filename;
public:
- MemoryBufferMMapFile(const char *filename, const char *Pages, uint64_t Size)
+ MemoryBufferMMapFile(StringRef filename, const char *Pages, uint64_t Size)
: Filename(filename) {
init(Pages, Pages+Size);
}
@@ -173,13 +168,13 @@ public:
};
}
-MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
+MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
int64_t FileSize) {
int OpenFlags = 0;
#ifdef O_BINARY
OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
#endif
- int FD = ::open(Filename, O_RDONLY|OpenFlags);
+ int FD = ::open(Filename.str().c_str(), O_RDONLY|OpenFlags);
if (FD == -1) {
if (ErrStr) *ErrStr = "could not open file";
return 0;
@@ -203,6 +198,8 @@ MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
// for small files, because this can severely fragment our address space. Also
// don't try to map files that are exactly a multiple of the system page size,
// as the file would not have the required null terminator.
+ //
+ // FIXME: Can we just mmap an extra page in the latter case?
if (FileSize >= 4096*4 &&
(FileSize & (sys::Process::GetPageSize()-1)) != 0) {
if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) {
@@ -262,6 +259,9 @@ MemoryBuffer *MemoryBuffer::getSTDIN() {
std::vector<char> FileData;
// Read in all of the data from stdin, we cannot mmap stdin.
+ //
+ // FIXME: That isn't necessarily true, we should try to mmap stdin and
+ // fallback if it fails.
sys::Program::ChangeStdinToBinary();
size_t ReadBytes;
do {
@@ -271,8 +271,6 @@ MemoryBuffer *MemoryBuffer::getSTDIN() {
FileData.push_back(0); // &FileData[Size] is invalid. So is &*FileData.end().
size_t Size = FileData.size();
- if (Size <= 1)
- return 0;
MemoryBuffer *B = new STDINBufferFile();
B->initCopyOf(&FileData[0], &FileData[Size-1]);
return B;
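
With the Size <= 1 early return removed, getSTDIN above now returns a real (possibly zero-length) buffer instead of a null pointer for empty input, and getFileOrSTDIN forwards it unchanged; callers that care must test getBufferSize(), as the LinkInFile hunk earlier in this diff now does. A sketch of the caller-side check:

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      MemoryBuffer *Buffer = MemoryBuffer::getSTDIN();
      if (!Buffer->getBufferSize())  // empty stdin is an empty buffer, not null
        errs() << "standard input is empty\n";
      delete Buffer;
      return 0;
    }
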
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index c72f121..1b233ab 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include <cstring>
using namespace llvm;
@@ -56,3 +57,24 @@ void llvm::SplitString(const std::string &Source,
S2 = getToken(S, Delimiters);
}
}
+
+void llvm::StringRef::split(SmallVectorImpl<StringRef> &A,
+ StringRef Separators, int MaxSplit,
+ bool KeepEmpty) const {
+ StringRef rest = *this;
+
+  // rest.data() is used to distinguish cases like "a," (which splits into
+  // "a" + "") from "a" (which splits into "a" + a null StringRef).
+ for (int splits = 0;
+ rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
+ ++splits) {
+ std::pair<llvm::StringRef, llvm::StringRef> p = rest.split(Separators);
+
+ if (p.first.size() != 0 || KeepEmpty)
+ A.push_back(p.first);
+ rest = p.second;
+ }
+ // If we have a tail left, add it.
+ if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
+ A.push_back(rest);
+}
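
The new StringRef::split overload above fills a vector while honoring MaxSplit and KeepEmpty; Separators is matched as a whole substring by the pair-returning split it calls. A usage sketch (arguments passed explicitly):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      SmallVector<StringRef, 8> Parts;
      StringRef("a,b,,c").split(Parts, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/true);
      assert(Parts.size() == 4 && Parts[2].empty());  // "a", "b", "", "c"

      Parts.clear();
      StringRef("a,b,,c").split(Parts, ",", /*MaxSplit=*/1, /*KeepEmpty=*/true);
      assert(Parts.size() == 2 && Parts[1] == "b,,c");  // split only once
      return 0;
    }
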
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index a729d3d..6f28277 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -52,7 +52,7 @@ void StringMapImpl::init(unsigned InitSize) {
/// specified bucket will be non-null. Otherwise, it will be null. In either
/// case, the FullHashValue field of the bucket will be set to the hash value
/// of the string.
-unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) {
+unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
unsigned HTSize = NumBuckets;
if (HTSize == 0) { // Hash table unallocated so far?
init(16);
@@ -110,7 +110,7 @@ unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) {
/// FindKey - Look up the bucket that contains the specified key. If it exists
/// in the map, return the bucket number of the key. Otherwise return -1.
/// This does not modify the map.
-int StringMapImpl::FindKey(const StringRef &Key) const {
+int StringMapImpl::FindKey(StringRef Key) const {
unsigned HTSize = NumBuckets;
if (HTSize == 0) return -1; // Really empty table?
unsigned FullHashValue = HashString(Key);
@@ -161,7 +161,7 @@ void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
/// RemoveKey - Remove the StringMapEntry for the specified key from the
/// table, returning it. If the key is not in the table, this returns null.
-StringMapEntryBase *StringMapImpl::RemoveKey(const StringRef &Key) {
+StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
int Bucket = FindKey(Key);
if (Bucket == -1) return 0;
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index deaa19e..51e1100 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -15,6 +15,26 @@ using namespace llvm;
const size_t StringRef::npos;
#endif
+static char ascii_tolower(char x) {
+ if (x >= 'A' && x <= 'Z')
+ return x - 'A' + 'a';
+ return x;
+}
+
+/// compare_lower - Compare strings, ignoring case.
+int StringRef::compare_lower(StringRef RHS) const {
+ for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
+ char LHC = ascii_tolower(Data[I]);
+ char RHC = ascii_tolower(RHS.Data[I]);
+ if (LHC != RHC)
+ return LHC < RHC ? -1 : 1;
+ }
+
+ if (Length == RHS.Length)
+ return 0;
+ return Length < RHS.Length ? -1 : 1;
+}
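A quick sketch of the ordering this defines; only ASCII letters are folded, and the results below are assumed from the loop above rather than taken from a test:

    llvm::StringRef("Hello").compare_lower("hELLO");  // == 0: equal after folding
    llvm::StringRef("Apple").compare_lower("apples"); // < 0: equal prefix, LHS shorter
    llvm::StringRef("Zoo").compare_lower("ant");      // > 0: 'z' > 'a' after folding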
+
//===----------------------------------------------------------------------===//
// String Searching
//===----------------------------------------------------------------------===//
@@ -24,11 +44,11 @@ const size_t StringRef::npos;
///
/// \return - The index of the first occurrence of \arg Str, or npos if not
/// found.
-size_t StringRef::find(const StringRef &Str) const {
+size_t StringRef::find(StringRef Str, size_t From) const {
size_t N = Str.size();
if (N > Length)
return npos;
- for (size_t i = 0, e = Length - N + 1; i != e; ++i)
+ for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
if (substr(i, N).equals(Str))
return i;
return npos;
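The new From parameter is clamped to the last valid start position, so an out-of-range start cleanly yields npos rather than reading past the end. A small sketch of the assumed behavior:

    llvm::StringRef S("ababab");
    S.find("ab");       // == 0
    S.find("ab", 1);    // == 2: the search resumes at index 1
    S.find("ab", 100);  // == npos: From is clamped to e, so the loop never runs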
@@ -38,7 +58,7 @@ size_t StringRef::find(const StringRef &Str) const {
///
/// \return - The index of the last occurrence of \arg Str, or npos if not
/// found.
-size_t StringRef::rfind(const StringRef &Str) const {
+size_t StringRef::rfind(StringRef Str) const {
size_t N = Str.size();
if (N > Length)
return npos;
@@ -50,19 +70,34 @@ size_t StringRef::rfind(const StringRef &Str) const {
return npos;
}
-/// find_first_of - Find the first character from the string 'Chars' in the
-/// current string or return npos if not in string.
-StringRef::size_type StringRef::find_first_of(StringRef Chars) const {
- for (size_type i = 0, e = Length; i != e; ++i)
+/// find_first_of - Find the first character in the string that is in \arg
+/// Chars, or npos if not found.
+///
+/// Note: O(size() * Chars.size())
+StringRef::size_type StringRef::find_first_of(StringRef Chars,
+ size_t From) const {
+ for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (Chars.find(Data[i]) != npos)
return i;
return npos;
}
/// find_first_not_of - Find the first character in the string that is not
-/// in the string 'Chars' or return npos if all are in string. Same as find.
-StringRef::size_type StringRef::find_first_not_of(StringRef Chars) const {
- for (size_type i = 0, e = Length; i != e; ++i)
+/// \arg C or npos if not found.
+StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
+ for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
+ if (Data[i] != C)
+ return i;
+ return npos;
+}
+
+/// find_first_not_of - Find the first character in the string that is not
+/// in the string \arg Chars, or npos if not found.
+///
+/// Note: O(size() * Chars.size())
+StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
+ size_t From) const {
+ for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (Chars.find(Data[i]) == npos)
return i;
return npos;
@@ -75,7 +110,7 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars) const {
/// count - Return the number of non-overlapped occurrences of \arg Str in
/// the string.
-size_t StringRef::count(const StringRef &Str) const {
+size_t StringRef::count(StringRef Str) const {
size_t Count = 0;
size_t N = Str.size();
if (N > Length)
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index dd58d1f..7d32ee6 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -66,7 +66,7 @@ static TimerGroup *getDefaultTimerGroup() {
}
llvm_release_global_lock();
}
-
+
return tmp;
}
@@ -145,7 +145,7 @@ static TimeRecord getTimeRecord(bool Start) {
static ManagedStatic<std::vector<Timer*> > ActiveTimers;
void Timer::startTimer() {
- sys::SmartScopedLock<true> L(Lock);
+ sys::SmartScopedLock<true> L(*TimerLock);
Started = true;
ActiveTimers->push_back(this);
TimeRecord TR = getTimeRecord(true);
@@ -157,7 +157,7 @@ void Timer::startTimer() {
}
void Timer::stopTimer() {
- sys::SmartScopedLock<true> L(Lock);
+ sys::SmartScopedLock<true> L(*TimerLock);
TimeRecord TR = getTimeRecord(false);
Elapsed += TR.Elapsed;
UserTime += TR.UserTime;
@@ -175,27 +175,11 @@ void Timer::stopTimer() {
}
void Timer::sum(const Timer &T) {
- if (&T < this) {
- T.Lock.acquire();
- Lock.acquire();
- } else {
- Lock.acquire();
- T.Lock.acquire();
- }
-
Elapsed += T.Elapsed;
UserTime += T.UserTime;
SystemTime += T.SystemTime;
MemUsed += T.MemUsed;
PeakMem += T.PeakMem;
-
- if (&T < this) {
- T.Lock.release();
- Lock.release();
- } else {
- Lock.release();
- T.Lock.release();
- }
}
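The deleted lines were the classic two-lock ordering idiom: when an operation must hold the locks of two peer objects, acquiring them in a globally consistent order (here, by object address) prevents the deadlock where one thread runs sum(A, B) while another runs sum(B, A). A generic sketch of that idiom, with a hypothetical Mutex type standing in for the real one:

    // Hypothetical Mutex; illustrates the removed pattern, not LLVM's sys API.
    void lockPair(Mutex &A, Mutex &B) {
      if (&A < &B) { A.acquire(); B.acquire(); }  // lower address always first
      else         { B.acquire(); A.acquire(); }
    }

The patch instead serializes all Timer operations on the single global *TimerLock, trading some concurrency for simplicity; since timers are not a hot path, that is a reasonable design choice.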
/// addPeakMemoryMeasurement - This method should be called whenever memory
@@ -203,14 +187,12 @@ void Timer::sum(const Timer &T) {
/// currently active timers, which will be printed when the timer group prints
///
void Timer::addPeakMemoryMeasurement() {
+ sys::SmartScopedLock<true> L(*TimerLock);
size_t MemUsed = getMemUsage();
for (std::vector<Timer*>::iterator I = ActiveTimers->begin(),
- E = ActiveTimers->end(); I != E; ++I) {
- (*I)->Lock.acquire();
+ E = ActiveTimers->end(); I != E; ++I)
(*I)->PeakMem = std::max((*I)->PeakMem, MemUsed-(*I)->PeakMemBase);
- (*I)->Lock.release();
- }
}
//===----------------------------------------------------------------------===//
@@ -280,14 +262,7 @@ static void printVal(double Val, double Total, raw_ostream &OS) {
}
void Timer::print(const Timer &Total, raw_ostream &OS) {
- if (&Total < this) {
- Total.Lock.acquire();
- Lock.acquire();
- } else {
- Lock.acquire();
- Total.Lock.acquire();
- }
-
+ sys::SmartScopedLock<true> L(*TimerLock);
if (Total.UserTime)
printVal(UserTime, Total.UserTime, OS);
if (Total.SystemTime)
@@ -310,14 +285,6 @@ void Timer::print(const Timer &Total, raw_ostream &OS) {
OS << Name << "\n";
Started = false; // Once printed, don't print again
-
- if (&Total < this) {
- Total.Lock.release();
- Lock.release();
- } else {
- Lock.release();
- Total.Lock.release();
- }
}
// GetLibSupportInfoOutputFile - Return a file stream to print our output on...
@@ -329,13 +296,13 @@ llvm::GetLibSupportInfoOutputFile() {
if (LibSupportInfoOutputFilename == "-")
return &outs();
-
+
std::string Error;
raw_ostream *Result = new raw_fd_ostream(LibSupportInfoOutputFilename.c_str(),
Error, raw_fd_ostream::F_Append);
if (Error.empty())
return Result;
-
+
errs() << "Error opening info-output-file '"
<< LibSupportInfoOutputFilename << " for appending!\n";
delete Result;
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 26a1a4e..840fb98 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -94,6 +94,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case MinGW64: return "mingw64";
case NetBSD: return "netbsd";
case OpenBSD: return "openbsd";
+ case Psp: return "psp";
case Solaris: return "solaris";
case Win32: return "win32";
case Haiku: return "haiku";
@@ -102,7 +103,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
return "<invalid>";
}
-Triple::ArchType Triple::getArchTypeForLLVMName(const StringRef &Name) {
+Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
if (Name == "alpha")
return alpha;
if (Name == "arm")
@@ -141,7 +142,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(const StringRef &Name) {
return UnknownArch;
}
-Triple::ArchType Triple::getArchTypeForDarwinArchName(const StringRef &Str) {
+Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
// See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
// archs which Darwin doesn't use.
@@ -178,6 +179,33 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(const StringRef &Str) {
return Triple::UnknownArch;
}
+// Returns the architecture name that is understood by the target assembler.
+const char *Triple::getArchNameForAssembler() {
+ if (getOS() != Triple::Darwin && getVendor() != Triple::Apple)
+ return NULL;
+
+ StringRef Str = getArchName();
+ if (Str == "i386")
+ return "i386";
+ if (Str == "x86_64")
+ return "x86_64";
+ if (Str == "powerpc")
+ return "ppc";
+ if (Str == "powerpc64")
+ return "ppc64";
+ if (Str == "arm")
+ return "arm";
+ if (Str == "armv4t" || Str == "thumbv4t")
+ return "armv4t";
+ if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5" || Str == "thumbv5e")
+ return "armv5";
+ if (Str == "armv6" || Str == "thumbv6")
+ return "armv6";
+ if (Str == "armv7" || Str == "thumbv7")
+ return "armv7";
+ return NULL;
+}
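A usage sketch for the new helper; the triple string here is made up for illustration:

    // Hypothetical triple; getArchName() yields its first component.
    llvm::Triple T("armv6-apple-darwin9");
    const char *AsmArch = T.getArchNameForAssembler(); // "armv6"
    // Triples that are neither Darwin nor Apple-vendored, and unlisted
    // arch names, yield NULL.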
+
//
void Triple::Parse() const {
@@ -273,6 +301,8 @@ void Triple::Parse() const {
OS = NetBSD;
else if (OSName.startswith("openbsd"))
OS = OpenBSD;
+ else if (OSName.startswith("psp"))
+ OS = Psp;
else if (OSName.startswith("solaris"))
OS = Solaris;
else if (OSName.startswith("win32"))
@@ -393,7 +423,7 @@ void Triple::setOS(OSType Kind) {
setOSName(getOSTypeName(Kind));
}
-void Triple::setArchName(const StringRef &Str) {
+void Triple::setArchName(StringRef Str) {
// Work around a miscompilation bug for Twines in gcc 4.0.3.
SmallString<64> Triple;
Triple += Str;
@@ -404,11 +434,11 @@ void Triple::setArchName(const StringRef &Str) {
setTriple(Triple.str());
}
-void Triple::setVendorName(const StringRef &Str) {
+void Triple::setVendorName(StringRef Str) {
setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName());
}
-void Triple::setOSName(const StringRef &Str) {
+void Triple::setOSName(StringRef Str) {
if (hasEnvironment())
setTriple(getArchName() + "-" + getVendorName() + "-" + Str +
"-" + getEnvironmentName());
@@ -416,11 +446,11 @@ void Triple::setOSName(const StringRef &Str) {
setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
}
-void Triple::setEnvironmentName(const StringRef &Str) {
- setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
+void Triple::setEnvironmentName(StringRef Str) {
+ setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
"-" + Str);
}
-void Triple::setOSAndEnvironmentName(const StringRef &Str) {
+void Triple::setOSAndEnvironmentName(StringRef Str) {
setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
}
diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp
index fd2d952..37591a5 100644
--- a/lib/System/Host.cpp
+++ b/lib/System/Host.cpp
@@ -13,6 +13,7 @@
#include "llvm/System/Host.h"
#include "llvm/Config/config.h"
+#include <string.h>
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
@@ -22,3 +23,276 @@
#include "Win32/Host.inc"
#endif
+//===----------------------------------------------------------------------===//
+//
+// Implementations of the CPU detection routines
+//
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+
+/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
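One subtlety worth noting for the callers below: for CPUID leaf 0 the 12-byte vendor string comes back in EBX, EDX, ECX order, which is why getHostCPUName() fills its union as u+0, u+2, u+1. A sketch of that call shape:

    // Sketch: vendor string assembly, per the x86 CPUID convention.
    unsigned EAX, Text[3];
    GetX86CpuIDAndInfo(0, &EAX, &Text[0], &Text[2], &Text[1]); // EBX, ECX, EDX
    // (char*)Text now spells "GenuineIntel" or "AuthenticAMD" (12 bytes, no NUL)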
+
+static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+#endif
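A worked example of the decoding above, using a made-up but representative signature: EAX = 0x000106A5 has family bits (8-11) = 6 and base model bits (4-7) = 0xA; because the family is 6, the extended model bits (16-19) = 1 are shifted in, giving Model = 0x1A = 26, which the Intel switch below maps to "corei7".

    unsigned Family, Model;
    DetectX86FamilyModel(0x000106A5, Family, Model); // hypothetical CPUID(1) EAX
    // Family == 6, Model == 26 -> "corei7" in getHostCPUName()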
+
+
+std::string sys::getHostCPUName() {
+#if defined(__x86_64__) || defined(__i386__)
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ return "generic";
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectX86FamilyModel(EAX, Family, Model);
+
+ GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ bool Em64T = (EDX >> 29) & 0x1;
+ bool HasSSE3 = (ECX & 0x1);
+
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ switch (Family) {
+ case 3:
+ return "i386";
+ case 4:
+ switch (Model) {
+ case 0: // Intel486TM DX processors
+ case 1: // Intel486TM DX processors
+ case 2: // Intel486 SX processors
+ case 3: // Intel487TM processors, IntelDX2 OverDrive® processors,
+ // IntelDX2TM processors
+ case 4: // Intel486 SL processor
+ case 5: // IntelSX2TM processors
+ case 7: // Write-Back Enhanced IntelDX2 processors
+ case 8: // IntelDX4 OverDrive processors, IntelDX4TM processors
+ default: return "i486";
+ }
+ case 5:
+ switch (Model) {
+ case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
+ // Pentium® processors (60, 66)
+ case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
+ // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
+ // 150, 166, 200)
+ case 3: // Pentium OverDrive processors for Intel486 processor-based
+ // systems
+ return "pentium";
+
+ case 4: // Pentium OverDrive processor with MMXTM technology for Pentium
+ // processor (75, 90, 100, 120, 133), Pentium processor with
+ // MMXTM technology (166, 200)
+ return "pentium-mmx";
+
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 1: // Pentium Pro processor
+ return "pentiumpro";
+
+ case 3: // Intel Pentium II OverDrive processor, Pentium II processor,
+ // model 03
+ case 5: // Pentium II processor, model 05, Pentium II Xeon processor,
+ // model 05, and Intel® Celeron® processor, model 05
+ case 6: // Celeron processor, model 06
+ return "pentium2";
+
+ case 7: // Pentium III processor, model 07, and Pentium III Xeon
+ // processor, model 07
+ case 8: // Pentium III processor, model 08, Pentium III Xeon processor,
+ // model 08, and Celeron processor, model 08
+ case 10: // Pentium III Xeon processor, model 0Ah
+ case 11: // Pentium III processor, model 0Bh
+ return "pentium3";
+
+ case 9: // Intel Pentium M processor, Intel Celeron M processor model 09.
+ case 13: // Intel Pentium M processor, Intel Celeron M processor, model
+ // 0Dh. All processors are manufactured using the 90 nm process.
+ return "pentium-m";
+
+ case 14: // Intel CoreTM Duo processor, Intel CoreTM Solo processor, model
+ // 0Eh. All processors are manufactured using the 65 nm process.
+ return "yonah";
+
+ case 15: // Intel CoreTM2 Duo processor, Intel CoreTM2 Duo mobile
+ // processor, Intel CoreTM2 Quad processor, Intel CoreTM2 Quad
+ // mobile processor, Intel CoreTM2 Extreme processor, Intel
+ // Pentium Dual-Core processor, Intel Xeon processor, model
+ // 0Fh. All processors are manufactured using the 65 nm process.
+ case 22: // Intel Celeron processor model 16h. All processors are
+ // manufactured using the 65 nm process
+ return "core2";
+
+ case 21: // Intel EP80579 Integrated Processor and Intel EP80579
+ // Integrated Processor with Intel QuickAssist Technology
+ return "i686"; // FIXME: ???
+
+ case 23: // Intel CoreTM2 Extreme processor, Intel Xeon processor, model
+ // 17h. All processors are manufactured using the 45 nm process.
+ //
+ // 45nm: Penryn , Wolfdale, Yorkfield (XE)
+ return "penryn";
+
+ case 26: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 45 nm process.
+ case 29: // Intel Xeon processor MP. All processors are manufactured using
+ // the 45 nm process.
+ return "corei7";
+
+ case 28: // Intel Atom processor. All processors are manufactured using
+ // the 45 nm process
+ return "atom";
+
+ default: return "i686";
+ }
+ case 15: {
+ switch (Model) {
+ case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
+ // model 00h and manufactured using the 0.18 micron process.
+ case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
+ // processor MP, and Intel Celeron processor. All processors are
+ // model 01h and manufactured using the 0.18 micron process.
+ case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor – M,
+ // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
+ // processor, and Mobile Intel Celeron processor. All processors
+ // are model 02h and manufactured using the 0.13 micron process.
+ return (Em64T) ? "x86-64" : "pentium4";
+
+ case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
+ // processor. All processors are model 03h and manufactured using
+ // the 90 nm process.
+ case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
+ // Pentium D processor, Intel Xeon processor, Intel Xeon
+ // processor MP, Intel Celeron D processor. All processors are
+ // model 04h and manufactured using the 90 nm process.
+ case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
+ // Extreme Edition, Intel Xeon processor, Intel Xeon processor
+ // MP, Intel Celeron D processor. All processors are model 06h
+ // and manufactured using the 65 nm process.
+ return (Em64T) ? "nocona" : "prescott";
+
+ default:
+ return (Em64T) ? "x86-64" : "pentium4";
+ }
+ }
+
+ default:
+ return "generic";
+ }
+ } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
+ // appears to be no way to generate the wide variety of AMD-specific targets
+ // from the information returned from CPUID.
+ switch (Family) {
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 6:
+ case 7: return "k6";
+ case 8: return "k6-2";
+ case 9:
+ case 13: return "k6-3";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 4: return "athlon-tbird";
+ case 6:
+ case 7:
+ case 8: return "athlon-mp";
+ case 10: return "athlon-xp";
+ default: return "athlon";
+ }
+ case 15:
+ if (HasSSE3) {
+ return "k8-sse3";
+ } else {
+ switch (Model) {
+ case 1: return "opteron";
+ case 5: return "athlon-fx"; // also opteron
+ default: return "athlon64";
+ }
+ }
+ case 16:
+ return "amdfam10";
+ default:
+ return "generic";
+ }
+ }
+#endif
+
+ return "generic";
+}
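A sketch of the intended consumer, with the call site assumed for illustration:

    #include "llvm/System/Host.h"
    #include <string>

    // e.g. "core2", "corei7", "k8-sse3"; "generic" on non-x86 hosts or
    // when cpuid cannot be executed.
    std::string CPU = llvm::sys::getHostCPUName();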
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
index c52f3a8..43c3606 100644
--- a/lib/System/Unix/Program.inc
+++ b/lib/System/Unix/Program.inc
@@ -121,6 +121,9 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
return false;
}
+static void TimeOutHandler(int Sig) {
+}
+
static void SetMemoryLimits (unsigned size)
{
#if HAVE_SYS_RESOURCE_H
@@ -231,11 +234,14 @@ Program::Wait(unsigned secondsToWait,
return -1;
}
- // Install a timeout handler.
+ // Install a timeout handler. The handler itself does nothing, but the simple
+ // fact of having a handler at all causes the wait below to return with EINTR,
+ // unlike if we used SIG_IGN.
if (secondsToWait) {
- memset(&Act, 0, sizeof(Act));
- Act.sa_handler = SIG_IGN;
+ Act.sa_sigaction = 0;
+ Act.sa_handler = TimeOutHandler;
sigemptyset(&Act.sa_mask);
+ Act.sa_flags = 0;
sigaction(SIGALRM, &Act, &Old);
alarm(secondsToWait);
}
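The comment's distinction compresses to this: SIG_IGN makes the kernel discard SIGALRM, so a blocked wait keeps blocking, while any installed handler function, even an empty one, interrupts the syscall with EINTR provided SA_RESTART is unset (which is why sa_flags is cleared above). A standalone sketch of the same mechanism, not the LLVM code path itself:

    #include <signal.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static void onAlarm(int) {}              // an empty body is enough

    int waitWithTimeout(pid_t Child, unsigned Secs) {
      struct sigaction SA, Old;
      SA.sa_handler = onAlarm;               // SIG_IGN here would never interrupt
      sigemptyset(&SA.sa_mask);
      SA.sa_flags = 0;                       // no SA_RESTART: allow EINTR
      sigaction(SIGALRM, &SA, &Old);
      alarm(Secs);
      int Status;
      int R = waitpid(Child, &Status, 0);    // -1/EINTR once the alarm fires
      alarm(0);                              // cancel the pending alarm
      sigaction(SIGALRM, &Old, 0);           // restore the previous handler
      return R;
    }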
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 76cc06e..ff1980d 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -103,11 +103,13 @@ FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM,
ObjectCodeEmitter &OCE);
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
+FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
+FunctionPass *createARMMaxStackAlignmentCalculatorPass();
extern Target TheARMTarget, TheThumbTarget;
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index c603708..ddeb1b9 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -520,8 +520,8 @@ namespace ARM_AM {
return ((AM5Opc >> 8) & 1) ? sub : add;
}
- /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
- /// FSTM instructions.
+ /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and
+ /// VSTM instructions.
static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
unsigned char Offset) {
assert((SubMode == ia || SubMode == db) &&
@@ -541,13 +541,15 @@ namespace ARM_AM {
//
// This is used for NEON load / store instructions.
//
- // addrmode6 := reg with optional writeback
+ // addrmode6 := reg with optional writeback and alignment
//
- // This is stored in three operands [regaddr, regupdate, opc]. The first is
- // the address register. The second register holds the value of a post-access
- // increment for writeback or reg0 if no writeback or if the writeback
- // increment is the size of the memory access. The third operand encodes
- // whether there is writeback to the address register.
+ // This is stored in four operands [regaddr, regupdate, opc, align]. The
+ // first is the address register. The second register holds the value of
+ // a post-access increment for writeback or reg0 if no writeback or if the
+ // writeback increment is the size of the memory access. The third
+ // operand encodes whether there is writeback to the address register. The
+ // fourth operand is the value of the alignment specifier to use or zero if
+ // no explicit alignment.
static inline unsigned getAM6Opc(bool WB = false) {
return (int)WB;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7c5b0f0..b50b609 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,11 +14,16 @@
#include "ARMBaseInstrInfo.h"
#include "ARM.h"
#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -504,9 +509,9 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
switch (MI.getOpcode()) {
default: break;
- case ARM::FCPYS:
- case ARM::FCPYD:
+ case ARM::VMOVS:
case ARM::VMOVD:
+ case ARM::VMOVDneon:
case ARM::VMOVQ: {
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
@@ -556,8 +561,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::FLDD:
- case ARM::FLDS:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
@@ -595,8 +600,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::FSTD:
- case ARM::FSTS:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
@@ -632,17 +637,17 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
DestReg).addReg(SrcReg)));
} else if (DestRC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg)
.addReg(SrcReg));
} else if (DestRC == ARM::DPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg)
.addReg(SrcReg));
} else if (DestRC == ARM::DPR_VFP2RegisterClass ||
DestRC == ARM::DPR_8RegisterClass ||
SrcRC == ARM::DPR_VFP2RegisterClass ||
SrcRC == ARM::DPR_8RegisterClass) {
// Always use neon reg-reg move if source or dest is NEON-only regclass.
- BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(ARM::VMOVDneon), DestReg).addReg(SrcReg);
} else if (DestRC == ARM::QPRRegisterClass ||
DestRC == ARM::QPR_VFP2RegisterClass ||
DestRC == ARM::QPR_8RegisterClass) {
@@ -662,12 +667,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
MachineMemOperand::MOStore, 0,
MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ Align);
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
@@ -676,19 +682,27 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else if (RC == ARM::DPRRegisterClass ||
RC == ARM::DPR_VFP2RegisterClass ||
RC == ARM::DPR_8RegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else {
assert((RC == ARM::QPRRegisterClass ||
RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
// FIXME: Neon instructions should support predicates
- BuildMI(MBB, I, DL, get(ARM::VSTRQ)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ if (Align >= 16
+ && (getRegisterInfo().needsStackRealignment(MF))) {
+ BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO)
+ .addReg(SrcReg, getKillRegState(isKill));
+ } else {
+ BuildMI(MBB, I, DL, get(ARM::VSTRQ)).
+ addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ }
}
}
@@ -700,12 +714,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
MachineMemOperand::MOLoad, 0,
MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ Align);
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
@@ -713,18 +728,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else if (RC == ARM::DPRRegisterClass ||
RC == ARM::DPR_VFP2RegisterClass ||
RC == ARM::DPR_8RegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else {
assert((RC == ARM::QPRRegisterClass ||
RC == ARM::QPR_VFP2RegisterClass ||
RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
// FIXME: Neon instructions should support predicates
- BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).
- addMemOperand(MMO);
+ if (Align >= 16
+ && (getRegisterInfo().needsStackRealignment(MF))) {
+ BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO);
+ } else {
+ BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).
+ addMemOperand(MMO);
+ }
}
}
@@ -805,7 +826,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
}
- } else if (Opc == ARM::FCPYS) {
+ } else if (Opc == ARM::VMOVS) {
unsigned Pred = MI->getOperand(2).getImm();
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
@@ -813,7 +834,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned SrcSubReg = MI->getOperand(1).getSubReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef),
SrcSubReg)
.addFrameIndex(FI)
@@ -823,7 +844,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned DstSubReg = MI->getOperand(0).getSubReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS))
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
@@ -832,7 +853,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
}
}
- else if (Opc == ARM::FCPYD) {
+ else if (Opc == ARM::VMOVD) {
unsigned Pred = MI->getOperand(2).getImm();
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
@@ -840,7 +861,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned SrcSubReg = MI->getOperand(1).getSubReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD))
.addReg(SrcReg,
getKillRegState(isKill) | getUndefRegState(isUndef),
SrcSubReg)
@@ -850,7 +871,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned DstSubReg = MI->getOperand(0).getSubReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD))
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
@@ -886,15 +907,114 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
Opc == ARM::tMOVtgpr2gpr ||
Opc == ARM::tMOVgpr2tgpr) {
return true;
- } else if (Opc == ARM::FCPYS || Opc == ARM::FCPYD) {
+ } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) {
return true;
- } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVQ) {
+ } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
return false; // FIXME
}
return false;
}
+void ARMBaseInstrInfo::
+reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc dl = Orig->getDebugLoc();
+
+ if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
+ DestReg = TRI->getSubReg(DestReg, SubIdx);
+ SubIdx = 0;
+ }
+
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default: {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+ break;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineConstantPool *MCP = MF.getConstantPool();
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+ unsigned PCLabelId = AFI->createConstPoolEntryUId();
+ ARMConstantPoolValue *NewCPV = 0;
+ if (ACPV->isGlobalValue())
+ NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
+ else if (ACPV->isExtSymbol())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
+ ACPV->getSymbol(), PCLabelId, 4);
+ else if (ACPV->isBlockAddress())
+ NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
+ else
+ llvm_unreachable("Unexpected ARM constantpool value type!!");
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
+ DestReg)
+ .addConstantPoolIndex(CPI).addImm(PCLabelId);
+ (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = prior(I);
+ NewMI->getOperand(0).setSubReg(SubIdx);
+}
+
+bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ int Opcode = MI0->getOpcode();
+ if (Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci_pic) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ const MachineOperand &MO0 = MI0->getOperand(1);
+ const MachineOperand &MO1 = MI1->getOperand(1);
+ if (MO0.getOffset() != MO1.getOffset())
+ return false;
+
+ const MachineFunction *MF = MI0->getParent()->getParent();
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ int CPI0 = MO0.getIndex();
+ int CPI1 = MO1.getIndex();
+ const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
+ const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
+ ARMConstantPoolValue *ACPV0 =
+ static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+ ARMConstantPoolValue *ACPV1 =
+ static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+ return ACPV0->hasSameValue(ACPV1);
+ }
+
+ return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI);
+}
+
+unsigned ARMBaseInstrInfo::TailDuplicationLimit(const MachineBasicBlock &MBB,
+ unsigned DefaultLimit) const {
+ // If the target processor can predict indirect branches, it is highly
+ // desirable to duplicate them, since it can often make them predictable.
+ if (!MBB.empty() && isIndirectBranchOpcode(MBB.back().getOpcode()) &&
+ getSubtarget().hasBranchTargetBuffer())
+ return DefaultLimit + 2;
+ return DefaultLimit;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -1022,6 +1142,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
break;
}
case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
// Can't fold any offset even if it's zero.
return false;
case ARMII::AddrMode5: {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 2ba3774..73e854f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.h - ARM Base Instruction Information -------------*- C++ -*-===//
+//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -261,9 +261,20 @@ public:
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
+ const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const;
+ virtual void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other,
+ const MachineRegisterInfo *MRI) const;
+
+ virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB,
+ unsigned DefaultLimit) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 70377f9e..19762ee 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -44,10 +44,6 @@ static cl::opt<bool>
ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true),
cl::desc("Reuse repeated frame index values"));
-static cl::opt<bool>
-ARMDynamicStackAlign("arm-dynamic-stack-alignment", cl::Hidden, cl::init(false),
- cl::desc("Dynamically re-align the stack as needed"));
-
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
bool *isSPVFP) {
if (isSPVFP)
@@ -476,11 +472,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
}
static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
- // FIXME: For now, force at least 128-bit alignment. This will push the
- // nightly tester harder for making sure things work correctly. When
- // we're ready to enable this for real, this goes back to starting at zero.
- unsigned MaxAlign = 16;
-// unsigned MaxAlign = 0;
+ unsigned MaxAlign = 0;
for (int i = FFI->getObjectIndexBegin(),
e = FFI->getObjectIndexEnd(); i != e; ++i) {
@@ -508,20 +500,12 @@ bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
- // Only do this for ARM if explicitly enabled
- // FIXME: Once it's passing all the tests, enable by default
- if (!ARMDynamicStackAlign)
- return false;
-
- // FIXME: To force more brutal testing, realign whether we need to or not.
- // Change this to be more selective when we turn it on for real, of course.
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-// unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
return (RealignStack &&
!AFI->isThumb1OnlyFunction() &&
- AFI->hasStackFrame() &&
-// (MFI->getMaxAlignment() > StackAlign) &&
+ (MFI->getMaxAlignment() > StackAlign) &&
!MFI->hasVarSizedObjects());
}
@@ -529,7 +513,8 @@ bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const
const MachineFrameInfo *MFI = MF.getFrameInfo();
if (NoFramePointerElim && MFI->hasCalls())
return true;
- return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ || needsStackRealignment(MF);
}
/// estimateStackSize - Estimate and return the size of the frame.
@@ -604,7 +589,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- if (ARMDynamicStackAlign) {
+ if (RealignStack) {
unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
calculateMaxStackAlignment(MFI));
MFI->setMaxAlignment(MaxAlign);
@@ -789,7 +774,8 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve a slot closest to SP or frame pointer.
const TargetRegisterClass *RC = ARM::GPRRegisterClass;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
}
}
@@ -806,7 +792,8 @@ unsigned ARMBaseRegisterInfo::getRARegister() const {
return ARM::LR;
}
-unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned
+ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
if (STI.isTargetDarwin() || hasFP(MF))
return FramePtr;
return ARM::SP;
@@ -1183,7 +1170,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// as much as possible above, handle the rest, providing a register that is
// SP+LargeImm.
assert((Offset ||
- (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) &&
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
"This code isn't needed if offset already handled!");
unsigned ScratchReg = 0;
@@ -1192,7 +1180,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
if (Offset == 0)
- // Must be addrmode4.
+ // Must be addrmode4/6.
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else {
ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
@@ -1346,7 +1334,7 @@ emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
- movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::VSTRD, 0, 3, STI);
NumBytes = DPRCSOffset;
if (NumBytes) {
// Adjust SP after all the callee-save spills.
@@ -1385,7 +1373,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
const unsigned *CSRegs) {
- return ((MI->getOpcode() == (int)ARM::FLDD ||
+ return ((MI->getOpcode() == (int)ARM::VLDRD ||
MI->getOpcode() == (int)ARM::LDR ||
MI->getOpcode() == (int)ARM::t2LDRi12) &&
MI->getOperand(1).isFI() &&
@@ -1411,7 +1399,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
if (NumBytes != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
} else {
- // Unwind MBBI to point to first LDR / FLDD.
+ // Unwind MBBI to point to first LDR / VLDRD.
const unsigned *CSRegs = getCalleeSavedRegs();
if (MBBI != MBB.begin()) {
do
@@ -1459,7 +1447,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Move SP to start of integer callee save spill area 2.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 0, 3, STI);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::VLDRD, 0, 3, STI);
emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize());
// Move SP to start of integer callee save spill area 1.
@@ -1475,4 +1463,48 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
}
+namespace {
+ struct MaximalStackAlignmentCalculator : public MachineFunctionPass {
+ static char ID;
+ MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+
+ // Calculate max stack alignment of all already allocated stack objects.
+ unsigned MaxAlign = calculateMaxStackAlignment(FFI);
+
+      // Be over-conservative: scan over all vreg defs and find whether vector
+      // registers are used. If yes, there is a chance that a vector register
+      // will be spilled and thus the stack needs to be aligned properly.
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
+ RegNum < RI.getLastVirtReg(); ++RegNum)
+ MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
+
+ if (FFI->getMaxAlignment() == MaxAlign)
+ return false;
+
+ FFI->setMaxAlignment(MaxAlign);
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Stack Required Alignment Auto-Detector";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+
+ char MaximalStackAlignmentCalculator::ID = 0;
+}
+
+FunctionPass*
+llvm::createARMMaxStackAlignmentCalculatorPass() {
+ return new MaximalStackAlignmentCalculator();
+}
+
#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 029e468..4b267b0 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -105,7 +105,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 13cf676..766acff 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -168,7 +168,8 @@ namespace {
/// Routines that handle operands which add machine relocations which are
/// fixed up by the relocation stage.
void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
- bool NeedStub, bool Indirect, intptr_t ACPV = 0);
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV = 0);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
void emitConstPoolAddress(unsigned CPI, unsigned Reloc);
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc);
@@ -277,13 +278,13 @@ unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI,
///
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
- bool NeedStub, bool Indirect,
+ bool MayNeedFarStub, bool Indirect,
intptr_t ACPV) {
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- GV, ACPV, NeedStub)
+ GV, ACPV, MayNeedFarStub)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, ACPV, NeedStub);
+ GV, ACPV, MayNeedFarStub);
MCE.addRelocation(MR);
}
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 9819625..d22c43a 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -31,6 +31,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include <algorithm>
using namespace llvm;
@@ -42,6 +43,13 @@ STATISTIC(NumTBs, "Number of table branches generated");
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
@@ -174,6 +182,7 @@ namespace {
void DoInitialPlacement(MachineFunction &MF,
std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void JumpTableFunctionScan(MachineFunction &MF);
void InitialFunctionScan(MachineFunction &MF,
const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
@@ -201,7 +210,10 @@ namespace {
bool UndoLRSpillRestore();
bool OptimizeThumb2Instructions(MachineFunction &MF);
bool OptimizeThumb2Branches(MachineFunction &MF);
+ bool ReorderThumb2JumpTables(MachineFunction &MF);
bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
unsigned GetOffsetOf(MachineInstr *MI) const;
void dumpBBs();
@@ -262,6 +274,18 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// the numbers agree with the position of the block in the function.
MF.RenumberBlocks();
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ JumpTableFunctionScan(MF);
+ MadeChange |= ReorderThumb2JumpTables(MF);
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF.RenumberBlocks();
+ }
+
// Thumb1 functions containing constant pools get 4-byte alignment.
// This is so we can keep exact track of where the alignment padding goes.
@@ -292,7 +316,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Iteratively place constant pool entries and fix up branches until there
// is no change.
- bool MadeChange = false;
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
bool CPChange = false;
@@ -409,6 +432,21 @@ ARMConstantIslands::CPEntry
return NULL;
}
+/// JumpTableFunctionScan - Do a scan of the function, building up
+/// information about the locations of all the jump table branch
+/// instructions.
+void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
@@ -541,8 +579,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
Scale = 4; // +(offset_8*4)
break;
- case ARM::FLDD:
- case ARM::FLDS:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
Bits = 8;
Scale = 4; // +-(offset_8*4)
NegOk = true;
@@ -1552,7 +1590,6 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
return MadeChange;
}
-
/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
@@ -1560,7 +1597,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
- const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
@@ -1660,3 +1697,99 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
return MadeChange;
}
+
+/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// jump tables always branch forwards, since that's what tbb and tbh need.
+bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+    // We prefer that the target blocks for the jump table come after the jump
+    // instruction itself so we can use TB[BH]. Loop through the target blocks
+    // and try to adjust them so that this holds.
+ int JTNumber = MI->getParent()->getNumber();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ int DTNumber = MBB->getNumber();
+
+ if (DTNumber < JTNumber) {
+ // The destination precedes the switch. Try to move the block forward
+ // so we have a positive offset.
+ MachineBasicBlock *NewBB =
+ AdjustJTTargetBlockForward(MBB, MI->getParent());
+ if (NewBB)
+ MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+MachineBasicBlock *ARMConstantIslands::
+AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
+{
+ MachineFunction &MF = *BB->getParent();
+
+  // If the destination block is terminated by an unconditional branch,
+ // try to move it; otherwise, create a new block following the jump
+ // table that branches back to the actual target. This is a very simple
+ // heuristic. FIXME: We can definitely improve it.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineOperand, 4> CondPrior;
+ MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator OldPrior = prior(BBi);
+
+ // If the block terminator isn't analyzable, don't try to move the block
+ bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+
+ // If the block ends in an unconditional branch, move it. The prior block
+ // has to have an analyzable terminator for us to move this one. Be paranoid
+ // and make sure we're not trying to move the entry block of the function.
+ if (!B && Cond.empty() && BB != MF.begin() &&
+ !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ BB->moveAfter(JTBB);
+ OldPrior->updateTerminator();
+ BB->updateTerminator();
+ // Update numbering to account for the block being moved.
+ MF.RenumberBlocks();
+ ++NumJTMoved;
+ return NULL;
+ }
+
+ // Create a new MBB for the code after the jump BB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond directly to anything in the source.
+ assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+ BuildMI(NewBB, DebugLoc::getUnknownLoc(), TII->get(ARM::t2B)).addMBB(BB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Update the CFG.
+ NewBB->addSuccessor(BB);
+ JTBB->removeSuccessor(BB);
+ JTBB->addSuccessor(NewBB);
+
+ ++NumJTInserted;
+ return NewBB;
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index efa941a..90dd0c7 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -62,9 +62,10 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
ARMConstantPoolValue *CPV =
(ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
if (CPV->CVal == CVal &&
- CPV->S == S &&
CPV->LabelId == LabelId &&
- CPV->PCAdjust == PCAdjust)
+ CPV->PCAdjust == PCAdjust &&
+ (CPV->S == S || strcmp(CPV->S, S) == 0) &&
+ (CPV->Modifier == Modifier || strcmp(CPV->Modifier, Modifier) == 0))
return i;
}
}
@@ -84,6 +85,23 @@ ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
ID.AddInteger(PCAdjust);
}
+bool
+ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
+ if (ACPV->Kind == Kind &&
+ ACPV->CVal == CVal &&
+ ACPV->PCAdjust == PCAdjust &&
+ (ACPV->S == S || strcmp(ACPV->S, S) == 0) &&
+ (ACPV->Modifier == Modifier || strcmp(ACPV->Modifier, Modifier) == 0)) {
+ if (ACPV->LabelId == LabelId)
+ return true;
+ // Two PC relative constpool entries containing the same GV address or
+ // external symbols. FIXME: What about blockaddress?
+ if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol)
+ return true;
+ }
+ return false;
+}
+
void ARMConstantPoolValue::dump() const {
errs() << " " << *this;
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 8fb3f92..741acde 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -81,6 +81,10 @@ public:
virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+ /// hasSameValue - Return true if this ARM constpool value
+ /// can share the same constantpool entry as another ARM constpool value.
+ bool hasSameValue(ARMConstantPoolValue *ACPV);
+
void print(raw_ostream *O) const { if (O) print(*O); }
void print(raw_ostream &O) const;
void dump() const;
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
new file mode 100644
index 0000000..4d0f899
--- /dev/null
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -0,0 +1,115 @@
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but
+// before the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-pseudo"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+namespace {
+ class ARMExpandPseudo : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMExpandPseudo() : MachineFunctionPass(&ID) {}
+
+ const TargetInstrInfo *TII;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pseudo instruction expansion pass";
+ }
+
+ private:
+ bool ExpandMBB(MachineBasicBlock &MBB);
+ };
+ char ARMExpandPseudo::ID = 0;
+}
+
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock::iterator NMBBI = next(MBBI);
+
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
+ ? ARM::tLDRpci : ARM::t2LDRpci;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (!MI.getOperand(0).isDead()) {
+ MachineInstr *NewMI =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
+ .addReg(DstReg, getDefRegState(true))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ }
+ MI.eraseFromParent();
+ Modified = true;
+ break;
+ }
+ case ARM::t2MOVi32imm: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ if (!MI.getOperand(0).isDead()) {
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::t2MOVi16), DstReg)
+ .addImm(Lo16));
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::t2MOVTi16))
+ .addReg(DstReg, getDefRegState(true))
+ .addReg(DstReg).addImm(Hi16));
+ }
+ MI.eraseFromParent();
+ Modified = true;
+ }
+ // FIXME: expand MOVi32imm (the ARM-mode variant) as well.
+ }
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+ ++MFI)
+ Modified |= ExpandMBB(*MFI);
+ return Modified;
+}
+
+/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
+FunctionPass *llvm::createARMExpandPseudoPass() {
+ return new ARMExpandPseudo();
+}
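Two notes on the new pass. The t2MOVi32imm case above emits the usual movw/movt pair, e.g. 0xdeadbeef becomes movw r0, #0xbeef followed by movt r0, #0xdead. And a sketch of how the pass is expected to be scheduled (the exact hook is an assumption; the corresponding ARMTargetMachine hunk is elsewhere in this commit):

    // Assumed registration: after register allocation, before the post-RA
    // scheduler, matching the comment at the top of the file.
    bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
                                            CodeGenOpt::Level OptLevel) {
      PM.add(createARMExpandPseudoPass());
      return true;
    }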
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1489cab..9be7454 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -81,7 +81,7 @@ public:
bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base,
SDValue &Offset);
bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update,
- SDValue &Opc);
+ SDValue &Opc, SDValue &Align);
bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset,
SDValue &Label);
@@ -187,8 +187,6 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
void ARMDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
-
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
}
@@ -491,11 +489,13 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
SDValue &Addr, SDValue &Update,
- SDValue &Opc) {
+ SDValue &Opc, SDValue &Align) {
Addr = N;
// Default to no writeback.
Update = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
+ // Default to no alignment.
+ Align = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
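The new Align operand is only a placeholder for now. A hypothetical refinement (not implemented in this commit) would derive a real hint from the memory intrinsic's known alignment:

    // Sketch only: encode the intrinsic's alignment instead of 0.
    if (MemIntrinsicSDNode *MemN = dyn_cast<MemIntrinsicSDNode>(Op))
      Align = CurDAG->getTargetConstant(MemN->getAlignment(), MVT::i32);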
@@ -1010,8 +1010,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ SDValue MemAddr, MemUpdate, MemOpc, Align;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1036,10 +1036,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1047,10 +1047,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Quad registers are directly supported for VLD2,
// loading 2 pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
std::vector<EVT> ResTys(4, VT);
ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
Chain = SDValue(VLd, 4);
// Combine the even and odd subregs to produce the result.
@@ -1071,14 +1071,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4);
+ const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5);
Chain = SDValue(VLdA, NumVecs+1);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4);
+ const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
+ Align, Chain };
+ SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5);
Chain = SDValue(VLdB, NumVecs+1);
// Combine the even and odd subregs to produce the result.
@@ -1098,8 +1099,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ SDValue MemAddr, MemUpdate, MemOpc, Align;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1126,13 +1127,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
Ops.push_back(MemOpc);
+ Ops.push_back(Align);
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(N->getOperand(Vec+3));
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1147,7 +1149,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
N->getOperand(Vec+3)));
}
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
}
// Otherwise, quad registers are stored with two separate instructions,
@@ -1163,18 +1165,18 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+4);
+ MVT::Other, Ops.data(), NumVecs+5);
Chain = SDValue(VStA, 1);
// Store the odd subregs.
Ops[0] = SDValue(VStA, 0); // MemAddr
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
N->getOperand(Vec+3));
- Ops[NumVecs+3] = Chain;
+ Ops[NumVecs+4] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+4);
+ MVT::Other, Ops.data(), NumVecs+5);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1188,8 +1190,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ SDValue MemAddr, MemUpdate, MemOpc, Align;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1226,6 +1228,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
Ops.push_back(MemOpc);
+ Ops.push_back(Align);
unsigned Opc = 0;
if (is64BitVector) {
@@ -1463,8 +1466,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
}
break;
}
- case ARMISD::FMRRD:
- return CurDAG->getMachineNode(ARM::FMRRD, dl, MVT::i32, MVT::i32,
+ case ARMISD::VMOVRRD:
+ return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
Op.getOperand(0), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32));
case ISD::UMUL_LOHI: {
@@ -1653,10 +1656,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
: ARM::MOVCCr;
break;
case MVT::f32:
- Opc = ARM::FCPYScc;
+ Opc = ARM::VMOVScc;
break;
case MVT::f64:
- Opc = ARM::FCPYDcc;
+ Opc = ARM::VMOVDcc;
break;
}
return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
@@ -1680,10 +1683,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
default: assert(false && "Illegal conditional move type!");
break;
case MVT::f32:
- Opc = ARM::FNEGScc;
+ Opc = ARM::VNEGScc;
break;
case MVT::f64:
- Opc = ARM::FNEGDcc;
+ Opc = ARM::VNEGDcc;
break;
}
return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
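For reference, the opcode switches in this file follow the UAL renaming of the VFP move instructions; only the names change, not the semantics:

    // ARMISD::FMRRD  ->  ARMISD::VMOVRRD   (f64 to two GPRs)
    // ARMISD::FMDRR  ->  ARMISD::VMOVDRR   (two GPRs to f64)
    // ARM::FCPYScc   ->  ARM::VMOVScc      ARM::FCPYDcc -> ARM::VMOVDcc
    // ARM::FNEGScc   ->  ARM::VNEGScc      ARM::FNEGDcc -> ARM::VNEGDcc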
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b6ce5dd..c3af8e6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -133,7 +133,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
}
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)), ARMPCLabelIndex(0) {
+ : TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
if (Subtarget->isTargetDarwin()) {
@@ -389,7 +389,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
- // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2.
+ // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR iff target has VFP2.
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
// We want to custom lower some of our intrinsics.
@@ -434,7 +434,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// We have target-specific dag combine patterns for the following nodes:
- // ARMISD::FMRRD - No need to call setTargetDAGCombine
+ // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
@@ -493,8 +493,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
- case ARMISD::FMRRD: return "ARMISD::FMRRD";
- case ARMISD::FMDRR: return "ARMISD::FMDRR";
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
@@ -790,7 +790,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
- Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
if (VA.getLocVT() == MVT::v2f64) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
@@ -805,7 +805,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
- Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(1, MVT::i32));
}
@@ -870,7 +870,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
SmallVector<SDValue, 8> &MemOpChains,
ISD::ArgFlagsTy Flags) {
- SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
@@ -1004,6 +1004,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
bool isDirect = false;
bool isARMFunc = false;
bool isLocalARMFunc = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
GlobalValue *GV = G->getGlobal();
isDirect = true;
@@ -1015,6 +1017,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
isLocalARMFunc = !Subtarget->isThumb() && !isExt;
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
ARMPCLabelIndex,
ARMCP::CPValue, 4);
@@ -1023,7 +1026,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else
@@ -1036,6 +1039,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
Sym, ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
@@ -1043,7 +1047,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else
@@ -1145,7 +1149,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
// Extract the first half and return it in two registers.
SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(0, MVT::i32));
- SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl,
+ SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
@@ -1162,7 +1166,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
}
// Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
// available.
- SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
Flag = Chain.getValue(1);
@@ -1208,6 +1212,9 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
}
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
DebugLoc DL = Op.getDebugLoc();
EVT PtrVT = getPointerTy();
BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
@@ -1217,6 +1224,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
ARMCP::CPBlockAddress,
PCAdj);
@@ -1227,7 +1235,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
PseudoSourceValue::getConstantPool(), 0);
if (RelocM == Reloc::Static)
return Result;
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
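This and the surrounding lowering routines now draw PC label ids from a per-function counter on ARMFunctionInfo rather than a counter on the TargetLowering object, so ids no longer bleed across functions. The accessor is presumably the obvious post-increment (an assumption; its declaration is elsewhere in this commit):

    unsigned createConstPoolEntryUId() { return ConstPoolEntryUId++; }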
@@ -1238,6 +1246,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
DebugLoc dl = GA->getDebugLoc();
EVT PtrVT = getPointerTy();
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, "tlsgd", true);
@@ -1247,7 +1258,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
PseudoSourceValue::getConstantPool(), 0);
SDValue Chain = Argument.getValue(1);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
// call __tls_get_addr.
@@ -1279,7 +1290,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
if (GV->isDeclaration()) {
- // initial exec model
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ // Initial exec model.
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
@@ -1290,7 +1304,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
PseudoSourceValue::getConstantPool(), 0);
Chain = Offset.getValue(1);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
@@ -1355,6 +1369,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
@@ -1363,6 +1380,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (RelocM == Reloc::Static)
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
else {
+ ARMPCLabelIndex = AFI->createConstPoolEntryUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
@@ -1375,7 +1393,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
@@ -1390,6 +1408,9 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SelectionDAG &DAG){
assert(Subtarget->isTargetELF() &&
"GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
@@ -1400,7 +1421,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
@@ -1416,6 +1437,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -1433,7 +1456,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
return Result;
@@ -1522,7 +1545,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
if (NextVA.isMemLoc()) {
unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8;
MachineFrameInfo *MFI = MF.getFrameInfo();
- int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(),
+ true, false);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1533,7 +1557,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
- return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
SDValue
@@ -1636,7 +1660,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ true, false);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1664,7 +1689,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// the result of va_next.
AFI->setVarArgsRegSaveSize(VARegSaveSize);
VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset +
- VARegSaveSize - VARegSize);
+ VARegSaveSize - VARegSize,
+ true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
SmallVector<SDValue, 4> MemOps;
@@ -1688,7 +1714,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
&MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
- VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
+ VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false);
}
return Chain;
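The extra (true, false) arguments at the CreateFixedObject call sites track the widened MachineFrameInfo interface; judging from these call sites (an assumption, since the header hunk is elsewhere in this commit), the new signature is roughly:

    // Incoming-argument slots are immutable and are not spill slots.
    int CreateFixedObject(uint64_t Size, int64_t SPOffset,
                          bool Immutable, bool isSpillSlot);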
@@ -1710,46 +1736,41 @@ static bool isFloatingPointZero(SDValue Op) {
return false;
}
-static bool isLegalCmpImmediate(unsigned C, bool isThumb1Only) {
- return ( isThumb1Only && (C & ~255U) == 0) ||
- (!isThumb1Only && ARM_AM::getSOImmVal(C) != -1);
-}
-
/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
-static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG, bool isThumb1Only,
- DebugLoc dl) {
+SDValue
+ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
- if (!isLegalCmpImmediate(C, isThumb1Only)) {
+ if (!isLegalICmpImmediate(C)) {
// Constant does not fit, try adjusting it by one?
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGE:
- if (isLegalCmpImmediate(C-1, isThumb1Only)) {
+ if (isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
- if (C > 0 && isLegalCmpImmediate(C-1, isThumb1Only)) {
+ if (C > 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
- if (isLegalCmpImmediate(C+1, isThumb1Only)) {
+ if (isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
- if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb1Only)) {
+ if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
@@ -1785,8 +1806,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
}
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -1798,7 +1818,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
if (LHS.getValueType() == MVT::i32) {
SDValue ARMCC;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
}
@@ -1820,8 +1840,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
return Result;
}
-static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
@@ -1832,7 +1851,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
if (LHS.getValueType() == MVT::i32) {
SDValue ARMCC;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
Chain, Dest, ARMCC, CCR,Cmp);
}
@@ -2049,16 +2068,16 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
SDValue Op = N->getOperand(0);
DebugLoc dl = N->getDebugLoc();
if (N->getValueType(0) == MVT::f64) {
- // Turn i64->f64 into FMDRR.
+ // Turn i64->f64 into VMOVDRR.
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
- // Turn f64->i64 into FMRRD.
- SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl,
+ // Turn f64->i64 into VMOVRRD.
+ SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
// Merge the pieces into a single i64 value.
@@ -2115,8 +2134,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
-static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -2140,7 +2158,7 @@ static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, ST->isThumb1Only(), dl);
+ ARMCC, DAG, dl);
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
CCR, Cmp);
@@ -2151,8 +2169,7 @@ static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
-static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -2174,7 +2191,7 @@ static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG,
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, ST->isThumb1Only(), dl);
+ ARMCC, DAG, dl);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
CCR, Cmp);
@@ -2860,8 +2877,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
- case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
@@ -2878,9 +2895,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
- case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG, Subtarget);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
- case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, Subtarget);
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
@@ -3155,12 +3172,12 @@ static SDValue PerformSUBCombine(SDNode *N,
return SDValue();
}
-/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD.
-static SDValue PerformFMRRDCombine(SDNode *N,
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
+static SDValue PerformVMOVRRDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// fmrrd(fmdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
- if (InDouble.getOpcode() == ARMISD::FMDRR)
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR)
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
return SDValue();
}
@@ -3455,7 +3472,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
- case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
@@ -3683,6 +3700,18 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
return true;
}
+/// isLegalICmpImmediate - Return true if the specified immediate is a legal
+/// icmp immediate, that is, the target has icmp instructions which can
+/// compare a register against the immediate without having to materialize
+/// the immediate into a register.
+bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return Imm >= 0 && Imm <= 255;
+}
+
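Worked values for the three encodings (the boundary reasoning follows the standard ARM immediate forms):

    isLegalICmpImmediate(255);    // true in ARM, Thumb2, and Thumb1 mode
    isLegalICmpImmediate(0x1000); // true in ARM/Thumb2: an 8-bit value
                                  // rotated/shifted into place; false in
                                  // Thumb1, which only accepts 0..255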
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
@@ -3737,7 +3766,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
return true;
}
- // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
+ // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
return false;
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 9c7a91d..4f31f8a 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -62,8 +62,8 @@ namespace llvm {
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
- FMRRD, // double to two gprs.
- FMDRR, // Two gprs to double.
+ VMOVRRD, // double to two gprs.
+ VMOVDRR, // Two gprs to double.
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
@@ -180,6 +180,12 @@ namespace llvm {
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
+ /// isLegalICmpImmediate - Return true if the specified immediate is a legal
+ /// icmp immediate, that is, the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -278,8 +284,12 @@ namespace llvm {
SelectionDAG &DAG);
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG);
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain,
@@ -315,6 +325,9 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG);
+
+ SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl);
};
}
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 86bbe2a..87bb12b 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -80,22 +80,26 @@ bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
}
void ARMInstrInfo::
-reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const {
+reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
DebugLoc dl = Orig->getDebugLoc();
- if (Orig->getOpcode() == ARM::MOVi2pieces) {
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default:
+ break;
+ case ARM::MOVi2pieces: {
RI.emitLoadConstPool(MBB, I, dl,
DestReg, SubIdx,
Orig->getOperand(1).getImm(),
(ARMCC::CondCodes)Orig->getOperand(2).getImm(),
Orig->getOperand(3).getReg());
+ MachineInstr *NewMI = prior(I);
+ NewMI->getOperand(0).setSubReg(SubIdx);
return;
}
+ }
- MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
- MBB.insert(I, MI);
+ return ARMBaseInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, TRI);
}
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 5d1678d..4319577 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -35,15 +35,16 @@ public:
// Return true if the block does not fall through.
bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
const ARMRegisterInfo &getRegisterInfo() const { return RI; }
-
- void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const;
};
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index cbe80b4..3fe634e 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -340,9 +340,9 @@ def addrmode5 : Operand<i32>,
// addrmode6 := reg with optional writeback
//
def addrmode6 : Operand<i32>,
- ComplexPattern<i32, 3, "SelectAddrMode6", []> {
+ ComplexPattern<i32, 4, "SelectAddrMode6", []> {
let PrintMethod = "printAddrMode6Operand";
- let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm);
+ let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm);
}
// addrmodepc := pc + reg
@@ -377,15 +377,13 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
let isCommutable = Commutable;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
}
@@ -396,24 +394,22 @@ let Defs = [CPSR] in {
multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, opc, "s\t$dst, $a, $b",
+ IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- IIC_iALUr, opc, "s\t$dst, $a, $b",
+ IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
let isCommutable = Commutable;
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
}
def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, opc, "s\t$dst, $a, $b",
+ IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -435,7 +431,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
opc, "\t$a, $b",
[(opnode GPR:$a, GPR:$b)]> {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
let isCommutable = Commutable;
@@ -443,8 +439,6 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
opc, "\t$a, $b",
[(opnode GPR:$a, so_reg:$b)]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -501,20 +495,22 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
let isCommutable = Commutable;
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
- // Carry setting variants
+}
+// Carry setting variants
+let Defs = [CPSR] in {
+multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, !strconcat(opc, "s\t$dst, $a, $b"),
+ DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
@@ -522,26 +518,25 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{25} = 1;
}
def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, IIC_iALUr, !strconcat(opc, "s\t$dst, $a, $b"),
+ DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
}
def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "s\t$dst, $a, $b"),
+ DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
}
}
+}
//===----------------------------------------------------------------------===//
// Instructions
@@ -652,6 +647,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
let isReturn = 1, isTerminator = 1, isBarrier = 1 in
def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"bx", "\tlr", [(ARMretflag)]> {
+ let Inst{3-0} = 0b1110;
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
@@ -664,6 +660,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0b1110;
}
}
@@ -673,7 +670,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def LDM_RET : AXI4ld<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_Br, "ldm${p}${addr:submode}\t$addr, $wb",
+ LdStMulFrm, IIC_Br, "ldm${addr:submode}${p}\t$addr, $wb",
[]>;
// On non-Darwin platforms R9 is callee-saved.
@@ -762,6 +759,7 @@ let isBranch = 1, isTerminator = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
IIC_Br, "mov\tpc, $target \n$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
+ let Inst{15-12} = 0b1111;
let Inst{20} = 0; // S Bit
let Inst{24-21} = 0b1101;
let Inst{27-25} = 0b000;
@@ -771,6 +769,7 @@ let isBranch = 1, isTerminator = 1 in {
IIC_Br, "ldr\tpc, $target \n$jt",
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
imm:$id)]> {
+ let Inst{15-12} = 0b1111;
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
@@ -782,6 +781,7 @@ let isBranch = 1, isTerminator = 1 in {
IIC_Br, "add\tpc, $target, $idx \n$jt",
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
imm:$id)]> {
+ let Inst{15-12} = 0b1111;
let Inst{20} = 0; // S bit
let Inst{24-21} = 0b0100;
let Inst{27-25} = 0b000;
@@ -813,26 +813,26 @@ def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
// Loads with zero extension
def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "h\t$dst, $addr",
+ IIC_iLoadr, "ldrh", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
- IIC_iLoadr, "ldr", "b\t$dst, $addr",
+ IIC_iLoadr, "ldrb", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
// Loads with sign extension
def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "sh\t$dst, $addr",
+ IIC_iLoadr, "ldrsh", "\t$dst, $addr",
[(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "sb\t$dst, $addr",
+ IIC_iLoadr, "ldrsb", "\t$dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "d\t$dst1, $addr",
+ IIC_iLoadr, "ldrd", "\t$dst1, $addr",
[]>, Requires<[IsARM, HasV5TE]>;
// Indexed loads
@@ -846,35 +846,35 @@ def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "h\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "h\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode2:$addr), LdFrm, IIC_iLoadru,
- "ldr", "b\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
- "ldr", "b\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "sh\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "sh\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "sb\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "sb\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
}
// Store
@@ -884,18 +884,18 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
// Stores with truncate
def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer,
- "str", "h\t$src, $addr",
+ "strh", "\t$src, $addr",
[(truncstorei16 GPR:$src, addrmode3:$addr)]>;
def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
- "str", "b\t$src, $addr",
+ "strb", "\t$src, $addr",
[(truncstorei8 GPR:$src, addrmode2:$addr)]>;
// Store doubleword
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
StMiscFrm, IIC_iStorer,
- "str", "d\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
+ "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
// Indexed stores
def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
@@ -915,28 +915,28 @@ def STR_POST : AI2stwpo<(outs GPR:$base_wb),
def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
- "str", "h\t$src, [$base, $offset]!", "$base = $base_wb",
+ "strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
- "str", "h\t$src, [$base], $offset", "$base = $base_wb",
+ "strh", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti16 GPR:$src,
GPR:$base, am3offset:$offset))]>;
def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
- "str", "b\t$src, [$base, $offset]!", "$base = $base_wb",
+ "strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
- "str", "b\t$src, [$base], $offset", "$base = $base_wb",
+ "strb", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
@@ -947,13 +947,13 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def LDM : AXI4ld<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode}\t$addr, $wb",
+ LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr, $wb",
[]>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def STM : AXI4st<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode}\t$addr, $wb",
+ LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr, $wb",
[]>;
//===----------------------------------------------------------------------===//
@@ -963,15 +963,13 @@ def STM : AXI4st<(outs),
let neverHasSideEffects = 1 in
def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
"mov", "\t$dst, $src", []>, UnaryDP {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
}
def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
"mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
@@ -1016,10 +1014,10 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
let Defs = [CPSR] in {
def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "mov", "s\t$dst, $src, lsr #1",
+ IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1",
[(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "mov", "s\t$dst, $src, asr #1",
+ IIC_iMOVsi, "movs", "\t$dst, $src, asr #1",
[(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP;
}
@@ -1095,15 +1093,19 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set.
-defm ADDS : AI1_bin_s_irs<0b0100, "add",
- BinOpFrag<(addc node:$LHS, node:$RHS)>>;
-defm SUBS : AI1_bin_s_irs<0b0010, "sub",
+defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+ BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AI1_bin_s_irs<0b0010, "subs",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
+ BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
+ BinOpFrag<(sube node:$LHS, node:$RHS)>>;
// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
@@ -1115,24 +1117,20 @@ def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
IIC_iALUsr, "rsb", "\t$dst, $a, $b",
[(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
// RSB with 's' bit set.
let Defs = [CPSR] in {
def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", "s\t$dst, $a, $b",
+ IIC_iALUi, "rsbs", "\t$dst, $a, $b",
[(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", "s\t$dst, $a, $b",
+ IIC_iALUsr, "rsbs", "\t$dst, $a, $b",
[(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -1149,8 +1147,6 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
[(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
Requires<[IsARM, CarryDefIsUnused]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
}
@@ -1168,8 +1164,6 @@ def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b",
[(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
Requires<[IsARM, CarryDefIsUnused]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -1216,14 +1210,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
"mvn", "\t$dst, $src",
[(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
}
def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
IIC_iMOVsr, "mvn", "\t$dst, $src",
- [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP {
- let Inst{4} = 1;
- let Inst{7} = 0;
-}
+ [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
IIC_iMOVi, "mvn", "\t$dst, $imm",
@@ -1536,7 +1527,7 @@ def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
IIC_iCMOVr, "mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
}
@@ -1545,8 +1536,6 @@ def MOVCCs : AI1<0b1101, (outs GPR:$dst),
"mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 25c4acd..e1353b7 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -102,6 +102,19 @@ def addrmode_neonldstm : Operand<i32>,
}
*/
+def h8imm : Operand<i8> {
+ let PrintMethod = "printHex8ImmOperand";
+}
+def h16imm : Operand<i16> {
+ let PrintMethod = "printHex16ImmOperand";
+}
+def h32imm : Operand<i32> {
+ let PrintMethod = "printHex32ImmOperand";
+}
+def h64imm : Operand<i64> {
+ let PrintMethod = "printHex64ImmOperand";
+}
+
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//
@@ -133,7 +146,7 @@ def VLDMS : NI<(outs),
// Use vldmia to load a Q register as a D register pair.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
IIC_fpLoadm,
- "vldmia $addr, ${dst:dregpair}",
+ "vldmia\t$addr, ${dst:dregpair}",
[(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
@@ -145,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
IIC_fpStorem,
- "vstmia $addr, ${src:dregpair}",
+ "vstmia\t$addr, ${src:dregpair}",
[(store (v2f64 QPR:$src), addrmode4:$addr)]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
@@ -2282,7 +2295,7 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
// VMOV : Vector Move (Register)
-def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
+def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
IIC_VMOVD, "vmov\t$dst, $src", "", []>;
def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
IIC_VMOVD, "vmov\t$dst, $src", "", []>;
@@ -2325,38 +2338,38 @@ def vmovImm64 : PatLeaf<(build_vector), [{
// be encoded based on the immed values.
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i8imm:$SIMM), IIC_VMOVImm,
+ (ins h8imm:$SIMM), IIC_VMOVImm,
"vmov.i8\t$dst, $SIMM", "",
[(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i8imm:$SIMM), IIC_VMOVImm,
+ (ins h8imm:$SIMM), IIC_VMOVImm,
"vmov.i8\t$dst, $SIMM", "",
[(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i16imm:$SIMM), IIC_VMOVImm,
+ (ins h16imm:$SIMM), IIC_VMOVImm,
"vmov.i16\t$dst, $SIMM", "",
[(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i16imm:$SIMM), IIC_VMOVImm,
+ (ins h16imm:$SIMM), IIC_VMOVImm,
"vmov.i16\t$dst, $SIMM", "",
[(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i32imm:$SIMM), IIC_VMOVImm,
+ (ins h32imm:$SIMM), IIC_VMOVImm,
"vmov.i32\t$dst, $SIMM", "",
[(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i32imm:$SIMM), IIC_VMOVImm,
+ (ins h32imm:$SIMM), IIC_VMOVImm,
"vmov.i32\t$dst, $SIMM", "",
[(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
- (ins i64imm:$SIMM), IIC_VMOVImm,
+ (ins h64imm:$SIMM), IIC_VMOVImm,
"vmov.i64\t$dst, $SIMM", "",
[(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
- (ins i64imm:$SIMM), IIC_VMOVImm,
+ (ins h64imm:$SIMM), IIC_VMOVImm,
"vmov.i64\t$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 5d02925..2796364 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -740,3 +740,13 @@ def : T1Pat<(i32 thumb_immshifted:$src),
def : T1Pat<(i32 imm0_255_comp:$src),
(tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 5bfda37..1bb9bfd 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1179,3 +1179,13 @@ let isReMaterializable = 1 in
def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb2]>;
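
Both pci_pic pseudos above bake the eventual two-instruction sequence into their AsmString, so before expansion they already print as an ldr followed by a pc-labelled add. A hedged sketch of the late expansion they anticipate, presumably in the new ARMExpandPseudoInsts.cpp added later in this patch; the target opcode names here are assumptions, not confirmed by the diff:

    // Expand {t,t2}LDRpci_pic into its two real instructions:
    //   ldr   rD, <constant-pool entry>
    // LPC<fn>_<id>:
    //   add   rD, pc
    void expandLDRpciPic(MachineBasicBlock &MBB, MachineInstr &MI,
                         const TargetInstrInfo *TII) {
      unsigned Dst = MI.getOperand(0).getReg();
      DebugLoc dl = MI.getDebugLoc();
      // 1) the constant-pool load (tLDRpci assumed for the Thumb-1 case)
      BuildMI(MBB, &MI, dl, TII->get(ARM::tLDRpci), Dst)
        .addOperand(MI.getOperand(1));
      // 2) the pc-relative add, carrying the pc-label id for the printer
      BuildMI(MBB, &MI, dl, TII->get(ARM::tPICADD), Dst)
        .addReg(Dst)
        .addImm(MI.getOperand(2).getImm());
      MI.eraseFromParent();
    }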
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 455c33b..ba341f4 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -17,7 +17,7 @@ def SDT_ITOF :
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
def SDT_CMPFP0 :
SDTypeProfile<0, 1, [SDTCisFP<0>]>;
-def SDT_FMDRR :
+def SDT_VMOVDRR :
SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
@@ -28,7 +28,7 @@ def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
-def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
//===----------------------------------------------------------------------===//
// Operand Definitions.
@@ -55,21 +55,21 @@ def vfp_f64imm : Operand<f64>,
//
let canFoldAsLoad = 1 in {
-def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad64, "fldd", "\t$dst, $addr",
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", ".64\t$dst, $addr",
[(set DPR:$dst, (load addrmode5:$addr))]>;
-def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad32, "flds", "\t$dst, $addr",
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
+ IIC_fpLoad32, "vldr", ".32\t$dst, $addr",
[(set SPR:$dst, (load addrmode5:$addr))]>;
} // canFoldAsLoad
-def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
- IIC_fpStore64, "fstd", "\t$src, $addr",
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
+ IIC_fpStore64, "vstr", ".64\t$src, $addr",
[(store DPR:$src, addrmode5:$addr)]>;
-def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
- IIC_fpStore32, "fsts", "\t$src, $addr",
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
+ IIC_fpStore32, "vstr", ".32\t$src, $addr",
[(store SPR:$src, addrmode5:$addr)]>;
//===----------------------------------------------------------------------===//
@@ -77,32 +77,32 @@ def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
//
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
-def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpLoadm,
- "fldm${addr:submode}d${p}\t${addr:base}, $wb",
+ "vldm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 1;
}
-def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpLoadm,
- "fldm${addr:submode}s${p}\t${addr:base}, $wb",
+ "vldm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 1;
}
} // mayLoad, hasExtraDefRegAllocReq
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
-def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpStorem,
- "fstm${addr:submode}d${p}\t${addr:base}, $wb",
+ "vstm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 0;
}
-def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpStorem,
- "fstm${addr:submode}s${p}\t${addr:base}, $wb",
+ "vstm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 0;
}
@@ -114,68 +114,68 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
// FP Binary Operations.
//
-def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "faddd", "\t$dst, $a, $b",
+def VADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
-def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "fadds", "\t$dst, $a, $b",
+def VADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
// These are encoded as unary instructions.
let Defs = [FPSCR] in {
-def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b),
- IIC_fpCMP64, "fcmped", "\t$a, $b",
+def VCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b),
+ IIC_fpCMP64, "vcmpe", ".f64\t$a, $b",
[(arm_cmpfp DPR:$a, DPR:$b)]>;
-def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b),
- IIC_fpCMP32, "fcmpes", "\t$a, $b",
+def VCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b),
+ IIC_fpCMP32, "vcmpe", ".f32\t$a, $b",
[(arm_cmpfp SPR:$a, SPR:$b)]>;
}
-def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpDIV64, "fdivd", "\t$dst, $a, $b",
+def VDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
-def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpDIV32, "fdivs", "\t$dst, $a, $b",
+def VDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
-def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "fmuld", "\t$dst, $a, $b",
+def VMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
-def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "fmuls", "\t$dst, $a, $b",
+def VMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
-
-def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "fnmuld", "\t$dst, $a, $b",
+
+def VNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> {
let Inst{6} = 1;
}
-def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "fnmuls", "\t$dst, $a, $b",
+def VNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> {
let Inst{6} = 1;
}
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), DPR:$b),
- (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+ (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
def : Pat<(fmul (fneg SPR:$a), SPR:$b),
- (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+ (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
-def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "fsubd", "\t$dst, $a, $b",
+def VSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> {
let Inst{6} = 1;
}
-def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "fsubs", "\t$dst, $a, $b",
+def VSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
let Inst{6} = 1;
}
@@ -184,31 +184,31 @@ def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
// FP Unary Operations.
//
-def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fabsd", "\t$dst, $a",
+def VABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpUNA64, "vabs", ".f64\t$dst, $a",
[(set DPR:$dst, (fabs DPR:$a))]>;
-def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fabss", "\t$dst, $a",
+def VABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "vabs", ".f32\t$dst, $a",
[(set SPR:$dst, (fabs SPR:$a))]>;
let Defs = [FPSCR] in {
-def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
- IIC_fpCMP64, "fcmpezd", "\t$a",
+def VCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
+ IIC_fpCMP64, "vcmpe", ".f64\t$a, #0",
[(arm_cmpfp0 DPR:$a)]>;
-def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a),
- IIC_fpCMP32, "fcmpezs", "\t$a",
+def VCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a),
+ IIC_fpCMP32, "vcmpe", ".f32\t$a, #0",
[(arm_cmpfp0 SPR:$a)]>;
}
-def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTDS, "fcvtds", "\t$dst, $a",
+def VCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a",
[(set DPR:$dst, (fextend SPR:$a))]>;
// Special case encoding: bits 11-8 is 0b1011.
-def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
- IIC_fpCVTSD, "fcvtsd", "\t$dst, $a",
+def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$dst, $a",
[(set SPR:$dst, (fround DPR:$a))]> {
let Inst{27-23} = 0b11101;
let Inst{21-16} = 0b110111;
@@ -217,52 +217,52 @@ def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
}
let neverHasSideEffects = 1 in {
-def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fcpyd", "\t$dst, $a", []>;
+def VMOVD: ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>;
-def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fcpys", "\t$dst, $a", []>;
+def VMOVS: ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>;
} // neverHasSideEffects
-def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fnegd", "\t$dst, $a",
+def VNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpUNA64, "vneg", ".f64\t$dst, $a",
[(set DPR:$dst, (fneg DPR:$a))]>;
-def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fnegs", "\t$dst, $a",
+def VNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "vneg", ".f32\t$dst, $a",
[(set SPR:$dst, (fneg SPR:$a))]>;
-def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpSQRT64, "fsqrtd", "\t$dst, $a",
+def VSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a",
[(set DPR:$dst, (fsqrt DPR:$a))]>;
-def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpSQRT32, "fsqrts", "\t$dst, $a",
+def VSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a",
[(set SPR:$dst, (fsqrt SPR:$a))]>;
//===----------------------------------------------------------------------===//
// FP <-> GPR Copies. Int <-> FP Conversions.
//
-def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
- IIC_VMOVSI, "fmrs", "\t$dst, $src",
+def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
+ IIC_VMOVSI, "vmov", "\t$dst, $src",
[(set GPR:$dst, (bitconvert SPR:$src))]>;
-def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "fmsr", "\t$dst, $src",
+def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
+ IIC_VMOVIS, "vmov", "\t$dst, $src",
[(set SPR:$dst, (bitconvert GPR:$src))]>;
-def FMRRD : AVConv3I<0b11000101, 0b1011,
+def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
- IIC_VMOVDI, "fmrrd", "\t$wb, $dst2, $src",
+ IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src",
[/* FIXME: Can't write pattern for multiple result instr*/]>;
// FMDHR: GPR -> SPR
// FMDLR: GPR -> SPR
-def FMDRR : AVConv5I<0b11000100, 0b1011,
+def VMOVDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
- IIC_VMOVID, "fmdrr", "\t$dst, $src1, $src2",
+ IIC_VMOVID, "vmov", "\t$dst, $src1, $src2",
[(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
// FMRDH: SPR -> GPR
@@ -277,53 +277,53 @@ def FMDRR : AVConv5I<0b11000100, 0b1011,
// Int to FP:
-def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "fsitod", "\t$dst, $a",
+def VSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a",
[(set DPR:$dst, (arm_sitof SPR:$a))]> {
let Inst{7} = 1;
}
-def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
- IIC_fpCVTIS, "fsitos", "\t$dst, $a",
+def VSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a",
[(set SPR:$dst, (arm_sitof SPR:$a))]> {
let Inst{7} = 1;
}
-def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "fuitod", "\t$dst, $a",
+def VUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a",
[(set DPR:$dst, (arm_uitof SPR:$a))]>;
-def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
- IIC_fpCVTIS, "fuitos", "\t$dst, $a",
+def VUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a",
[(set SPR:$dst, (arm_uitof SPR:$a))]>;
// FP to Int:
// Always set Z bit in the instruction, i.e. "round towards zero" variants.
-def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011,
+def VTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "ftosizd", "\t$dst, $a",
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a",
[(set SPR:$dst, (arm_ftosi DPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010,
+def VTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "ftosizs", "\t$dst, $a",
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a",
[(set SPR:$dst, (arm_ftosi SPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011,
+def VTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "ftouizd", "\t$dst, $a",
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a",
[(set SPR:$dst, (arm_ftoui DPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
+def VTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "ftouizs", "\t$dst, $a",
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a",
[(set SPR:$dst, (arm_ftoui SPR:$a))]> {
let Inst{7} = 1; // Z bit
}
@@ -332,54 +332,54 @@ def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
// FP FMA Operations.
//
-def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fmacd", "\t$dst, $a, $b",
+def VMLAD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fmacs", "\t$dst, $a, $b",
+def VMLAS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fmscd", "\t$dst, $a, $b",
+def VNMLSD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fmscs", "\t$dst, $a, $b",
+def VNMLSS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fnmacd", "\t$dst, $a, $b",
+def VMLSD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
-def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fnmacs", "\t$dst, $a, $b",
+def VMLSS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, DPR:$b)),
- (FNMACD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+ (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
- (FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+ (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
-def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fnmscd", "\t$dst, $a, $b",
+def VNMLAD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
-def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fnmscs", "\t$dst, $a, $b",
+def VNMLAS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
@@ -389,27 +389,27 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
// FP Conditional moves.
//
-def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100,
+def VMOVDcc : ADuI<0b11101011, 0b0000, 0b0100,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "fcpyd", "\t$dst, $true",
+ IIC_fpUNA64, "vmov", ".f64\t$dst, $true",
[/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100,
+def VMOVScc : ASuI<0b11101011, 0b0000, 0b0100,
(outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "fcpys", "\t$dst, $true",
+ IIC_fpUNA32, "vmov", ".f32\t$dst, $true",
[/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
+def VNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "fnegd", "\t$dst, $true",
+ IIC_fpUNA64, "vneg", ".f64\t$dst, $true",
[/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
+def VNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
(outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "fnegs", "\t$dst, $true",
+ IIC_fpUNA32, "vneg", ".f32\t$dst, $true",
[/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
@@ -418,8 +418,11 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
// Misc.
//
+// APSR is the application-level alias of CPSR. This instruction moves the
+// FPSCR N, Z, C, V flags to APSR.
let Defs = [CPSR], Uses = [FPSCR] in
-def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "",
+def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
+ "\tapsr_nzcv, fpscr",
[(arm_fmstat)]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
@@ -431,26 +434,26 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "",
// Materialize FP immediates. VFP3 only.
-let isReMaterializable = 1 in
-def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
+let isReMaterializable = 1 in {
+def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_VMOVImm,
- "fconsts", "\t$dst, $imm",
- [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ "fconstd", "\t$dst, $imm",
+ [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
let Inst{11-9} = 0b101;
- let Inst{8} = 0;
+ let Inst{8} = 1;
let Inst{7-4} = 0b0000;
}
-let isReMaterializable = 1 in
-def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
+def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
VFPMiscFrm, IIC_VMOVImm,
- "fconstd", "\t$dst, $imm",
- [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ "fconsts", "\t$dst, $imm",
+ [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
let Inst{11-9} = 0b101;
- let Inst{8} = 1;
+ let Inst{8} = 0;
let Inst{7-4} = 0b0000;
}
+}
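
One substantive detail hides in the fconst reshuffle above: bit 8 of the encoding distinguishes the double-precision form from the single-precision one, and the swap gives FCONSTD the set bit. A one-line model of that relationship (the helper is illustrative):

    // Inst{8} selects the precision in the VFP3 fconst encodings:
    // 1 -> f64 (FCONSTD), 0 -> f32 (FCONSTS).
    unsigned withPrecisionBit(unsigned Encoding, bool IsDouble) {
      return IsDouble ? (Encoding | (1u << 8)) : (Encoding & ~(1u << 8));
    }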
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 7e1783b..304d0ef 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -41,8 +41,8 @@ using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
-STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
-STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
+STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
@@ -127,18 +127,18 @@ static int getLoadStoreMultipleOpcode(int Opcode) {
case ARM::t2STRi12:
NumSTMGened++;
return ARM::t2STM;
- case ARM::FLDS:
- NumFLDMGened++;
- return ARM::FLDMS;
- case ARM::FSTS:
- NumFSTMGened++;
- return ARM::FSTMS;
- case ARM::FLDD:
- NumFLDMGened++;
- return ARM::FLDMD;
- case ARM::FSTD:
- NumFSTMGened++;
- return ARM::FSTMD;
+ case ARM::VLDRS:
+ NumVLDMGened++;
+ return ARM::VLDMS;
+ case ARM::VSTRS:
+ NumVSTMGened++;
+ return ARM::VSTMS;
+ case ARM::VLDRD:
+ NumVLDMGened++;
+ return ARM::VLDMD;
+ case ARM::VSTRD:
+ NumVSTMGened++;
+ return ARM::VSTMD;
default: llvm_unreachable("Unhandled opcode!");
}
return 0;
@@ -229,8 +229,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
BaseKill = true;  // New base is always killed right after its use.
}
- bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
- bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
+ bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
Opcode = getLoadStoreMultipleOpcode(Opcode);
MachineInstrBuilder MIB = (isAM4)
? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
@@ -373,27 +373,27 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::t2LDRi12:
case ARM::t2STRi8:
case ARM::t2STRi12:
- case ARM::FLDS:
- case ARM::FSTS:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
return 4;
- case ARM::FLDD:
- case ARM::FSTD:
+ case ARM::VLDRD:
+ case ARM::VSTRD:
return 8;
case ARM::LDM:
case ARM::STM:
case ARM::t2LDM:
case ARM::t2STM:
return (MI->getNumOperands() - 5) * 4;
- case ARM::FLDMS:
- case ARM::FSTMS:
- case ARM::FLDMD:
- case ARM::FSTMD:
+ case ARM::VLDMS:
+ case ARM::VSTMS:
+ case ARM::VLDMD:
+ case ARM::VSTMD:
return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
}
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
-/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
@@ -475,7 +475,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
}
} else {
- // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+ // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
return false;
@@ -517,10 +517,10 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
case ARM::LDR: return ARM::LDR_PRE;
case ARM::STR: return ARM::STR_PRE;
- case ARM::FLDS: return ARM::FLDMS;
- case ARM::FLDD: return ARM::FLDMD;
- case ARM::FSTS: return ARM::FSTMS;
- case ARM::FSTD: return ARM::FSTMD;
+ case ARM::VLDRS: return ARM::VLDMS;
+ case ARM::VLDRD: return ARM::VLDMD;
+ case ARM::VSTRS: return ARM::VSTMS;
+ case ARM::VSTRD: return ARM::VSTMD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_PRE;
@@ -536,10 +536,10 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
case ARM::LDR: return ARM::LDR_POST;
case ARM::STR: return ARM::STR_POST;
- case ARM::FLDS: return ARM::FLDMS;
- case ARM::FLDD: return ARM::FLDMD;
- case ARM::FSTS: return ARM::FSTMS;
- case ARM::FSTD: return ARM::FSTMD;
+ case ARM::VLDRS: return ARM::VLDMS;
+ case ARM::VLDRD: return ARM::VLDMD;
+ case ARM::VSTRS: return ARM::VSTMS;
+ case ARM::VSTRD: return ARM::VSTMD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_POST;
@@ -564,8 +564,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
unsigned Bytes = getLSMultipleTransferSize(MI);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
- Opcode == ARM::FSTD || Opcode == ARM::FSTS;
+ bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
return false;
@@ -575,7 +575,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (MI->getOperand(2).getImm() != 0)
return false;
- bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (isLd && MI->getOperand(0).getReg() == Base)
@@ -626,7 +626,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (!DoMerge)
return false;
- bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
+ bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
unsigned Offset = 0;
if (isAM5)
Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
@@ -638,7 +638,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
if (isLd) {
if (isAM5)
- // FLDMS, FLDMD
+ // VLDMS, VLDMD
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getKillRegState(BaseKill))
.addImm(Offset).addImm(Pred).addReg(PredReg)
@@ -657,7 +657,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
} else {
MachineOperand &MO = MI->getOperand(0);
if (isAM5)
- // FSTMS, FSTMD
+ // VSTMS, VSTMD
BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
.addImm(Pred).addReg(PredReg)
.addReg(Base, getDefRegState(true)) // WB base register
@@ -687,11 +687,11 @@ static bool isMemoryOp(const MachineInstr *MI) {
case ARM::LDR:
case ARM::STR:
return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
- case ARM::FLDS:
- case ARM::FSTS:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
return MI->getOperand(1).isReg();
- case ARM::FLDD:
- case ARM::FSTD:
+ case ARM::VLDRD:
+ case ARM::VSTRD:
return MI->getOperand(1).isReg();
case ARM::t2LDRi8:
case ARM::t2LDRi12:
@@ -866,6 +866,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} else {
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is probably
+ // on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
+ }
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
@@ -1214,7 +1221,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
if (!STI->hasV5TEOps())
return false;
- // FIXME: FLDS / FSTS -> FLDD / FSTD
+ // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
if (Opcode == ARM::LDR)
@@ -1456,7 +1463,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
continue;
int Opc = MI->getOpcode();
- bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
+ bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
unsigned Base = MI->getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
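
The one behavioral fix in this file is the kill-marker adjustment in FixInvalidRegPairOp. A self-contained model of the invariant it restores, assuming the usual rule that a register may carry a kill marker on at most its last use within an instruction:

    #include <cassert>

    // When both halves of an STRD register pair are the same register, only
    // the second (odd) use may be marked killed.
    void fixPairKill(unsigned EvenReg, unsigned OddReg,
                     bool &EvenKill, bool &OddKill) {
      if (OddReg == EvenReg && EvenKill) {
        EvenKill = false;
        OddKill = true;
      }
    }

    int main() {
      bool evenKill = true, oddKill = false;
      fixPairKill(5, 5, evenKill, oddKill); // models t2STRDi8 %R5<kill>, %R5, ...
      assert(!evenKill && oddKill);
      return 0;
    }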
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 5af95c3..432ed78 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -16,6 +16,7 @@
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
using namespace llvm;
static cl::opt<bool>
@@ -108,6 +109,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
if (UseNEONFP.getPosition() == 0)
UseNEONForSinglePrecisionFP = true;
}
+ HasBranchTargetBuffer = (CPUString == "cortex-a8" ||
+ CPUString == "cortex-a9");
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
@@ -159,3 +162,13 @@ ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const {
return false;
}
+
+bool ARMSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
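
The new enablePostRAScheduler overload reports an anti-dependence mode and a set of critical-path register classes instead of the mode alone. A sketch of the caller side, assuming RegClassVector is a SmallVector of TargetRegisterClass pointers as the header diff that follows suggests:

    void configurePostRAScheduling(const ARMSubtarget &ST,
                                   CodeGenOpt::Level OptLevel) {
      TargetSubtarget::AntiDepBreakMode Mode;
      SmallVector<TargetRegisterClass*, 4> CriticalRCs; // stands in for RegClassVector
      if (ST.enablePostRAScheduler(OptLevel, Mode, CriticalRCs)) {
        // Mode is ANTIDEP_CRITICAL and CriticalRCs holds &ARM::GPRRegClass,
        // so anti-dependence breaking focuses on the GPR critical path.
      }
    }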
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e721a7f..3d0e01e 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtarget.h"
+#include "ARMBaseRegisterInfo.h"
#include <string>
namespace llvm {
@@ -49,6 +50,9 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
+ /// HasBranchTargetBuffer - True if the processor can predict indirect branches.
+ bool HasBranchTargetBuffer;
+
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
@@ -122,17 +126,16 @@ protected:
bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); }
bool hasThumb2() const { return ThumbMode >= Thumb2; }
+ bool hasBranchTargetBuffer() const { return HasBranchTargetBuffer; }
+
bool isR9Reserved() const { return IsR9Reserved; }
const std::string & getCPUString() const { return CPUString; }
- /// enablePostRAScheduler - True at 'More' optimization except
- /// for Thumb1.
+ /// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& mode) const {
- mode = TargetSubtarget::ANTIDEP_CRITICAL;
- return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
- }
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index b4ce1d7..2564ed9 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -21,8 +21,7 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static const MCAsmInfo *createMCAsmInfo(const Target &T,
- const StringRef &TT) {
+static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
case Triple::Darwin:
@@ -61,8 +60,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
- std::string("e-p:32:32-f64:32:32-i64:32:32") :
- std::string("e-p:32:32-f64:64:64-i64:64:64")),
+ std::string("e-p:32:32-f64:32:32-i64:32:32-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-n32")),
TLInfo(*this) {
}
@@ -74,9 +73,9 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32")),
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")),
TLInfo(*this) {
}
@@ -94,6 +93,10 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
if (Subtarget.hasNEON())
PM.add(createNEONPreAllocPass());
+ // Calculate and set max stack object alignment early, so we can decide
+ // whether we will need stack realignment (and thus FP).
+ PM.add(createARMMaxStackAlignmentCalculatorPass());
+
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
@@ -106,6 +109,10 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass());
+ // Expand some pseudo instructions into multiple instructions to allow
+ // proper scheduling.
+ PM.add(createARMExpandPseudoPass());
+
return true;
}
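
All four data-layout strings gain an "-n32" component; that suffix declares the target's native integer widths, which width-changing transforms consult when choosing integer types. A hedged illustration, assuming the TargetData API of this LLVM vintage:

    #include "llvm/Target/TargetData.h"

    void dataLayoutExample() {
      // "n32" says i32 is the only native integer width on ARM, steering
      // passes toward 32-bit arithmetic when they may choose a width.
      llvm::TargetData TD("e-p:32:32-f64:64:64-i64:64:64-n32");
    }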
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 6cb3e9e4..0352503 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -43,6 +43,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -138,6 +139,19 @@ namespace {
void printVFPf32ImmOperand(const MachineInstr *MI, int OpNum);
void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum);
+ void printHex8ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff);
+ }
+ void printHex16ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff);
+ }
+ void printHex32ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff);
+ }
+ void printHex64ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm());
+ }
+
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode);
virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
@@ -199,7 +213,7 @@ namespace {
if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
if (ACPV->getPCAdjustment() != 0) {
O << "-(" << MAI->getPrivateGlobalPrefix() << "PC"
- << ACPV->getLabelId()
+ << getFunctionNumber() << "_" << ACPV->getLabelId()
<< "+" << (unsigned)ACPV->getPCAdjustment();
if (ACPV->mustAddCurrentAddress())
O << "-.";
@@ -333,6 +347,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
&ARM::DPR_VFP2RegClass);
O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
} else {
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
O << getRegisterName(Reg);
}
break;
@@ -594,12 +609,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
if (Modifier && strcmp(Modifier, "submode") == 0) {
ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- if (MO1.getReg() == ARM::SP) {
- bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
- MI->getOpcode() == ARM::FLDMS);
- O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
- } else
- O << ARM_AM::getAMSubModeStr(Mode);
+ O << ARM_AM::getAMSubModeStr(Mode);
return;
} else if (Modifier && strcmp(Modifier, "base") == 0) {
// Used for FLDM{D|S} and FSTM{D|S} operations.
@@ -623,9 +633,14 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) {
const MachineOperand &MO1 = MI->getOperand(Op);
const MachineOperand &MO2 = MI->getOperand(Op+1);
const MachineOperand &MO3 = MI->getOperand(Op+2);
+ const MachineOperand &MO4 = MI->getOperand(Op+3);
- // FIXME: No support yet for specifying alignment.
- O << "[" << getRegisterName(MO1.getReg()) << "]";
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO4.getImm()) {
+ // FIXME: Both darwin as and GNU as violate ARM docs here.
+ O << ", :" << MO4.getImm();
+ }
+ O << "]";
if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
if (MO2.getReg() == 0)
@@ -697,11 +712,8 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
O << "[" << getRegisterName(MO1.getReg());
if (MO3.getReg())
O << ", " << getRegisterName(MO3.getReg());
- else if (unsigned ImmOffs = MO2.getImm()) {
- O << ", #" << ImmOffs;
- if (Scale > 1)
- O << " * " << Scale;
- }
+ else if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs * Scale;
O << "]";
}
@@ -844,7 +856,8 @@ void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){
void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) {
int Id = (int)MI->getOperand(OpNum).getImm();
- O << MAI->getPrivateGlobalPrefix() << "PC" << Id;
+ O << MAI->getPrivateGlobalPrefix()
+ << "PC" << getFunctionNumber() << "_" << Id;
}
void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) {
@@ -1070,7 +1083,7 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
printInstruction(MI);
}
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
processDebugLoc(MI, false);
@@ -1107,9 +1120,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
}
- // Use unified assembler syntax mode for Thumb.
- if (Subtarget->isThumb())
- O << "\t.syntax unified\n";
+ // Use unified assembler syntax.
+ O << "\t.syntax unified\n";
// Emit ARM Build Attributes
if (Subtarget->isTargetELF()) {
@@ -1349,7 +1361,6 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
printKill(MI);
return;
case TargetInstrInfo::INLINEASM:
- O << '\t';
printInlineAsm(MI);
return;
case TargetInstrInfo::IMPLICIT_DEF:
@@ -1365,7 +1376,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// FIXME: MOVE TO SHARED PLACE.
unsigned Id = (unsigned)MI->getOperand(2).getImm();
const char *Prefix = MAI->getPrivateGlobalPrefix();
- MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)+"PC"+Twine(Id));
+ MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)
+ + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
OutStreamer.EmitLabel(Label);
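
A note on the printHexNImmOperand helpers added above: the masks matter because MachineOperand immediates travel as sign-extended 64-bit values, so an all-ones v8i8 splat would otherwise print with every high bit set. A standalone demonstration of the masking rationale (plain C++, no LLVM dependency):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t Imm = -1; // an i8 immediate of 0xff, sign-extended in storage
      // what printHex8ImmOperand effectively emits:
      std::printf("#0x%llx\n", (unsigned long long)(Imm & 0xff));   // #0xff
      // without the mask, the sign extension would leak into the output:
      std::printf("#0x%llx\n", (unsigned long long)Imm); // #0xffffffffffffffff
      return 0;
    }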
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index f422798..0047925 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -259,12 +259,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
if (Modifier && strcmp(Modifier, "submode") == 0) {
ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- if (MO1.getReg() == ARM::SP) {
- bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
- MI->getOpcode() == ARM::FLDMS);
- O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
- } else
- O << ARM_AM::getAMSubModeStr(Mode);
+ O << ARM_AM::getAMSubModeStr(Mode);
return;
} else if (Modifier && strcmp(Modifier, "base") == 0) {
// Used for FLDM{D|S} and FSTM{D|S} operations.
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index 5bf966b..9e7f8d5 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -80,6 +80,10 @@ public:
void printNoHashImmediate(const MCInst *MI, unsigned OpNum);
void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {}
void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex8ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex16ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex32ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex64ImmOperand(const MCInst *MI, int OpNum) {}
void printPCLabel(const MCInst *MI, unsigned OpNum);
// FIXME: Implement.
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
index 8686961..c49fee3 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
@@ -137,6 +137,7 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) continue;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
MCOp = MCOperand::CreateReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index e071b61..964551f 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_target(ARMCodeGen
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
+ ARMExpandPseudoInsts.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
ARMInstrInfo.cpp
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index f307e3b..7d767ec 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -54,10 +54,10 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
NextMII = next(MII);
MachineInstr *MI = &*MII;
- if (MI->getOpcode() == ARM::FCPYD &&
+ if (MI->getOpcode() == ARM::VMOVD &&
!TII->isPredicated(MI)) {
unsigned SrcReg = MI->getOperand(1).getReg();
- // If we do not found an instruction defining the reg, this means the
+ // If we do not find an instruction defining the reg, this means the
// register should be live-in for this BB. It's always better to use
// NEON reg-reg moves.
unsigned Domain = ARMII::DomainNEON;
@@ -71,7 +71,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
}
if (Domain & ARMII::DomainNEON) {
- // Convert FCPYD to VMOVD.
+ // Convert VMOVD to VMOVDneon.
unsigned DestReg = MI->getOperand(0).getReg();
DEBUG({errs() << "vmov convert: "; MI->dump();});
@@ -82,7 +82,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
// - The imp-defs / imp-uses are superregs only, we don't care about
// them.
BuildMI(MBB, *MI, MI->getDebugLoc(),
- TII->get(ARM::VMOVD), DestReg).addReg(SrcReg);
+ TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg);
MBB.erase(MI);
MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I;
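
With the renames above, NEONMoveFix now matches the VFP copy under its new name (VMOVD) and rewrites it to the NEON encoding (VMOVDneon) when the value is NEON-resident. A compact model of the decision; the domain constant is passed in rather than hard-coded because its numeric value is not shown in this patch:

    // Rewrite an unpredicated d-register copy to the NEON encoding when its
    // source executes in the NEON domain (mirrors the check above).
    bool shouldRewriteToNeonCopy(unsigned Domain, bool IsPredicated,
                                 unsigned DomainNEONMask) {
      return !IsPredicated && (Domain & DomainNEONMask) != 0;
    }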
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 8b2bcd0..206677b 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -177,20 +177,20 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 2;
return true;
case ARM::VST2q8:
case ARM::VST2q16:
case ARM::VST2q32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
return true;
case ARM::VST2LNq16a:
case ARM::VST2LNq32a:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 2;
Offset = 0;
Stride = 2;
@@ -198,7 +198,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST2LNq16b:
case ARM::VST2LNq32b:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 2;
Offset = 1;
Stride = 2;
@@ -211,14 +211,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 3;
return true;
case ARM::VST3q8a:
case ARM::VST3q16a:
case ARM::VST3q32a:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 3;
Offset = 0;
Stride = 2;
@@ -227,7 +227,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3q8b:
case ARM::VST3q16b:
case ARM::VST3q32b:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 3;
Offset = 1;
Stride = 2;
@@ -235,7 +235,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNq16a:
case ARM::VST3LNq32a:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 3;
Offset = 0;
Stride = 2;
@@ -243,7 +243,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNq16b:
case ARM::VST3LNq32b:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 3;
Offset = 1;
Stride = 2;
@@ -256,14 +256,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
return true;
case ARM::VST4q8a:
case ARM::VST4q16a:
case ARM::VST4q32a:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 4;
Offset = 0;
Stride = 2;
@@ -272,7 +272,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4q8b:
case ARM::VST4q16b:
case ARM::VST4q32b:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 4;
Offset = 1;
Stride = 2;
@@ -280,7 +280,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNq16a:
case ARM::VST4LNq32a:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
Offset = 0;
Stride = 2;
@@ -288,7 +288,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNq16b:
case ARM::VST4LNq32b:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
Offset = 1;
Stride = 2;
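
Every FirstOpnd value in this file moves up by exactly one, which lines up with the extra alignment operand that addrmode6 gained (see the printAddrMode6Operand hunk earlier in this patch): the data registers now sit one slot later. The layout below is an inference from those two hunks, not something the patch states:

    // Assumed VST operand layout after the alignment operand was added:
    enum VSTOperandLayout {
      AddrBase  = 0, // addrmode6 base register
      AddrWB    = 1, // writeback/offset register
      AM6Flags  = 2, // addressing-mode-6 flags immediate
      Alignment = 3, // the new alignment immediate
      FirstData = 4  // data registers start here (was 3)
    };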
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index e7770b2..6b605bb 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -37,7 +37,7 @@ LPCRELL0:
mov r1, #PCRELV0
add r1, pc
ldr r0, [r0, r1]
- cpy pc, r0
+ mov pc, r0
.align 2
LJTI1_0_0:
.long LBB1_3
@@ -51,7 +51,7 @@ We should be able to generate:
LPCRELL0:
add r1, LJTI1_0_0
ldr r0, [r0, r1]
- cpy pc, r0
+ mov pc, r0
.align 2
LJTI1_0_0:
.long LBB1_3
@@ -206,8 +206,8 @@ LPC0:
add r5, pc
ldr r6, LCPI1_1
ldr r2, LCPI1_2
- cpy r3, r6
- cpy lr, pc
+ mov r3, r6
+ mov lr, pc
bx r5
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index fb64d9f..11c48ad 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -321,7 +321,7 @@ time.
4) Once we added support for multiple result patterns, write indexed loads
patterns instead of C++ instruction selection code.
-5) Use FLDM / FSTM to emulate indexed FP load / store.
+5) Use VLDM / VSTM to emulate indexed FP load / store.
//===---------------------------------------------------------------------===//
@@ -591,3 +591,8 @@ http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html
//===---------------------------------------------------------------------===//
Make use of the "rbit" instruction.
+
+//===---------------------------------------------------------------------===//
+
+Take a look at test/CodeGen/Thumb2/machine-licm.ll. ARM should be taught how
+to licm and cse the unnecessary load from cp#1.
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index b6dd56c..7602b6d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information --------*- C++ -*-===//
+//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMInstrInfo.h"
+#include "Thumb1InstrInfo.h"
#include "ARM.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 13cc578..b28229d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ----------*- C++ -*-===//
+//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 5aaaf9c..37adf37 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------*- C++ -*-===//
+//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo class.
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
//
//===----------------------------------------------------------------------===//
@@ -794,7 +795,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
if (NumBytes != 0)
emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
} else {
- // Unwind MBBI to point to first LDR / FLDD.
+ // Unwind MBBI to point to first LDR / VLDRD.
const unsigned *CSRegs = getCalleeSavedRegs();
if (MBBI != MBB.begin()) {
do
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 241f1cc..37ad388 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl ----*- C++ -*-===//
+//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo class.
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 462844b..f5ba155 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks -----------*- C++ -*-===//
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -34,10 +34,6 @@ namespace {
}
private:
- MachineBasicBlock::iterator
- SplitT2MOV32imm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineInstr *MI, DebugLoc dl,
- unsigned PredReg, ARMCC::CondCodes CC);
bool InsertITBlocks(MachineBasicBlock &MBB);
};
char Thumb2ITBlockPass::ID = 0;
@@ -50,34 +46,6 @@ static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
return llvm::getInstrPredicate(MI, PredReg);
}
-MachineBasicBlock::iterator
-Thumb2ITBlockPass::SplitT2MOV32imm(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- MachineInstr *MI,
- DebugLoc dl, unsigned PredReg,
- ARMCC::CondCodes CC) {
- // Splitting t2MOVi32imm into a pair of t2MOVi16 + t2MOVTi16 here.
- // The only reason it was a single instruction was so it could be
- // re-materialized. We want to split it before this and the thumb2
- // size reduction pass to make sure the IT mask is correct and expose
- // width reduction opportunities. It doesn't make sense to do this in a
- // separate pass so here it is.
- unsigned DstReg = MI->getOperand(0).getReg();
- bool DstDead = MI->getOperand(0).isDead(); // Is this possible?
- unsigned Imm = MI->getOperand(1).getImm();
- unsigned Lo16 = Imm & 0xffff;
- unsigned Hi16 = (Imm >> 16) & 0xffff;
- BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg)
- .addImm(Lo16).addImm(CC).addReg(PredReg);
- BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16))
- .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead))
- .addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg);
- --MBBI;
- --MBBI;
- MI->eraseFromParent();
- return MBBI;
-}
-
bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -88,11 +56,6 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
unsigned PredReg = 0;
ARMCC::CondCodes CC = getPredicate(MI, PredReg);
- if (MI->getOpcode() == ARM::t2MOVi32imm) {
- MBBI = SplitT2MOV32imm(MBB, MBBI, MI, dl, PredReg, CC);
- continue;
- }
-
if (CC == ARMCC::AL) {
++MBBI;
continue;
@@ -115,11 +78,6 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
DebugLoc ndl = NMI->getDebugLoc();
unsigned NPredReg = 0;
ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
- if (NMI->getOpcode() == ARM::t2MOVi32imm) {
- MBBI = SplitT2MOV32imm(MBB, MBBI, NMI, ndl, NPredReg, NCC);
- continue;
- }
-
if (NCC == OCC) {
Mask |= (1 << Pos);
} else if (NCC != CC)
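
The helper removed above split t2MOVi32imm into a t2MOVi16/t2MOVTi16 pair: the low half is materialized first, then the top half is written over it. A minimal standalone sketch of that decomposition (plain C++, not the LLVM instruction-builder API; the sample immediate is made up):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Imm = 0xDEADBEEF;             // hypothetical 32-bit immediate
  uint32_t Lo16 = Imm & 0xffff;          // materialized by t2MOVi16
  uint32_t Hi16 = (Imm >> 16) & 0xffff;  // written on top by t2MOVTi16
  std::printf("movw -> 0x%04X, movt -> 0x%04X\n",
              (unsigned)Lo16, (unsigned)Hi16);
  return 0;
}
```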
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 21fff51..16c1e6f 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------*- C++ -*-===//
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
#include "ARM.h"
+#include "ARMConstantPoolValue.h"
#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
@@ -132,7 +133,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC);
}
-
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index f3688c0..663a60b 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ----------*- C++ -*-===//
+//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index a295630..b3cf2e5 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl ----*- C++ -*-===//
+//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo class.
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index e3587fb..5b0a89d 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -225,8 +225,6 @@ SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() {
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void AlphaDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
-
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index cb03a6f..9217522 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -426,7 +426,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
}
} else { //more args
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6));
+ int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true, false);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -444,7 +444,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
- int FI = MFI->CreateFixedObject(8, -8 * (6 - i));
+ int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false);
if (i == 0) VarArgsBase = FI;
SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0));
@@ -452,7 +452,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
- FI = MFI->CreateFixedObject(8, - 8 * (12 - i));
+ FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false);
SDFI = DAG.getFrameIndex(FI, MVT::i64);
LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0));
}
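
All three Alpha call sites (and the Blackfin, CellSPU, and MSP430 ones later in this patch) grow the same two trailing booleans. A hedged reading of the signature at this revision; the parameter names come from the contemporaneous MachineFrameInfo header, not from this diff:

```cpp
// Assumed declaration (include/llvm/CodeGen/MachineFrameInfo.h, ~r89205):
//   int CreateFixedObject(uint64_t Size, int64_t SPOffset,
//                         bool Immutable, bool isSS);
// Incoming-argument slots are immutable and are not spill slots, hence:
//   int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6),
//                                   /*Immutable=*/true, /*isSS=*/false);
```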
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 81e1fb7..8917e86 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -391,7 +391,7 @@ def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>;
-let isReturn = 1, isTerminator = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
+let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
}
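
Adding isBarrier to the return definitions here (and to Blackfin's RTS and MSP430's RET below) tells the generic codegen layers that control never falls through a return. A small sketch of the kind of scan that relies on the flag, assuming the TargetInstrDesc query of this era:

```cpp
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// Hedged sketch: walk a block and stop at the first barrier, since nothing
// after a return / unconditional branch can execute by fall-through.
void scanUntilBarrier(llvm::MachineBasicBlock &MBB) {
  for (llvm::MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
       I != E; ++I) {
    if (I->getDesc().isBarrier())
      break; // RETDAG / RTS / RET now terminate the scan here
  }
}
```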
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index 98e9730..64bdd62 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -314,7 +314,7 @@ unsigned AlphaRegisterInfo::getRARegister() const {
return 0;
}
-unsigned AlphaRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? Alpha::R15 : Alpha::R30;
}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index 66f0898..a971e21 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -52,7 +52,7 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index b8bc13b..d0d5a43 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -28,7 +28,7 @@ extern "C" void LLVMInitializeAlphaTarget() {
AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: LLVMTargetMachine(T, TT),
- DataLayout("e-f128:128:128"),
+ DataLayout("e-f128:128:128-n64"),
FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
JITInfo(*this),
Subtarget(TT, FS),
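
The "-n64" suffix (and the "-n32" and "-n32:64" forms added for the other targets below) is the new native-integer-widths clause of the target data string: it tells mid-level optimizations which integer types the target supports natively. An annotated reading of the Alpha string; the clause grammar follows the LLVM data-layout documentation of this period:

```cpp
// "e-f128:128:128-n64"
//  e            : little-endian
//  f128:128:128 : f128 has 128-bit ABI and preferred alignment
//  n64          : i64 is the only natively supported integer width
//                 (CellSPU below declares n32:64, Blackfin declares n32)
```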
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 209a5bf..338057b 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -178,7 +178,7 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
processDebugLoc(II, true);
printInstruction(II);
- if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*II);
O << '\n';
processDebugLoc(II, false);
diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
index 1900d00..917f7f5 100644
--- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
+++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
@@ -31,8 +31,8 @@
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-
using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
@@ -143,7 +143,7 @@ bool BlackfinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
processDebugLoc(II, true);
printInstruction(II);
- if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*II);
O << '\n';
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index 4b321ec..c5c96f8 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -25,7 +25,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
-
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -207,7 +207,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
} else {
assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
unsigned ObjSize = VA.getLocVT().getStoreSize();
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(),
+ true, false);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
}
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index c952af1..88ff85f 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -174,6 +174,7 @@ def CALLp: F1<(outs), (ins P:$func, variable_ops),
let isReturn = 1,
isTerminator = 1,
+ isBarrier = 1,
Uses = [RETS] in
def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>;
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
index c8c5925..ea9480d 100644
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
@@ -62,7 +62,6 @@ BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const {
bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
// Overload Table
const bool OTable[] = {
- false, // illegal intrinsic
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "BlackfinGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 8c0a58a..224165b 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -368,7 +368,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (requiresRegisterScavenging(MF)) {
// Reserve a slot close to SP or frame pointer.
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
}
@@ -449,7 +450,8 @@ unsigned BlackfinRegisterInfo::getRARegister() const {
return BF::RETS;
}
-unsigned BlackfinRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned
+BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? BF::FP : BF::SP;
}
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 501f504..68ef08a 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -76,7 +76,7 @@ namespace llvm {
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getRARegister() const;
// Exception handling queries.
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index 47ba2fe..45d7c35 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -28,7 +28,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
const std::string &TT,
const std::string &FS)
: LLVMTargetMachine(T, TT),
- DataLayout("e-p:32:32-i64:32-f64:32"),
+ DataLayout("e-p:32:32-i64:32-f64:32-n32"),
Subtarget(TT, FS),
TLInfo(*this),
InstrInfo(Subtarget),
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 007fe8f..dc9f81c4 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -406,7 +406,7 @@ void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
++EmittedInsts;
processDebugLoc(MI, true);
printInstruction(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
processDebugLoc(MI, false);
O << '\n';
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 1f9e5fc..c69a751 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -417,8 +417,6 @@ namespace {
void
SPUDAGToDAGISel::InstructionSelect()
{
- DEBUG(BB->dump());
-
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index aaf0783..4dd82a6 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -1090,7 +1090,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type
// or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
ArgOffset += StackSlotSize;
@@ -1110,7 +1110,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// Create the frame slot
for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
+ VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
+ true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 09849da..d3b575a 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -3601,21 +3601,23 @@ def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
(BRASL texternalsym:$func)>;
// Unconditional branches:
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
- def BR :
- UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
- "br\t$dest",
- [(br bb:$dest)]>;
-
- // Unconditional, absolute address branch
- def BRA:
- UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
- "bra\t$dest",
- [/* no pattern */]>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ let isBarrier = 1 in {
+ def BR :
+ UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
+ "br\t$dest",
+ [(br bb:$dest)]>;
+
+ // Unconditional, absolute address branch
+ def BRA:
+ UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
+ "bra\t$dest",
+ [/* no pattern */]>;
- // Indirect branch
- def BI:
- BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ // Indirect branch
+ def BI:
+ BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ }
// Conditional branches:
class BRNZInst<dag IOL, list<dag> pattern>:
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 8412006..af94e67 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -596,7 +596,7 @@ SPURegisterInfo::getRARegister() const
}
unsigned
-SPURegisterInfo::getFrameRegister(MachineFunction &MF) const
+SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
{
return SPU::R1;
}
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 1d9d07e..9691cb6 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -78,7 +78,7 @@ namespace llvm {
//! Get return address register (LR, aka R0)
unsigned getRARegister() const;
//! Get the stack frame register (SP, aka R1)
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
//! Perform target-specific stack frame setup.
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
index 94ac73c..88201c6 100644
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -82,7 +82,7 @@ namespace llvm {
const char *getTargetDataString() const {
return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
"-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128"
- "-s:128:128";
+ "-s:128:128-n32:64";
}
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
index 11ac931..145359f 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
@@ -22,6 +22,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -70,9 +71,6 @@ namespace {
void printSrcMemOperand(const MachineInstr *MI, int OpNum,
const char* Modifier = 0);
void printCCOperand(const MachineInstr *MI, int OpNum);
- void printInstruction(const MachineInstr *MI); // autogenerated.
- static const char *getRegisterName(unsigned RegNo);
-
void printMachineInstruction(const MachineInstr * MI);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
@@ -82,13 +80,10 @@ namespace {
const char *ExtraCode);
void printInstructionThroughMCStreamer(const MachineInstr *MI);
+ void PrintGlobalVariable(const GlobalVariable* GVar);
void emitFunctionHeader(const MachineFunction &MF);
bool runOnMachineFunction(MachineFunction &F);
- virtual void PrintGlobalVariable(const GlobalVariable *GV) {
- // FIXME: No support for global variables?
- }
-
void getAnalysisUsage(AnalysisUsage &AU) const {
AsmPrinter::getAnalysisUsage(AU);
AU.setPreservesAll();
@@ -96,8 +91,89 @@ namespace {
};
} // end of anonymous namespace
-#include "MSP430GenAsmWriter.inc"
+void MSP430AsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) {
+ if (!GVar->hasInitializer())
+ return; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ const TargetData *TD = TM.getTargetData();
+
+ std::string name = Mang->getMangledName(GVar);
+ Constant *C = GVar->getInitializer();
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ O << "\t.type\t" << name << ",@object\n";
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang,
+ TM));
+
+ if (C->isNullValue() && !GVar->hasSection() &&
+ !GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << '\n';
+
+ O << MAI->getCOMMDirective() << name << ',' << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+ return;
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ O << "\t.weak\t" << name << '\n';
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ // Use 16-bit alignment by default to simplify a bunch of things
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+ EmitGlobalConstant(C);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << name << ", " << Size << '\n';
+}
void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
const Function *F = MF.getFunction();
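
PrintGlobalVariable above juggles two alignment conventions: getPreferredAlignmentLog returns the log2 of the alignment, while .comm wants either that log2 value or plain bytes depending on MAI->getAlignmentIsInBytes(). A standalone sketch of the conversion, with values invented for illustration:

```cpp
#include <cstdio>

int main() {
  unsigned AlignLog = 1;            // log2(2): a 2-byte-aligned global
  bool AlignmentIsInBytes = false;  // what this assembler's .comm expects
  unsigned Field = AlignmentIsInBytes ? (1u << AlignLog) : AlignLog;
  std::printf(".comm\tfoo,2,%u\n", Field); // log2 form: ".comm foo,2,1"
  return 0;
}
```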
@@ -161,14 +237,9 @@ void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
processDebugLoc(MI, true);
- // Call the autogenerated instruction printer routines.
- if (EnableMCInst) {
- printInstructionThroughMCStreamer(MI);
- } else {
- printInstruction(MI);
- }
+ printInstructionThroughMCStreamer(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
@@ -180,7 +251,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
- O << getRegisterName(MO.getReg());
+ O << MSP430InstPrinter::getRegisterName(MO.getReg());
return;
case MachineOperand::MO_Immediate:
if (!Modifier || strcmp(Modifier, "nohash"))
@@ -224,22 +295,23 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &Base = MI->getOperand(OpNum);
const MachineOperand &Disp = MI->getOperand(OpNum+1);
- if (Base.isGlobal())
- printOperand(MI, OpNum, "mem");
- else if (Disp.isImm() && !Base.getReg())
+ // Print displacement first
+ if (!Disp.isImm()) {
+ printOperand(MI, OpNum+1, "mem");
+ } else {
+ if (!Base.getReg())
+ O << '&';
+
+ printOperand(MI, OpNum+1, "nohash");
+ }
+
+
+ // Print register base field
+ if (Base.getReg()) {
+ O << '(';
printOperand(MI, OpNum);
- else if (Base.getReg()) {
- if (Disp.getImm()) {
- printOperand(MI, OpNum + 1, "nohash");
- O << '(';
- printOperand(MI, OpNum);
- O << ')';
- } else {
- O << '@';
- printOperand(MI, OpNum);
- }
- } else
- llvm_unreachable("Unsupported memory operand");
+ O << ')';
+ }
}
void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) {
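
The rewritten printer reduces MSP430 source-memory operands to two textual shapes: "&disp" when there is no base register (absolute addressing) and "disp(Rn)" otherwise, with symbolic addresses now carried in the displacement field. A minimal sketch of that branch structure using assumed plain-C++ types:

```cpp
#include <string>

std::string formatSrcMem(long Disp, bool HasBase, const std::string &Base) {
  std::string S;
  if (!HasBase)
    S += '&';                       // absolute: "&100"
  S += std::to_string(Disp);        // displacement is printed first
  if (HasBase)
    S += "(" + Base + ")";          // indexed: "4(r12)", including "0(r12)"
  return S;
}
```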
@@ -294,8 +366,7 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
//===----------------------------------------------------------------------===//
-void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI)
-{
+void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI){
MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this);
@@ -309,7 +380,6 @@ void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI)
printKill(MI);
return;
case TargetInstrInfo::INLINEASM:
- O << '\t';
printInlineAsm(MI);
return;
case TargetInstrInfo::IMPLICIT_DEF:
@@ -324,7 +394,18 @@ void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI)
printMCInst(&TmpInst);
}
+static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ raw_ostream &O) {
+ if (SyntaxVariant == 0)
+ return new MSP430InstPrinter(O, MAI);
+ return 0;
+}
+
// Force static initialization.
extern "C" void LLVMInitializeMSP430AsmPrinter() {
RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
+ TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+ createMSP430MCInstPrinter);
}
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
index a3ecc67..0a403c4 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
@@ -25,11 +25,9 @@ using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#define MachineInstr MCInst
-#define MSP430AsmPrinter MSP430InstPrinter // FIXME: REMOVE.
#define NO_ASM_WRITER_BOILERPLATE
#include "MSP430GenAsmWriter.inc"
#undef MachineInstr
-#undef MSP430AsmPrinter
void MSP430InstPrinter::printInst(const MCInst *MI) {
printInstruction(MI);
@@ -65,25 +63,22 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
const MCOperand &Base = MI->getOperand(OpNo);
const MCOperand &Disp = MI->getOperand(OpNo+1);
- // FIXME: move global to displacement field!
- if (Base.isExpr()) {
+ // Print displacement first
+ if (Disp.isExpr()) {
O << '&';
- Base.getExpr()->print(O, &MAI);
- } else if (Disp.isImm() && !Base.isReg())
- printOperand(MI, OpNo);
- else if (Base.isReg()) {
- if (Disp.getImm()) {
- O << Disp.getImm() << '(';
- printOperand(MI, OpNo);
- O << ')';
- } else {
- O << '@';
- printOperand(MI, OpNo);
- }
+ Disp.getExpr()->print(O, &MAI);
} else {
- Base.dump();
- Disp.dump();
- llvm_unreachable("Unsupported memory operand");
+ assert(Disp.isImm() && "Expected immediate in displacement field");
+ if (!Base.getReg())
+ O << '&';
+
+ O << Disp.getImm();
+ }
+
+
+ // Print register base field
+ if (Base.getReg()) {
+ O << '(' << getRegisterName(Base.getReg()) << ')';
}
}
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
index 89313ab..870a3df 100644
--- a/lib/Target/MSP430/MSP430.td
+++ b/lib/Target/MSP430/MSP430.td
@@ -50,11 +50,17 @@ include "MSP430InstrInfo.td"
def MSP430InstrInfo : InstrInfo {}
+
+def MSP430InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+}
+
//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//
def MSP430 : Target {
let InstructionSet = MSP430InstrInfo;
+ let AssemblyWriters = [MSP430InstPrinter];
}
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index b7d9282..c0084be 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -45,6 +45,70 @@ static const bool ViewRMWDAGs = false;
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
+namespace {
+ struct MSP430ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ int16_t Disp;
+ GlobalValue *GV;
+ Constant *CP;
+ BlockAddress *BlockAddr;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+
+ MSP430ISelAddressMode()
+ : BaseType(RegBase), Disp(0), GV(0), CP(0), BlockAddr(0),
+ ES(0), JT(-1), Align(0) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+ }
+
+ bool hasBaseReg() const {
+ return Base.Reg.getNode() != 0;
+ }
+
+ void setBaseReg(SDValue Reg) {
+ BaseType = RegBase;
+ Base.Reg = Reg;
+ }
+
+ void dump() {
+ errs() << "MSP430ISelAddressMode " << this << '\n';
+ if (Base.Reg.getNode() != 0) {
+ errs() << "Base.Reg ";
+ Base.Reg.getNode()->dump();
+ } else {
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
+ }
+ errs() << " Disp " << Disp << '\n';
+ if (GV) {
+ errs() << "GV ";
+ GV->dump();
+ } else if (CP) {
+ errs() << " CP ";
+ CP->dump();
+ errs() << " Align" << Align << '\n';
+ } else if (ES) {
+ errs() << "ES ";
+ errs() << ES << '\n';
+ } else if (JT != -1)
+ errs() << " JT" << JT << " Align" << Align << '\n';
+ }
+ };
+}
+
/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
/// instructions for SelectionDAG operations.
///
@@ -65,6 +129,10 @@ namespace {
return "MSP430 DAG->DAG Pattern Instruction Selection";
}
+ bool MatchAddress(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
+
bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
SDNode *Root) const;
@@ -79,6 +147,10 @@ namespace {
DenseMap<SDNode*, SDNode*> RMWStores;
void PreprocessForRMW();
SDNode *Select(SDValue Op);
+ SDNode *SelectIndexedLoad(SDValue Op);
+ SDNode *SelectIndexedBinOp(SDValue Op, SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16);
+
bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp);
#ifndef NDEBUG
@@ -95,50 +167,155 @@ FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM,
return new MSP430DAGToDAGISel(TM, OptLevel);
}
-// FIXME: This is pretty dummy routine and needs to be rewritten in the future.
-bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue Addr,
- SDValue &Base, SDValue &Disp) {
- // Try to match frame address first.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16);
- Disp = CurDAG->getTargetConstant(0, MVT::i16);
+
+/// MatchWrapper - Try to match MSP430ISD::Wrapper node into an addressing mode.
+/// These wrap things that will resolve down into a symbol reference. If no
+/// match is possible, this returns true, otherwise it returns false.
+bool MSP430DAGToDAGISel::MatchWrapper(SDValue N, MSP430ISelAddressMode &AM) {
+ // If the addressing mode already has a symbol as the displacement, we can
+ // never match another symbol.
+ if (AM.hasSymbolicDisplacement())
return true;
+
+ SDValue N0 = N.getOperand(0);
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ AM.GV = G->getGlobal();
+ AM.Disp += G->getOffset();
+ //AM.SymbolFlags = G->getTargetFlags();
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ //AM.SymbolFlags = CP->getTargetFlags();
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ //AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ //AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
}
+ return false;
+}
- switch (Addr.getOpcode()) {
- case ISD::ADD:
- // Operand is a result from ADD with constant operand which fits into i16.
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- uint64_t CVal = CN->getZExtValue();
- // Offset should fit into 16 bits.
- if (((CVal << 48) >> 48) == CVal) {
- SDValue N0 = Addr.getOperand(0);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N0))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16);
- else
- Base = N0;
-
- Disp = CurDAG->getTargetConstant(CVal, MVT::i16);
- return true;
- }
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != MSP430ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = MSP430ISelAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
+ DebugLoc dl = N.getDebugLoc();
+ DEBUG({
+ errs() << "MatchAddress: ";
+ AM.dump();
+ });
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ AM.Disp += Val;
+ return false;
+ }
+
+ case MSP430ISD::Wrapper:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase
+ && AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = MSP430ISelAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
}
break;
- case MSP430ISD::Wrapper:
- SDValue N0 = Addr.getOperand(0);
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
- Base = CurDAG->getTargetGlobalAddress(G->getGlobal(),
- MVT::i16, G->getOffset());
- Disp = CurDAG->getTargetConstant(0, MVT::i16);
- return true;
- } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(N0)) {
- Base = CurDAG->getTargetExternalSymbol(E->getSymbol(), MVT::i16);
- Disp = CurDAG->getTargetConstant(0, MVT::i16);
+
+ case ISD::ADD: {
+ MSP430ISelAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM))
+ return false;
+ AM = Backup;
+
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ MSP430ISelAddressMode Backup = AM;
+ uint64_t Offset = CN->getSExtValue();
+ // Start with the LHS as an addr mode.
+ if (!MatchAddress(N.getOperand(0), AM) &&
+ // Address could not have picked a GV address for the displacement.
+ AM.GV == NULL &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp += Offset;
+ return false;
+ }
+ AM = Backup;
}
break;
- };
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// SelectAddr - returns true if it is able to pattern-match an addressing mode.
+/// It returns the operands which make up the maximal addressing mode it can
+/// match by reference.
+bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Disp) {
+ MSP430ISelAddressMode AM;
+
+ if (MatchAddress(N, AM))
+ return false;
+
+ EVT VT = N.getValueType();
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
- Base = Addr;
- Disp = CurDAG->getTargetConstant(0, MVT::i16);
+ Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
+
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i16, AM.Disp,
+ 0/*AM.SymbolFlags*/);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16,
+ AM.Align, AM.Disp, 0/*AM.SymbolFlags*/);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.BlockAddr)
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/,
+ true /*AM.SymbolFlags*/);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16);
return true;
}
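
The ISD::OR case above leans on a standard trick: "X | C" equals "X + C" whenever the set bits of C are provably zero in X, which is exactly what the MaskedValueIsZero call checks. A standalone demonstration with concrete values:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint16_t X = 0x1230; // low four bits known zero, e.g. an aligned base
  uint16_t C = 0x0003; // constant confined to those known-zero bits
  assert(static_cast<uint16_t>(X | C) ==
         static_cast<uint16_t>(X + C)); // both 0x1233
  return 0;
}
```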
@@ -187,7 +364,7 @@ bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
/// TokenFactor by PreprocessForRMW. Query the map Store => Load1 (created
/// during preprocessing) to determine whether it's legal to introduce such
/// "cycle" for a moment.
- DenseMap<SDNode*, SDNode*>::iterator I = RMWStores.find(Root);
+ DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root);
if (I != RMWStores.end() && I->second == N)
return true;
@@ -423,6 +600,89 @@ void MSP430DAGToDAGISel::PreprocessForRMW() {
}
}
+
+static bool isValidIndexedLoad(const LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1)
+ return false;
+
+ break;
+ case MVT::i16:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2)
+ return false;
+
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDValue Op) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+
+ unsigned Opcode = 0;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ Opcode = MSP430::MOV8rm_POST;
+ break;
+ case MVT::i16:
+ Opcode = MSP430::MOV16rm_POST;
+ break;
+ default:
+ return NULL;
+ }
+
+ return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(),
+ VT, MVT::i16, MVT::Other,
+ LD->getBasePtr(), LD->getChain());
+}
+
+SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDValue Op,
+ SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16) {
+ if (N1.getOpcode() == ISD::LOAD &&
+ N1.hasOneUse() &&
+ IsLegalAndProfitableToFold(N1.getNode(), Op.getNode(), Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(N1);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+ unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8);
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
+ SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
+ SDNode *ResNode =
+ CurDAG->SelectNodeTo(Op.getNode(), Opc,
+ VT, MVT::i16, MVT::Other,
+ Ops0, 3);
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ // Transfer chain.
+ ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2));
+ // Transfer writeback.
+ ReplaceUses(SDValue(N1.getNode(), 1), SDValue(ResNode, 1));
+ return ResNode;
+ }
+
+ return NULL;
+}
+
+
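
SelectIndexedLoad and SelectIndexedBinOp above target MSP430's autoincrement source mode (@Rn+): a load whose base pointer is advanced by exactly the access size, +1 for i8 and +2 for i16, as isValidIndexedLoad enforces. The C-level shape this machinery aims at; whether a given loop actually folds depends on the DAG combiner:

```cpp
#include <cstdint>

uint16_t sumBytes(const uint8_t *P, unsigned N) {
  uint16_t Sum = 0;
  while (N--)
    Sum += *P++;  // i8 load; the base register is written back as P + 1,
                  // ideally selected to a single "mov.b @r14+, r15" form
  return Sum;
}
```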
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void MSP430DAGToDAGISel::InstructionSelect() {
@@ -438,8 +698,6 @@ void MSP430DAGToDAGISel::InstructionSelect() {
DEBUG(errs() << "Selection DAG after RMW preprocessing:\n");
DEBUG(CurDAG->dump());
- DEBUG(BB->dump());
-
// Codegen the basic block.
DEBUG(errs() << "===== Instruction selection begins:\n");
DEBUG(Indent = 0);
@@ -482,6 +740,72 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16,
TFI, CurDAG->getTargetConstant(0, MVT::i16));
}
+ case ISD::LOAD:
+ if (SDNode *ResNode = SelectIndexedLoad(Op))
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ case ISD::ADD:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Op,
+ Op.getOperand(0), Op.getOperand(1),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SUB:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Op,
+ Op.getOperand(0), Op.getOperand(1),
+ MSP430::SUB8rm_POST, MSP430::SUB16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::AND:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Op,
+ Op.getOperand(0), Op.getOperand(1),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::OR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Op,
+ Op.getOperand(0), Op.getOperand(1),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::XOR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Op,
+ Op.getOperand(0), Op.getOperand(1),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
}
// Select the default instruction
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 34e6d2c..5a925f5 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -62,10 +62,14 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setBooleanContents(ZeroOrOneBooleanContent);
setSchedulingPreference(SchedulingForLatency);
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ // We have post-incremented loads / stores.
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
// We don't have any truncstores
@@ -115,12 +119,23 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// FIXME: Implement efficiently multiplication by a constant
+ setOperationAction(ISD::MUL, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
setOperationAction(ISD::MUL, MVT::i16, Expand);
setOperationAction(ISD::MULHS, MVT::i16, Expand);
setOperationAction(ISD::MULHU, MVT::i16, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
setOperationAction(ISD::UDIV, MVT::i16, Expand);
setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
setOperationAction(ISD::UREM, MVT::i16, Expand);
@@ -303,7 +318,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
<< "\n";
}
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -659,6 +674,42 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
DAG.getValueType(Val.getValueType()));
}
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+ if (VT != MVT::i8 && VT != MVT::i16)
+ return false;
+
+ if (Op->getOpcode() != ISD::ADD)
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+ uint64_t RHSC = RHS->getZExtValue();
+ if ((VT == MVT::i16 && RHSC != 2) ||
+ (VT == MVT::i8 && RHSC != 1))
+ return false;
+
+ Base = Op->getOperand(0);
+ Offset = DAG.getConstant(RHSC, VT);
+ AM = ISD::POST_INC;
+ return true;
+ }
+
+ return false;
+}
+
+
const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return NULL;
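
This hook is what turns an independent load and pointer increment into one POST_INC load before selection. A sketch of the DAG shape it recognizes, in informal SelectionDAG notation rather than API calls:

```cpp
// Before:   t1: i16,ch = load t0, t2          ; plain NON_EXTLOAD of i16
//           t3: i16    = add t2, Constant<2>  ; base advanced by access size
// Reported: Base = t2, Offset = Constant<2>, AM = ISD::POST_INC
// After:    one post-incremented load producing both the loaded value and
//           the updated base, later matched to MOV16rm_POST.
```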
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index fdbc384..d413ccb 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -136,6 +136,12 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG);
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
const MSP430Subtarget &Subtarget;
const MSP430TargetMachine &TM;
};
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index a6d9638..b2f09c7 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -35,15 +35,23 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, 0,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
if (RC == &MSP430::GR16RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
.addFrameIndex(FrameIdx).addImm(0)
- .addReg(SrcReg, getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &MSP430::GR8RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV8mr))
.addFrameIndex(FrameIdx).addImm(0)
- .addReg(SrcReg, getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else
llvm_unreachable("Cannot store this register to stack slot!");
}
@@ -54,13 +62,21 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC) const{
DebugLoc DL = DebugLoc::getUnknownLoc();
if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, 0,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
if (RC == &MSP430::GR16RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
- .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
else if (RC == &MSP430::GR8RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV8rm))
- .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
else
llvm_unreachable("Cannot store this register to stack slot!");
}
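
Attaching a MachineMemOperand to the spill and reload instructions records what memory they touch instead of leaving the access opaque. A reading of what the operand built above carries, with field meanings per the MachineMemOperand header of this era:

```cpp
// PseudoSourceValue::getFixedStack(FrameIdx) : identifies the exact slot
// MachineMemOperand::MOStore / MOLoad        : direction of the access
// MFI.getObjectSize / getObjectAlignment     : the slot's geometry
// With this attached, alias analysis over machine code can prove that
// spills to different frame indices do not conflict, rather than having
// to assume any two stack accesses may alias.
```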
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 2b50669..c3bbfe8 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -127,7 +127,7 @@ def NOP : Pseudo<(outs), (ins), "nop", []>;
//
// FIXME: Provide proper encoding!
-let isReturn = 1, isTerminator = 1 in {
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>;
}
@@ -142,7 +142,7 @@ let isBarrier = 1 in
// Conditional branches
let Uses = [SRW] in
def JCC : Pseudo<(outs), (ins brtarget:$dst, cc:$cc),
- "j$cc $dst",
+ "j$cc\t$dst",
[(MSP430brcc bb:$dst, imm:$cc)]>;
} // isBranch, isTerminator
@@ -215,6 +215,13 @@ def MOVZX16rm8 : Pseudo<(outs GR16:$dst), (ins memsrc:$src),
"mov.b\t{$src, $dst}",
[(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in {
+def MOV8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.b\t{@$base+, $dst}", []>;
+def MOV16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.w\t{@$base+, $dst}", []>;
+}
+
// Any instruction that defines a 8-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
// be copying from a truncate, but any other 8-bit operation will zero-extend
@@ -280,6 +287,15 @@ def ADD16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
[(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
(implicit SRW)]>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src1 = $dst" in {
+def ADD8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base),
+ "add.b\t{@$base+, $dst}", []>;
+def ADD16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base),
+ "add.w\t{@$base+, $dst}", []>;
+}
+
+
def ADD8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
"add.b\t{$src2, $dst}",
[(set GR8:$dst, (add GR8:$src1, imm:$src2)),
@@ -409,6 +425,14 @@ def AND16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
[(set GR16:$dst, (and GR16:$src1, (load addr:$src2))),
(implicit SRW)]>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src1 = $dst" in {
+def AND8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base),
+ "and.b\t{@$base+, $dst}", []>;
+def AND16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base),
+ "and.w\t{@$base+, $dst}", []>;
+}
+
let isTwoAddress = 0 in {
def AND8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
"and.b\t{$src, $dst}",
@@ -438,6 +462,92 @@ def AND16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
(implicit SRW)]>;
}
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
+def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>;
+}
+
+def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
+def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>;
+
+def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
+def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src1 = $dst" in {
+def OR8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base),
+ "bis.b\t{@$base+, $dst}", []>;
+def OR16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base),
+ "bis.w\t{@$base+, $dst}", []>;
+}
+
+let isTwoAddress = 0 in {
+def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
+def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
+
+def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
+
+def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (i8 (load addr:$dst)),
+ (i8 (load addr:$src))), addr:$dst)]>;
+def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (i16 (load addr:$dst)),
+ (i16 (load addr:$src))), addr:$dst)]>;
+}
+
+// bic does not modify condition codes
+def BIC8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src1, (not GR8:$src2)))]>;
+def BIC16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src1, (not GR16:$src2)))]>;
+
+def BIC8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src1, (not (i8 (load addr:$src2)))))]>;
+def BIC16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src1, (not (i16 (load addr:$src2)))))]>;
+
+let isTwoAddress = 0 in {
+def BIC8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>;
+def BIC16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>;
+
+def BIC8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not (i8 (load addr:$src)))), addr:$dst)]>;
+def BIC16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not (i16 (load addr:$src)))), addr:$dst)]>;
+}
let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
def XOR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
@@ -468,6 +578,14 @@ def XOR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
[(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
(implicit SRW)]>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src1 = $dst" in {
+def XOR8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base),
+ "xor.b\t{@$base+, $dst}", []>;
+def XOR16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base),
+ "xor.w\t{@$base+, $dst}", []>;
+}
+
let isTwoAddress = 0 in {
def XOR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
"xor.b\t{$src, $dst}",
@@ -525,6 +643,14 @@ def SUB16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
[(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
(implicit SRW)]>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src1 = $dst" in {
+def SUB8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base),
+ "sub.b\t{@$base+, $dst}", []>;
+def SUB16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base),
+ "sub.w\t{@$base+, $dst}", []>;
+}
+
let isTwoAddress = 0 in {
def SUB8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
"sub.b\t{$src, $dst}",
@@ -650,58 +776,14 @@ def SEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
} // Defs = [SRW]
+def ZEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext (trunc GR16:$src)))]>;
+
def SWPB16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
"swpb\t$dst",
[(set GR16:$dst, (bswap GR16:$src))]>;
-let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
-def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
-def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>;
-}
-
-def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
-def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>;
-
-def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
-def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>;
-
-let isTwoAddress = 0 in {
-def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
-def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
-
-def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
-
-def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (i8 (load addr:$dst)),
- (i8 (load addr:$src))), addr:$dst)]>;
-def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (i16 (load addr:$dst)),
- (i16 (load addr:$src))), addr:$dst)]>;
-}
-
} // isTwoAddress = 1
// Integer comparisons
@@ -851,3 +933,6 @@ def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
(SUB8mr addr:$dst, GR8:$src)>;
def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
(SUB8mm addr:$dst, addr:$src)>;
+
+// peephole patterns
+def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
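+// Byte operations on MSP430 clear the high byte of the destination, so an
+// explicit (and x, 255) folds into the single mov.b that ZEXT16r emits;
+// roughly, uint16_t z = x & 0xff; becomes one "mov.b x, z".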
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
index 069313e..4e3a8d0 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
@@ -15,6 +15,12 @@
using namespace llvm;
MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) {
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ PCSymbol = ".";
+
AlignmentIsInBytes = false;
AllowNameToStartWithDigit = true;
+ UsesELFSectionDirectiveForBSS = true;
}
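// With these settings the printer emits GNU-as style directives; a sketch of
// the resulting assembly:
//   .weak  foo
//   .set   bar, baz
// and ".L"-prefixed labels stay local, i.e. out of the object's symbol table.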
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 1a5893e..92baad9 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -212,7 +212,7 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
const {
// Create a frame entry for the FPW register that must be saved.
if (hasFP(MF)) {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true, false);
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
"Slot for FPW register must be last in order to be found!");
FrameIdx = 0;
@@ -355,7 +355,7 @@ unsigned MSP430RegisterInfo::getRARegister() const {
return MSP430::PCW;
}
-unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? MSP430::FPW : MSP430::SPW;
}
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 5f3a216..aa08787 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -60,7 +60,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
//! Get DWARF debugging register number
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index da54507..14db406 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -32,7 +32,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
LLVMTargetMachine(T, TT),
Subtarget(TT, FS),
// FIXME: Check TargetData string.
- DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32"),
+ DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { }
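// The new "-n8:16" suffix declares the integer widths that are native to the
// target; it is consumed by the 'n' specifier parsing added to
// TargetData::init() later in this patch, which records {8, 16} in
// LegalIntWidths.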
diff --git a/lib/Target/MSP430/README.txt b/lib/Target/MSP430/README.txt
index b14e93d..5b9634b 100644
--- a/lib/Target/MSP430/README.txt
+++ b/lib/Target/MSP430/README.txt
@@ -11,8 +11,6 @@ available pretty soon.
Some things are incomplete / not implemented yet (this list surely is not
complete as well):
-0. Implement asmprinting for variables :)
-
1. Verify how implicit zext with 8-bit operands is handled (this might
currently be modelled improperly - do we need to mark the superreg as a def
for every 8-bit instruction?).
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 66ade89..4898fae 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -282,7 +282,7 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Print the assembly for the instruction.
printInstruction(II);
- if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*II);
O << '\n';
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 810dce1..cbcedb8 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -108,7 +108,6 @@ private:
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void MipsDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
// Codegen the basic block.
DEBUG(errs() << "===== Instruction selection begins:\n");
DEBUG(Indent = 0);
@@ -171,6 +170,27 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
return true;
}
}
+
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(0).getOpcode() == MipsISD::Hi &&
+ Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
+ SDValue LoVal = Addr.getOperand(1);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(
+ LoVal.getOperand(0))) {
+ if (!CP->getOffset()) {
+ Base = Addr.getOperand(0);
+ Offset = LoVal.getOperand(0);
+ return true;
+ }
+ }
+ }
}
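// The fold is restricted to a zero constant-pool offset: %lo($CPI) already
// encodes the low half of the symbol's address, and a nonzero offset would
// have to be folded into that relocation rather than into the load.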
Base = Addr;
@@ -315,6 +335,16 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) {
case ISD::GLOBAL_OFFSET_TABLE:
return getGlobalBaseReg();
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ if (N.getValueType() == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+ ReplaceUses(N, Zero);
+ return Zero.getNode();
+ }
+ break;
+ }
+
/// Handle direct and indirect calls when using PIC. On PIC, when
/// GOT is smaller than about 64k (small code) the GA target is
/// loaded with only one instruction. Otherwise GA's target must
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 61da8f8..c9a43b4 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -568,7 +568,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
Constant *C = N->getConstVal();
SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- MipsII::MO_ABS_HILO);
+ N->getOffset(), MipsII::MO_ABS_HILO);
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
@@ -704,7 +704,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// the stack (even if less than 4 are used as arguments)
if (Subtarget->isABI_O32()) {
int VTsize = EVT(MVT::i32).getSizeInBits()/8;
- MFI->CreateFixedObject(VTsize, (VTsize*3));
+ MFI->CreateFixedObject(VTsize, (VTsize*3), true, false);
CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32);
} else
CCInfo.AnalyzeCallOperands(Outs, CC_Mips);
@@ -773,7 +773,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// if O32 ABI is used. For EABI the first address is zero.
LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
- LastArgStackLoc);
+ LastArgStackLoc, true, false);
SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
@@ -849,7 +849,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the frame index only once. SPOffset here can be anything
// (this will be fixed on processFunctionBeforeFrameFinalized)
if (MipsFI->getGPStackOffset() == -1) {
- FI = MFI->CreateFixedObject(4, 0);
+ FI = MFI->CreateFixedObject(4, 0, true, false);
MipsFI->setGPFI(FI);
}
MipsFI->setGPStackOffset(LastArgStackLoc);
@@ -1002,7 +1002,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// be used on emitPrologue) to avoid mis-calc of the first stack
// offset on PEI::calculateFrameObjectOffsets.
// Arguments are always 32-bit.
- int FI = MFI->CreateFixedObject(4, 0);
+ int FI = MFI->CreateFixedObject(4, 0, true, false);
MipsFI->recordStoreVarArgsFI(FI, -(4+(i*4)));
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
@@ -1025,7 +1025,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// offset on PEI::calculateFrameObjectOffsets.
// Arguments are always 32-bit.
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, 0);
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
MipsFI->recordLoadArgsFI(FI, -(ArgSize+
(FirstStackArgLoc + VA.getLocMemOffset())));
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index bd61738..ce89cfd 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -48,6 +48,7 @@ let PrintMethod = "printFCCOperand" in
def In32BitMode : Predicate<"!Subtarget.isFP64bit()">;
def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
+def IsNotMipsI : Predicate<"!Subtarget.isMips1()">;
//===----------------------------------------------------------------------===//
// Instruction Class Templates
@@ -173,7 +174,7 @@ let fd = 0 in {
}
/// Floating Point Memory Instructions
-let Predicates = [IsNotSingleFloat] in {
+let Predicates = [IsNotSingleFloat, IsNotMipsI] in {
def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
"ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
@@ -284,7 +285,12 @@ def fpimm0 : PatLeaf<(fpimm), [{
return N->isExactlyValue(+0.0);
}]>;
+def fpimm0neg : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
+def : Pat<(f32 fpimm0neg), (FNEG_S32 (MTC1 ZERO))>;
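+// -0.0 has the sign bit set, so unlike +0.0 it cannot come straight from the
+// integer zero register; the pattern builds it as fneg(+0.0) instead.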
def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>;
def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 9159904..af64c9f 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -134,6 +134,9 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
+ const MachineFunction *MF = MBB.getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+
if (I != MBB.end()) DL = I->getDebugLoc();
if (DestRC != SrcRC) {
@@ -153,6 +156,13 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
else if ((DestRC == Mips::FGR32RegisterClass) &&
(SrcRC == Mips::CPURegsRegisterClass))
BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg);
+ else if ((DestRC == Mips::AFGR64RegisterClass) &&
+ (SrcRC == Mips::CPURegsRegisterClass) &&
+ (SrcReg == Mips::ZERO)) {
+ const unsigned *AliasSet = TRI->getAliasSet(DestReg);
+ BuildMI(MBB, I, DL, get(Mips::MTC1), AliasSet[0]).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Mips::MTC1), AliasSet[1]).addReg(SrcReg);
+ }
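+ // AFGR64 registers are pairs of FGR32 halves, so zeroing one takes two
+ // moves from $zero, one per 32-bit half; a sketch of the emitted code:
+ //   mtc1 $zero, $f0
+ //   mtc1 $zero, $f1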
// Move from/to Hi/Lo registers
else if ((DestRC == Mips::HILORegisterClass) &&
@@ -163,9 +173,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
(DestRC == Mips::CPURegsRegisterClass)) {
unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO;
BuildMI(MBB, I, DL, get(Opc), DestReg);
-
- // Can't copy this register
- } else
+ } else
+ // Can't copy this register
return false;
return true;
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 949c78a..a300f49 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -103,6 +103,7 @@ public:
int getGPFI() const { return GPHolder.FI; }
void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
void setGPFI(int FI) { GPHolder.FI = FI; }
+ bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
bool hasLoadArgs() const { return HasLoadArgs; }
bool hasStoreVarArgs() const { return HasStoreVarArgs; }
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index d2289e9..ad326db 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -287,7 +287,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
}
if (hasFP(MF)) {
- MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize),
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
StackOffset);
MipsFI->setFPStackOffset(StackOffset);
TopCPUSavedRegOff = StackOffset;
@@ -295,7 +295,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
}
if (MFI->hasCalls()) {
- MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize),
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
StackOffset);
MipsFI->setRAStackOffset(StackOffset);
TopCPUSavedRegOff = StackOffset;
@@ -438,11 +438,10 @@ emitPrologue(MachineFunction &MF) const
.addReg(Mips::SP).addReg(Mips::ZERO);
}
- // PIC speficic function prologue
- if ((isPIC) && (MFI->hasCalls())) {
+ // Restore GP from the saved stack location
+ if (MipsFI->needGPSaveRestore())
BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
.addImm(MipsFI->getGPStackOffset());
- }
}
void MipsRegisterInfo::
@@ -489,13 +488,11 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
void MipsRegisterInfo::
processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
- // Set the SPOffset on the FI where GP must be saved/loaded.
+ // Set the stack offset where GP must be saved/loaded from.
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
- if (MFI->hasCalls() && isPIC) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ if (MipsFI->needGPSaveRestore())
MFI->setObjectOffset(MipsFI->getGPFI(), MipsFI->getGPStackOffset());
- }
}
unsigned MipsRegisterInfo::
@@ -504,7 +501,7 @@ getRARegister() const {
}
unsigned MipsRegisterInfo::
-getFrameRegister(MachineFunction &MF) const {
+getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? Mips::FP : Mips::SP;
}
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 122f786..5b45921 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -65,7 +65,7 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
/// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
/// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 4fa5450..b3c2313 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -38,8 +38,8 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
bool isLittle=false):
LLVMTargetMachine(T, TT),
Subtarget(TT, FS, isLittle),
- DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32") :
- std::string("E-p:32:32:32-i8:8:32-i16:16:32")),
+ DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32-n32") :
+ std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")),
InstrInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
TLInfo(*this) {
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
index b2a4c11..e1f2587 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
@@ -46,7 +46,7 @@ PIC16AsmPrinter::PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
processDebugLoc(MI, true);
printInstruction(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
processDebugLoc(MI, false);
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
index cc57d12..e13e6cd 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
@@ -30,7 +30,6 @@ FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) {
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void PIC16DAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
}
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 635befe..71c3d37 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -1070,7 +1070,7 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
// Put the value on stack.
// Get a stack slot index and convert to es.
- int FI = MF.getFrameInfo()->CreateStackObject(1, 1);
+ int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false);
const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
index 47087ab..8ba9a1d 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -72,7 +72,7 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const {
return -1;
}
-unsigned PIC16RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned PIC16RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
llvm_unreachable("PIC16 Does not have any frame register");
return 0;
}
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
index 8aa5a10..1d5dbbf 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.h
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -59,7 +59,7 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo {
virtual void emitPrologue(MachineFunction &MF) const;
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- virtual unsigned getFrameRegister(MachineFunction &MF) const;
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const;
virtual unsigned getRARegister() const;
};
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index 08307e7..e2acb85 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -34,7 +34,7 @@ PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool Trad)
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, Trad),
- DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"),
+ DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"),
InstrInfo(*this), TLInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index 2dac18f..aae4607 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -594,7 +594,7 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
printInstruction(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
@@ -672,14 +672,14 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << '\n';
- // Print out jump tables referenced by the function.
- EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
-
OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
// Emit post-function debug information.
DW->EndFunction(&MF);
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
// We didn't modify anything.
return false;
}
@@ -853,12 +853,12 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
}
- // Print out jump tables referenced by the function.
- EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
-
// Emit post-function debug information.
DW->EndFunction(&MF);
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
// We didn't modify anything.
return false;
}
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b866240..fb9a240 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -187,8 +187,6 @@ private:
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void PPCDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
-
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 7f48ef0..099fcb5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -637,7 +637,7 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) {
unsigned BitSize;
bool HasAnyUndefs;
- if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
+ if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
return CFP->getValueAPF().isNegZero();
@@ -1625,7 +1625,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- isImmutable);
+ isImmutable, false);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -1690,9 +1690,10 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- CCInfo.getNextStackOffset());
+ CCInfo.getNextStackOffset(),
+ true, false);
- VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8);
+ VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
// The fixed integer arguments of a variadic function are
@@ -1895,7 +1896,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
@@ -1918,7 +1919,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// the object.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
@@ -2043,7 +2044,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
if (needsLoad) {
int FI = MFI->CreateFixedObject(ObjSize,
CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
+ isImmutable, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
}
@@ -2076,7 +2077,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
int Depth = ArgOffset;
VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- Depth);
+ Depth, true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
// If this function is vararg, store any remaining integer argument regs
@@ -2289,7 +2290,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
isDarwinABI);
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
- NewRetAddrLoc);
+ NewRetAddrLoc,
+ true, false);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
@@ -2300,7 +2302,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
if (isDarwinABI) {
int NewFPLoc =
SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
- int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
+ true, false);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
PseudoSourceValue::getFixedStack(NewFPIdx), 0);
@@ -2317,7 +2320,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
- int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue FIN = DAG.getFrameIndex(FI, VT);
TailCallArgumentInfo Info;
@@ -3224,7 +3227,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
// Find out what the fix offset of the frame pointer save area.
int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
// Allocate the frame index for frame pointer save area.
- RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
+ RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset,
+ true, false);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
}
@@ -3250,7 +3254,8 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+ FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset,
+ true, false);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -3411,7 +3416,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(8, 8);
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -3469,7 +3474,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
// Save FP register to stack slot
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
StackSlot, NULL, 0);
@@ -3667,7 +3672,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
unsigned SplatBitSize;
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs) || SplatBitSize > 32)
+ HasAnyUndefs, 0, true) || SplatBitSize > 32)
return SDValue();
unsigned SplatBits = APSplatBits.getZExtValue();
@@ -4137,7 +4142,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
DebugLoc dl = Op.getDebugLoc();
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(16, 16);
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index cf5c7c0..e65e644 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1043,7 +1043,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+ FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset,
+ true, false);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -1051,7 +1052,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
- MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta);
+ MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta,
+ true, false);
}
// Reserve a slot closest to SP or frame pointer if we have a dynalloc or
@@ -1067,7 +1069,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
}
@@ -1356,12 +1359,6 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId);
- }
-
// Adjust stack pointer: r1 += NegFrameSize.
// If there is a preferred stack alignment, align R1 now
if (!IsPPC64) {
@@ -1431,12 +1428,18 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X0);
}
}
+
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ // Add the "machine moves" for the instructions we generated above, but in
+ // reverse order.
if (needsFrameMoves) {
- std::vector<MachineMove> &Moves = MMI->getFrameMoves();
-
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId);
+
+ // Show update of SP.
if (NegFrameSize) {
- // Show update of SP.
MachineLocation SPDst(MachineLocation::VirtualFP);
MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
@@ -1451,31 +1454,15 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
}
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ if (MustSaveLR) {
+ MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+ MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR);
+ Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc));
}
-
- MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
- MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR);
- Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc));
-
- // Mark effective beginning of when frame pointer is ready.
- unsigned ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId);
-
- MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) :
- (IsPPC64 ? PPC::X1 : PPC::R1));
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
}
+ unsigned ReadyLabelId = 0;
+
// If there is a frame pointer, copy R1 into R31
if (HasFP) {
if (!IsPPC64) {
@@ -1487,6 +1474,33 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X1)
.addReg(PPC::X1);
}
+
+ if (needsFrameMoves) {
+ ReadyLabelId = MMI->NextLabelID();
+
+ // Mark effective beginning of when frame pointer is ready.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId);
+
+ MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) :
+ (IsPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+ }
+
+ if (needsFrameMoves) {
+ unsigned LabelId = HasFP ? ReadyLabelId : FrameLabelId;
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(LabelId, CSDst, CSSrc));
+ }
}
}
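// Net effect of the reordering: the SP and FP update moves still hang off
// FrameLabelId, but the callee-saved-register moves now use ReadyLabelId
// whenever a frame pointer is established (LabelId above), so the unwind
// tables only describe those saves once the frame pointer is actually valid.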
@@ -1700,7 +1714,7 @@ unsigned PPCRegisterInfo::getRARegister() const {
return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
}
-unsigned PPCRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
if (!Subtarget.isPPC64())
return hasFP(MF) ? PPC::R31 : PPC::R1;
else
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 1689bc2..3aeed80 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -83,7 +83,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
// Exception handling queries.
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 02c8ad7..75fcf62 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -101,8 +101,8 @@ public:
const char *getTargetDataString() const {
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
// documentation are wrong; these are correct (i.e. "what gcc does").
- return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128"
- : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128";
+ return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
+ : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128-n32";
}
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3371954..8079c6e 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -20,8 +20,7 @@
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
-static const MCAsmInfo *createMCAsmInfo(const Target &T,
- const StringRef &TT) {
+static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
if (TheTriple.getOS() == Triple::Darwin)
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index a345d3d..aad621f 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -339,6 +339,8 @@ we don't have whole-function selection dags. On x86, this means we use one
extra register for the function when effective_addr2 is declared as U64 than
when it is declared U32.
+PHI Slicing could be extended to do this.
+
//===---------------------------------------------------------------------===//
LSR should know what GPR types a target has. This code:
@@ -406,22 +408,6 @@ return: ; preds = %then.1, %else.0, %then.0
//===---------------------------------------------------------------------===//
-Tail recursion elimination is not transforming this function, because it is
-returning n, which fails the isDynamicConstant check in the accumulator
-recursion checks.
-
-long long fib(const long long n) {
- switch(n) {
- case 0:
- case 1:
- return n;
- default:
- return fib(n-1) + fib(n-2);
- }
-}
-
-//===---------------------------------------------------------------------===//
-
Tail recursion elimination should handle:
int pow2m1(int n) {
@@ -1229,6 +1215,40 @@ GCC PR33344 is a similar case.
//===---------------------------------------------------------------------===//
+[PHI TRANSLATE INDEXED GEPs] PR5313
+
+Load redundancy elimination for simple loop. This loop:
+
+void append_text(const char* text,unsigned char * const io) {
+ while(*text)
+ *io=*text++;
+}
+
+Compiles to have a fully redundant load in the loop (%2):
+
+define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind {
+entry:
+ %0 = load i8* %text, align 1 ; <i8> [#uses=1]
+ %1 = icmp eq i8 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ] ; <i32> [#uses=2]
+ %text_addr.04 = getelementptr i8* %text, i32 %indvar ; <i8*> [#uses=1]
+ %2 = load i8* %text_addr.04, align 1 ; <i8> [#uses=1]
+ store i8 %2, i8* %io, align 1
+ %tmp = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %scevgep = getelementptr i8* %text, i32 %tmp ; <i8*> [#uses=1]
+ %3 = load i8* %scevgep, align 1 ; <i8> [#uses=1]
+ %4 = icmp eq i8 %3, 0 ; <i1> [#uses=1]
+ br i1 %4, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+//===---------------------------------------------------------------------===//
+
There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
GCC testsuite. There are many pre testcases as ssa-pre-*.c
@@ -1594,12 +1614,6 @@ int int_char(char m) {if(m>7) return 0; return m;}
//===---------------------------------------------------------------------===//
-IPSCCP is propagating elements of first class aggregates, but is not propagating
-the entire aggregate itself. This leads it to miss opportunities, for example
-in test/Transforms/SCCP/ipsccp-basic.ll:test5b.
-
-//===---------------------------------------------------------------------===//
-
int func(int a, int b) { if (a & 0x80) b |= 0x80; else b &= ~0x80; return b; }
Generates this:
@@ -1668,3 +1682,55 @@ entry:
}
//===---------------------------------------------------------------------===//
+
+IPSCCP does not currently propagate argument dependent constants through
+functions where it does not know all of the callers. This includes functions
+with normal external linkage as well as templates, C99 inline functions, etc.
+Specifically, it does nothing to:
+
+define i32 @test(i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+ %0 = add nsw i32 %y, %z
+ %1 = mul i32 %0, %x
+ %2 = mul i32 %y, %z
+ %3 = add nsw i32 %1, %2
+ ret i32 %3
+}
+
+define i32 @test2() nounwind {
+entry:
+ %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind
+ ret i32 %0
+}
+
+It would be interesting to extend IPSCCP to be able to handle simple cases like
+this, where all of the arguments to a call are constant. Because IPSCCP runs
+before inlining, trivial templates and inline functions are not yet inlined.
+The results for a function + set of constant arguments should be memoized in a
+map.
+
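+One plausible shape for that memo table (the names here are hypothetical):
+
+  std::map<std::pair<Function*, std::vector<Constant*> >, Constant*> Results;
+
+keyed on the callee plus its constant argument list, so repeated calls with
+the same constants are folded without re-running the evaluator.
+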
+//===---------------------------------------------------------------------===//
+
+The libcall constant folding stuff should be moved out of SimplifyLibcalls into
+libanalysis' constantfolding logic. This would allow IPSCCP to be able to
+handle simple things like this:
+
+static int foo(const char *X) { return strlen(X); }
+int bar() { return foo("abcd"); }
+
+//===---------------------------------------------------------------------===//
+
+InstCombine should use SimplifyDemandedBits to remove the or instruction:
+
+define i1 @test(i8 %x, i8 %y) {
+ %A = or i8 %x, 1
+ %B = icmp ugt i8 %A, 3
+ ret i1 %B
+}
+
+Currently instcombine calls SimplifyDemandedBits with either all bits or just
+the sign bit when the comparison is obviously a sign test. In this case we only
+need all but the bottom two bits from %A (mask 0xFC: bits 0 and 1 contribute at
+most 3, so they can never change the result of 'ugt 3'), and if we gave that
+mask to SDB it would see that 'or %x, 1' only touches the undemanded bit 0 and
+delete the or instruction for us.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index 452b46f..cd85dd4 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -126,7 +126,7 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
processDebugLoc(II, true);
printInstruction(II);
- if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*II);
O << '\n';
processDebugLoc(II, false);
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index a1a4a8e..b41917e 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -75,7 +75,6 @@ private:
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void SparcDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
CurBB = BB;
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 164770d..133f828 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -129,7 +129,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
}
InVals.push_back(Arg);
} else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Load;
if (ObjectVT == MVT::i32) {
@@ -163,7 +164,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg);
InVals.push_back(Arg);
} else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0);
InVals.push_back(Load);
@@ -184,7 +186,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi);
HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
} else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
}
@@ -195,7 +198,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo);
LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32);
} else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4,
+ true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
}
@@ -227,7 +231,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0));
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index f2f1b96..d88d508 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -277,7 +277,7 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection.
// Section A.3 - Synthetic Instructions, p. 85
// special cases of JMPL:
-let isReturn = 1, isTerminator = 1, hasDelaySlot = 1 in {
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in
def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
}
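// isBarrier marks RETL as ending control flow, so nothing after the return
// (beyond its delay slot) is treated as reachable within the block.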
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 7883260..6f6183e 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -175,7 +175,7 @@ unsigned SparcRegisterInfo::getRARegister() const {
return SP::I7;
}
-unsigned SparcRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 753b1c0..8889ea6 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -54,7 +54,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 3a38115..1eec112 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -29,7 +29,7 @@ extern "C" void LLVMInitializeSparcTarget() {
SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: LLVMTargetMachine(T, TT),
- DataLayout("E-p:32:32-f128:128:128"),
+ DataLayout("E-p:32:32-f128:128:128-n32"),
Subtarget(TT, FS), TLInfo(*this), InstrInfo(Subtarget),
FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
}
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
index a4a8d6a..e97e7ca 100644
--- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
@@ -33,6 +33,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
@@ -154,7 +155,7 @@ void SystemZAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
// Call the autogenerated instruction printer routines.
printInstruction(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 028ee89..d64611d 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -603,8 +603,6 @@ bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void SystemZDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
-
// Codegen the basic block.
DEBUG(errs() << "===== Instruction selection begins:\n");
DEBUG(Indent = 0);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 5c8cae0..d6b476e 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -34,6 +34,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/VectorExtras.h"
using namespace llvm;
@@ -328,7 +329,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
// Create the nodes corresponding to a load from this parameter slot.
// Create the frame index object for this incoming parameter...
int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
- VA.getLocMemOffset());
+ VA.getLocMemOffset(), true, false);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 236711c..d82d928 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 38460a6..4d1c01f 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -320,7 +320,8 @@ unsigned SystemZRegisterInfo::getRARegister() const {
return 0;
}
-unsigned SystemZRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned
+SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
assert(0 && "What is the frame register");
return 0;
}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index b22b05d..93f6aee 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -68,7 +68,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 990e003..dfa26a1 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -28,7 +28,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS),
DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
- "-f64:64:64-f128:128:128-a0:16:16"),
+ "-f64:64:64-f128:128:128-a0:16:16-n32:64"),
InstrInfo(*this), TLInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) {
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 5bcd658..fc71bc3 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -17,16 +17,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetData.h"
-#include "llvm/Module.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Mutex.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringExtras.h"
#include <algorithm>
#include <cstdlib>
using namespace llvm;
@@ -132,50 +132,18 @@ const TargetAlignElem TargetData::InvalidAlignmentElem =
// TargetData Class Implementation
//===----------------------------------------------------------------------===//
-/*!
- A TargetDescription string consists of a sequence of hyphen-delimited
- specifiers for target endianness, pointer size and alignments, and various
- primitive type sizes and alignments. A typical string looks something like:
- <br><br>
- "E-p:32:32:32-i1:8:8-i8:8:8-i32:32:32-i64:32:64-f32:32:32-f64:32:64"
- <br><br>
- (note: this string is not fully specified and is only an example.)
- \p
- Alignments come in two flavors: ABI and preferred. ABI alignment (abi_align,
- below) dictates how a type will be aligned within an aggregate and when used
- as an argument. Preferred alignment (pref_align, below) determines a type's
- alignment when emitted as a global.
- \p
- Specifier string details:
- <br><br>
- <i>[E|e]</i>: Endianness. "E" specifies a big-endian target data model, "e"
- specifies a little-endian target data model.
- <br><br>
- <i>p:@verbatim<size>:<abi_align>:<pref_align>@endverbatim</i>: Pointer size,
- ABI and preferred alignment.
- <br><br>
- <i>@verbatim<type><size>:<abi_align>:<pref_align>@endverbatim</i>: Numeric type
- alignment. Type is
- one of <i>i|f|v|a</i>, corresponding to integer, floating point, vector, or
- aggregate. Size indicates the size, e.g., 32 or 64 bits.
- \p
- The default string, fully specified, is:
- <br><br>
- "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64"
- "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64"
- "-v64:64:64-v128:128:128"
- <br><br>
- Note that in the case of aggregates, 0 is the default ABI and preferred
- alignment. This is a special case, where the aggregate's computed worst-case
- alignment will be used.
- */
-void TargetData::init(const std::string &TargetDescription) {
- std::string temp = TargetDescription;
-
+/// getInt - Get an integer ignoring errors.
+static unsigned getInt(StringRef R) {
+ unsigned Result = 0;
+ R.getAsInteger(10, Result);
+ return Result;
+}
+
+void TargetData::init(StringRef Desc) {
LayoutMap = 0;
LittleEndian = false;
PointerMemSize = 8;
- PointerABIAlign = 8;
+ PointerABIAlign = 8;
PointerPrefAlign = PointerABIAlign;
// Default alignments
@@ -190,11 +158,21 @@ void TargetData::init(const std::string &TargetDescription) {
setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
- while (!temp.empty()) {
- std::string token = getToken(temp, "-");
- std::string arg0 = getToken(token, ":");
- const char *p = arg0.c_str();
- switch(*p) {
+ while (!Desc.empty()) {
+ std::pair<StringRef, StringRef> Split = Desc.split('-');
+ StringRef Token = Split.first;
+ Desc = Split.second;
+
+ if (Token.empty())
+ continue;
+
+ Split = Token.split(':');
+ StringRef Specifier = Split.first;
+ Token = Split.second;
+
+ assert(!Specifier.empty() && "Can't be empty here");
+
+ switch (Specifier[0]) {
case 'E':
LittleEndian = false;
break;
@@ -202,9 +180,12 @@ void TargetData::init(const std::string &TargetDescription) {
LittleEndian = true;
break;
case 'p':
- PointerMemSize = atoi(getToken(token,":").c_str()) / 8;
- PointerABIAlign = atoi(getToken(token,":").c_str()) / 8;
- PointerPrefAlign = atoi(getToken(token,":").c_str()) / 8;
+ Split = Token.split(':');
+ PointerMemSize = getInt(Split.first) / 8;
+ Split = Split.second.split(':');
+ PointerABIAlign = getInt(Split.first) / 8;
+ Split = Split.second.split(':');
+ PointerPrefAlign = getInt(Split.first) / 8;
if (PointerPrefAlign == 0)
PointerPrefAlign = PointerABIAlign;
break;
@@ -213,28 +194,52 @@ void TargetData::init(const std::string &TargetDescription) {
case 'f':
case 'a':
case 's': {
- AlignTypeEnum align_type = STACK_ALIGN; // Dummy init, silence warning
- switch(*p) {
- case 'i': align_type = INTEGER_ALIGN; break;
- case 'v': align_type = VECTOR_ALIGN; break;
- case 'f': align_type = FLOAT_ALIGN; break;
- case 'a': align_type = AGGREGATE_ALIGN; break;
- case 's': align_type = STACK_ALIGN; break;
+ AlignTypeEnum AlignType;
+ switch (Specifier[0]) {
+ default:
+ case 'i': AlignType = INTEGER_ALIGN; break;
+ case 'v': AlignType = VECTOR_ALIGN; break;
+ case 'f': AlignType = FLOAT_ALIGN; break;
+ case 'a': AlignType = AGGREGATE_ALIGN; break;
+ case 's': AlignType = STACK_ALIGN; break;
}
- uint32_t size = (uint32_t) atoi(++p);
- unsigned char abi_align = atoi(getToken(token, ":").c_str()) / 8;
- unsigned char pref_align = atoi(getToken(token, ":").c_str()) / 8;
- if (pref_align == 0)
- pref_align = abi_align;
- setAlignment(align_type, abi_align, pref_align, size);
+ unsigned Size = getInt(Specifier.substr(1));
+ Split = Token.split(':');
+ unsigned char ABIAlign = getInt(Split.first) / 8;
+
+ Split = Split.second.split(':');
+ unsigned char PrefAlign = getInt(Split.first) / 8;
+ if (PrefAlign == 0)
+ PrefAlign = ABIAlign;
+ setAlignment(AlignType, ABIAlign, PrefAlign, Size);
break;
}
+ case 'n': // Native integer types.
+ Specifier = Specifier.substr(1);
+ do {
+ if (unsigned Width = getInt(Specifier))
+ LegalIntWidths.push_back(Width);
+ Split = Token.split(':');
+ Specifier = Split.first;
+ Token = Split.second;
+ } while (!Specifier.empty() || !Token.empty());
+ break;
+
default:
break;
}
}
}
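
To make the new StringRef-based tokenization concrete, here is a minimal
standalone sketch (plain C++ mirroring the split-on-'-'-then-on-':' loop
above; an illustration, not LLVM code) applied to a small layout string:

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
      // Hypothetical layout string following the grammar documented above.
      std::string Desc = "e-p:32:32:32-i64:32:64-n8:16:32";
      std::stringstream Tokens(Desc);
      std::string Token;
      while (std::getline(Tokens, Token, '-')) {    // outer split on '-'
        std::stringstream Parts(Token);
        std::string Field;
        std::vector<std::string> Fields;
        while (std::getline(Parts, Field, ':'))     // inner split on ':'
          Fields.push_back(Field);
        if (Fields.empty())
          continue;                                 // skip empty tokens
        std::cout << "specifier '" << Fields[0] << "' with "
                  << Fields.size() - 1 << " argument(s)\n";
      }
      return 0;
    }

For "n8:16:32" this prints the leading field "n8" (the specifier letter plus
the first width) and two trailing widths, matching what the new 'n' case
consumes as it accumulates LegalIntWidths.
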
+/// Default ctor.
+///
+/// @note This has to exist, because this is a pass, but it should never be
+/// used.
+TargetData::TargetData() : ImmutablePass(&ID) {
+ llvm_report_error("Bad TargetData ctor used. "
+ "Tool did not specify a TargetData to use?");
+}
+
TargetData::TargetData(const Module *M)
: ImmutablePass(&ID) {
init(M->getDataLayout());
@@ -318,37 +323,130 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
: Alignments[BestMatchIdx].PrefAlign;
}
-typedef DenseMap<const StructType*, StructLayout*>LayoutInfoTy;
+typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
-TargetData::~TargetData() {
- if (!LayoutMap)
- return;
-
- // Remove any layouts for this TD.
- LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap);
- for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end(); I != E; ) {
- I->second->~StructLayout();
- free(I->second);
- TheMap.erase(I++);
+namespace llvm {
+
+class StructLayoutMap : public AbstractTypeUser {
+ LayoutInfoTy LayoutInfo;
+
+ /// refineAbstractType - The callback method invoked when an abstract type is
+ /// resolved to another type. An object must override this method to update
+ /// its internal state to reference NewType instead of OldType.
+ ///
+ virtual void refineAbstractType(const DerivedType *OldTy,
+ const Type *) {
+ const StructType *STy = dyn_cast<const StructType>(OldTy);
+ if (!STy) {
+ OldTy->removeAbstractTypeUser(this);
+ return;
+ }
+
+ StructLayout *SL = LayoutInfo[STy];
+ if (SL) {
+ SL->~StructLayout();
+ free(SL);
+ LayoutInfo[STy] = NULL;
+ }
+
+ OldTy->removeAbstractTypeUser(this);
}
-
- delete static_cast<LayoutInfoTy*>(LayoutMap);
+
+ /// typeBecameConcrete - The other case which AbstractTypeUsers must be aware
+ /// of is when a type makes the transition from being abstract (where it has
+ /// clients on its AbstractTypeUsers list) to concrete (where it does not).
+ /// This method notifies ATU's when this occurs for a type.
+ ///
+ virtual void typeBecameConcrete(const DerivedType *AbsTy) {
+ const StructType *STy = dyn_cast<const StructType>(AbsTy);
+ if (!STy) {
+ AbsTy->removeAbstractTypeUser(this);
+ return;
+ }
+
+ StructLayout *SL = LayoutInfo[STy];
+ if (SL) {
+ SL->~StructLayout();
+ free(SL);
+ LayoutInfo[STy] = NULL;
+ }
+
+ AbsTy->removeAbstractTypeUser(this);
+ }
+
+ bool insert(const Type *Ty) {
+ if (Ty->isAbstract())
+ Ty->addAbstractTypeUser(this);
+ return true;
+ }
+
+public:
+ virtual ~StructLayoutMap() {
+ // Remove any layouts.
+ for (LayoutInfoTy::iterator
+ I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I)
+ if (StructLayout *SL = I->second) {
+ SL->~StructLayout();
+ free(SL);
+ }
+ }
+
+ inline LayoutInfoTy::iterator begin() {
+ return LayoutInfo.begin();
+ }
+ inline LayoutInfoTy::iterator end() {
+ return LayoutInfo.end();
+ }
+ inline LayoutInfoTy::const_iterator begin() const {
+ return LayoutInfo.begin();
+ }
+ inline LayoutInfoTy::const_iterator end() const {
+ return LayoutInfo.end();
+ }
+
+ LayoutInfoTy::iterator find(const StructType *&Val) {
+ return LayoutInfo.find(Val);
+ }
+ LayoutInfoTy::const_iterator find(const StructType *&Val) const {
+ return LayoutInfo.find(Val);
+ }
+
+ bool erase(const StructType *&Val) {
+ return LayoutInfo.erase(Val);
+ }
+ bool erase(LayoutInfoTy::iterator I) {
+ return LayoutInfo.erase(I);
+ }
+
+ StructLayout *&operator[](const Type *Key) {
+ const StructType *STy = dyn_cast<const StructType>(Key);
+ assert(STy && "Trying to access the struct layout map with a non-struct!");
+ insert(STy);
+ return LayoutInfo[STy];
+ }
+
+ // for debugging...
+ virtual void dump() const {}
+};
+
+} // end namespace llvm
+
+TargetData::~TargetData() {
+ delete LayoutMap;
}
const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
if (!LayoutMap)
- LayoutMap = static_cast<void*>(new LayoutInfoTy());
-
- LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap);
+ LayoutMap = new StructLayoutMap();
- StructLayout *&SL = TheMap[Ty];
+ StructLayout *&SL = (*LayoutMap)[Ty];
if (SL) return SL;
// Otherwise, create the struct layout. Because it is variable length, we
// malloc it, then use placement new.
int NumElts = Ty->getNumElements();
StructLayout *L =
- (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1)*sizeof(uint64_t));
+ (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
// Set SL before calling StructLayout's ctor. The ctor could cause other
// entries to be added to TheMap, invalidating our reference.
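
The allocation pattern used here (malloc a block sized for the flexible
tail, construct with placement new, later destroy explicitly and free) can
be shown in isolation. This is a self-contained sketch with a hypothetical
type, not the real StructLayout:

    #include <cstdio>
    #include <cstdlib>
    #include <new>
    #include <stdint.h>

    struct VarLayout {
      unsigned NumElts;
      uint64_t Offsets[1];                     // really NumElts entries
      explicit VarLayout(unsigned N) : NumElts(N) {
        for (unsigned i = 0; i != N; ++i)
          Offsets[i] = uint64_t(i) * 8;        // stand-in offset computation
      }
    };

    int main() {
      unsigned N = 4;
      void *Mem = malloc(sizeof(VarLayout) + (N - 1) * sizeof(uint64_t));
      VarLayout *L = new (Mem) VarLayout(N);   // placement new into raw memory
      printf("last offset: %llu\n", (unsigned long long)L->Offsets[N - 1]);
      L->~VarLayout();                         // explicit dtor before free,
      free(Mem);                               // as in ~StructLayoutMap above
      return 0;
    }
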
@@ -365,31 +463,35 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
if (!LayoutMap) return; // No cache.
- LayoutInfoTy* LayoutInfo = static_cast<LayoutInfoTy*>(LayoutMap);
- LayoutInfoTy::iterator I = LayoutInfo->find(Ty);
- if (I == LayoutInfo->end()) return;
+ DenseMap<const StructType*, StructLayout*>::iterator I = LayoutMap->find(Ty);
+ if (I == LayoutMap->end()) return;
I->second->~StructLayout();
free(I->second);
- LayoutInfo->erase(I);
+ LayoutMap->erase(I);
}
std::string TargetData::getStringRepresentation() const {
- std::string repr;
- repr.append(LittleEndian ? "e" : "E");
- repr.append("-p:").append(itostr((int64_t) (PointerMemSize * 8))).
- append(":").append(itostr((int64_t) (PointerABIAlign * 8))).
- append(":").append(itostr((int64_t) (PointerPrefAlign * 8)));
- for (align_const_iterator I = Alignments.begin();
- I != Alignments.end();
- ++I) {
- repr.append("-").append(1, (char) I->AlignType).
- append(utostr((int64_t) I->TypeBitWidth)).
- append(":").append(utostr((uint64_t) (I->ABIAlign * 8))).
- append(":").append(utostr((uint64_t) (I->PrefAlign * 8)));
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << (LittleEndian ? "e" : "E")
+ << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
+ << ':' << PointerPrefAlign*8;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ const TargetAlignElem &AI = Alignments[i];
+ OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
+ << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
+ }
+
+ if (!LegalIntWidths.empty()) {
+ OS << "-n" << (unsigned)LegalIntWidths[0];
+
+ for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
+ OS << ':' << (unsigned)LegalIntWidths[i];
}
- return repr;
+ return OS.str();
}
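
The stream-building style is easy to verify in isolation. A standalone
sketch with std::ostringstream standing in for raw_string_ostream
(hypothetical field values, not the real TargetData members):

    #include <iostream>
    #include <sstream>
    #include <vector>

    int main() {
      bool LittleEndian = true;
      unsigned PtrSize = 4, PtrABI = 4, PtrPref = 4;  // stored in bytes
      std::vector<unsigned> LegalIntWidths;
      LegalIntWidths.push_back(8);
      LegalIntWidths.push_back(16);
      LegalIntWidths.push_back(32);

      std::ostringstream OS;
      OS << (LittleEndian ? "e" : "E")
         << "-p:" << PtrSize * 8 << ':' << PtrABI * 8 << ':' << PtrPref * 8;
      if (!LegalIntWidths.empty()) {
        OS << "-n" << LegalIntWidths[0];
        for (unsigned i = 1; i < LegalIntWidths.size(); ++i)
          OS << ':' << LegalIntWidths[i];
      }
      std::cout << OS.str() << '\n';  // prints: e-p:32:32:32-n8:16:32
      return 0;
    }
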
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index c1aab99..f887523 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -24,6 +24,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -151,7 +152,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
  // relocation, then we may have to drop this into a writable data section
// even though it is marked const.
switch (C->getRelocationInfo()) {
- default: llvm_unreachable("unknown relocation info kind");
+ default: assert(0 && "unknown relocation info kind");
case Constant::NoRelocation:
// If initializer is a null-terminated string, put it in a "cstring"
// section of the right width.
@@ -219,7 +220,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getDataNoRel();
switch (C->getRelocationInfo()) {
- default: llvm_unreachable("unknown relocation info kind");
+ default: assert(0 && "unknown relocation info kind");
case Constant::NoRelocation:
return SectionKind::getDataNoRel();
case Constant::LocalRelocation:
@@ -671,7 +672,7 @@ TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() {
const MCSectionMachO *TargetLoweringObjectFileMachO::
-getMachOSection(const StringRef &Segment, const StringRef &Section,
+getMachOSection(StringRef Segment, StringRef Section,
unsigned TypeAndAttributes,
unsigned Reserved2, SectionKind Kind) const {
// We unique sections by their segment/section pair. The returned section
diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtarget.cpp
index 95c92ca..edb76f9 100644
--- a/lib/Target/TargetSubtarget.cpp
+++ b/lib/Target/TargetSubtarget.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
using namespace llvm;
//---------------------------------------------------------------------------
@@ -20,3 +21,13 @@ using namespace llvm;
TargetSubtarget::TargetSubtarget() {}
TargetSubtarget::~TargetSubtarget() {}
+
+bool TargetSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = ANTIDEP_NONE;
+ CriticalPathRCs.clear();
+ return false;
+}
+
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index ae8e6d3..b88063f 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -651,7 +651,7 @@ void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
printInstructionThroughMCStreamer(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 821cca4..be9f4b2 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/SmallString.h"
@@ -405,7 +406,6 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
printLabel(MI);
return;
case TargetInstrInfo::INLINEASM:
- O << '\t';
printInlineAsm(MI);
return;
case TargetInstrInfo::IMPLICIT_DEF:
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index a0bded3..4497931 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -82,7 +82,7 @@ namespace {
void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp = 0, intptr_t PCAdj = 0,
- bool NeedStub = false, bool Indirect = false);
+ bool Indirect = false);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
intptr_t PCAdj = 0);
@@ -176,7 +176,6 @@ template<class CodeEmitter>
void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp /* = 0 */,
intptr_t PCAdj /* = 0 */,
- bool NeedStub /* = false */,
bool Indirect /* = false */) {
intptr_t RelocCST = Disp;
if (Reloc == X86::reloc_picrel_word)
@@ -185,9 +184,9 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
RelocCST = PCAdj;
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, NeedStub)
+ GV, RelocCST, false)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, NeedStub);
+ GV, RelocCST, false);
MCE.addRelocation(MR);
// The relocated value will be added to the displacement
if (Reloc == X86::reloc_absolute_dword)
@@ -333,10 +332,9 @@ void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
// do it, otherwise fallback to absolute (this is determined by IsPCRel).
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
// 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
- bool NeedStub = isa<Function>(RelocOp->getGlobal());
bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
- Adj, NeedStub, Indirect);
+ Adj, Indirect);
} else if (RelocOp->isSymbol()) {
emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
} else if (RelocOp->isCPI()) {
@@ -633,14 +631,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
}
if (MO.isGlobal()) {
- // Assume undefined functions may be outside the Small codespace.
- bool NeedStub =
- (Is64BitMode &&
- (TM.getCodeModel() == CodeModel::Large ||
- TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
- Opcode == X86::TAILJMPd;
emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
- MO.getOffset(), 0, NeedStub);
+ MO.getOffset(), 0);
break;
}
@@ -681,10 +673,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64ri)
rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
- bool NeedStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, Indirect);
+ Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@@ -790,10 +781,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64ri32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
- bool NeedStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, Indirect);
+ Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@@ -831,10 +821,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64mi32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO.isGlobal()) {
- bool NeedStub = isa<Function>(MO.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO, TM);
emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
- NeedStub, Indirect);
+ Indirect);
} else if (MO.isSymbol())
emitExternalSymbolAddress(MO.getSymbolName(), rt);
else if (MO.isCPI())
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3401df0..431c120 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1493,7 +1493,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
EVT ResVT = RVLocs[0].getValVT();
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
- int FI = MFI.CreateStackObject(MemSize, MemSize);
+ int FI = MFI.CreateStackObject(MemSize, MemSize, false);
addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
DstRC = ResVT == MVT::f32
? X86::FR32RegisterClass : X86::FR64RegisterClass;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 122f515..6a3577a 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -12,6 +12,15 @@
//
//===----------------------------------------------------------------------===//
+// Force NDEBUG on in any optimized build on Darwin.
+//
+// FIXME: This is a huge hack, to work around ridiculously awful compile times
+// on this file with gcc-4.2 on Darwin, in Release mode.
+#if (!defined(__llvm__) && defined(__APPLE__) && \
+ defined(__OPTIMIZE__) && !defined(NDEBUG))
+#define NDEBUG
+#endif
+
#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
@@ -661,7 +670,6 @@ void X86DAGToDAGISel::InstructionSelect() {
const Function *F = MF->getFunction();
OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
- DEBUG(BB->dump());
if (OptLevel != CodeGenOpt::None)
PreprocessForRMW();
@@ -1950,14 +1958,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
0);
// We just did a 32-bit clear, insert it into a 64-bit register to
// clear the whole 64-bit reg.
- SDValue Undef =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
- dl, MVT::i64), 0);
+ SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
SDValue SubRegNo =
CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
ClrNode =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
- MVT::i64, Undef, ClrNode, SubRegNo),
+ SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
+ MVT::i64, Zero, ClrNode, SubRegNo),
0);
} else {
ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 86ec9f2..6018cf5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1087,6 +1087,17 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
#include "X86GenCallingConv.inc"
+bool
+X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86);
+}
+
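
In effect, CanLowerReturn dry-runs the return-value calling convention
before any lowering happens. Schematically (a summary of the intended flow,
not additional code from this patch):

    // CCState CCInfo(...);                      // fresh assignment state
    // CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86)
    //   true  -> every return value can be assigned a register or stack
    //            slot, so LowerReturn below can proceed as written
    //   false -> the caller of this hook is expected to fall back to an
    //            sret-style demotion of the return value
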
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -1370,7 +1381,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
// In case of tail call optimization mark all arguments mutable. Since they
// could be overwritten by lowering of arguments in case of a tail call.
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), isImmutable);
+ VA.getLocMemOffset(), isImmutable, false);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal())
return FIN;
@@ -1499,7 +1510,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
- VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false);
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
@@ -1550,7 +1561,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
VarArgsGPOffset = NumIntRegs * 8;
VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
- TotalNumXMMRegs * 16, 16);
+ TotalNumXMMRegs * 16, 16,
+ false);
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
@@ -1671,7 +1683,8 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize,
+ true, false);
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
@@ -1884,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
- FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false);
FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal()) {
@@ -1924,9 +1937,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
FPDiff, dl);
}
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ bool WasGlobalOrExternal = false;
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+ // In the 64-bit large code model, we have to make all calls
+ // through a register, since the call instruction's 32-bit
+ // pc-relative offset may not be large enough to hold the whole
+ // address.
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ WasGlobalOrExternal = true;
+ // If the callee is a GlobalAddress node (quite common, every direct call
+ // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
+ // it.
+
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
GlobalValue *GV = G->getGlobal();
@@ -1954,6 +1977,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
G->getOffset(), OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ WasGlobalOrExternal = true;
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@@ -1971,7 +1995,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
- } else if (isTailCall) {
+ }
+
+ if (isTailCall && !WasGlobalOrExternal) {
unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
@@ -2169,7 +2195,8 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
- ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize);
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ true, false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -2517,6 +2544,21 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
isUndefOrEqual(N->getMaskElt(3), 3);
}
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 4)
+ return false;
+
+ return isUndefOrEqual(N->getMaskElt(0), 2) &&
+ isUndefOrEqual(N->getMaskElt(1), 3) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
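
As a quick sanity check of the mask being matched (element names are
illustrative):

    // vector_shuffle v, undef, <2, 3, 2, 3>
    //   input  v = <a0, a1, a2, a3>
    //   result   = <a2, a3, a2, a3>
    // That is, the high half of v replicated into both halves, which is
    // what MOVHLPS produces when both source operands are the same register.
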
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
@@ -2536,10 +2578,9 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
return true;
}
-/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
-/// and MOVLHPS.
-bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
+/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
+bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
@@ -2556,21 +2597,6 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
return true;
}
-/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
-/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
-/// <2, 3, 2, 3>
-bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
- if (NumElems != 4)
- return false;
-
- return isUndefOrEqual(N->getMaskElt(0), 2) &&
- isUndefOrEqual(N->getMaskElt(1), 3) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
- isUndefOrEqual(N->getMaskElt(3), 3);
-}
-
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
@@ -4264,7 +4290,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
X86::isMOVSLDUPMask(SVOp) ||
X86::isMOVHLPSMask(SVOp) ||
- X86::isMOVHPMask(SVOp) ||
+ X86::isMOVLHPSMask(SVOp) ||
X86::isMOVLPMask(SVOp)))
return Op;
@@ -4961,7 +4987,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
@@ -4995,7 +5021,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SmallVector<SDValue, 8> Ops;
@@ -5205,7 +5231,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
- int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
@@ -5228,7 +5254,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
};
Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
Chain = Value.getValue(1);
- SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
}
@@ -6752,7 +6778,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
// Save FP Control Word to stack slot
- int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
@@ -7977,7 +8003,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
- int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
@@ -9585,14 +9611,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
// GCC allows "st(0)" to be called just plain "st".
- if (StringsEqualNoCase("{st}", Constraint)) {
+ if (StringRef("{st}").equals_lower(Constraint)) {
Res.first = X86::ST0;
Res.second = X86::RFP80RegisterClass;
return Res;
}
// flags -> EFLAGS
- if (StringsEqualNoCase("{flags}", Constraint)) {
+ if (StringRef("{flags}").equals_lower(Constraint)) {
Res.first = X86::EFLAGS;
Res.second = X86::CCRRegisterClass;
return Res;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7b59b81..7b4ab62 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -286,7 +286,7 @@ namespace llvm {
    /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
    /// specifies a shuffle of elements that is suitable for input to
    /// MOVLHPS.
- bool isMOVHPMask(ShuffleVectorSDNode *N);
+ bool isMOVLHPSMask(ShuffleVectorSDNode *N);
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
@@ -699,6 +699,12 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG);
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG);
+
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 3edced7..a01534b 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -309,7 +309,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
[(set GR64:$dst, i64immSExt32:$src)]>;
}
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (load addr:$src))]>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 87bc10d..1ddceb1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -26,11 +26,15 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
+
+#include <limits>
+
using namespace llvm;
static cl::opt<bool>
@@ -707,9 +711,23 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
}
}
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
+/// isFrameOperand - Return true and set FrameIndex if the specified
+/// operand and the operands that follow it form a stack frame reference.
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const {
+ if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
+ MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
+ MI->getOperand(Op+1).getImm() == 1 &&
+ MI->getOperand(Op+2).getReg() == 0 &&
+ MI->getOperand(Op+3).getImm() == 0) {
+ FrameIndex = MI->getOperand(Op).getIndex();
+ return true;
+ }
+ return false;
+}
+
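
The operand shape isFrameOperand matches corresponds to the canonical x86
memory reference for a direct frame access. Schematically (an illustrative
dump, not literal LLVM output):

    // %eax = MOV32rm <fi#2>, 1, %noreg, 0
    //   Op   = <fi#2>   frame index used as the base
    //   Op+1 = 1        scale, required to be exactly 1
    //   Op+2 = %noreg   index register, required to be absent (register 0)
    //   Op+3 = 0        displacement, required to be zero
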
+static bool isFrameLoadOpcode(int Opcode) {
+ switch (Opcode) {
default: break;
case X86::MOV8rm:
case X86::MOV16rm:
@@ -723,22 +741,14 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
- if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(4).isImm() &&
- MI->getOperand(2).getImm() == 1 &&
- MI->getOperand(3).getReg() == 0 &&
- MI->getOperand(4).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
+ return true;
break;
}
- return 0;
+ return false;
}
-unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
+static bool isFrameStoreOpcode(int Opcode) {
+ switch (Opcode) {
default: break;
case X86::MOV8mr:
case X86::MOV16mr:
@@ -753,19 +763,83 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case X86::MMX_MOVD64mr:
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
- if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
- MI->getOperand(2).isReg() && MI->getOperand(3).isImm() &&
- MI->getOperand(1).getImm() == 1 &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(0).getIndex();
+ return true;
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode()))
+ if (isFrameOperand(MI, 1, FrameIndex))
+ return MI->getOperand(0).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ return hasLoadFromStackSlot(MI, FrameIndex);
+ }
+ return 0;
+}
+
+bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode()))
+ if (isFrameOperand(MI, 0, FrameIndex))
return MI->getOperand(X86AddrNumOperands).getReg();
- }
- break;
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ return hasStoreToStackSlot(MI, FrameIndex);
}
return 0;
}
+bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ return true;
+ }
+ }
+ return false;
+}
+
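
Unlike the opcode-driven checks, hasLoadFromStackSlot and hasStoreToStackSlot
above consult the instruction's memory operands, so they can also recognize
frame accesses after frame-index elimination has rewritten the operands.
Schematically (a hypothetical dump):

    // MOV32mr ... ; mem:ST4[FixedStack2]
    //   the memory operand's value is a FixedStackPseudoSourceValue,
    //   so FrameIndex is set to 2 and the function returns true
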
/// regIsPICBase - Return true if the register is a PIC base (i.e., defined
/// by X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
@@ -794,10 +868,14 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVSSrm:
case X86::MOVSDrm:
case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm: {
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
// Loads from constant pools are trivially rematerializable.
if (MI->getOperand(1).isReg() &&
MI->getOperand(2).isImm() &&
@@ -917,12 +995,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const {
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = RI.getSubReg(DestReg, SubIdx);
+ DestReg = TRI->getSubReg(DestReg, SubIdx);
SubIdx = 0;
}
@@ -1891,8 +1970,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ bool isAligned = (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = DebugLoc::getUnknownLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -1985,8 +2063,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ bool isAligned = (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL = DebugLoc::getUnknownLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2170,7 +2247,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// If table selected...
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
OpcodeTablePtr->find((unsigned*)MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
unsigned Opcode = I->second.first;
@@ -2402,7 +2479,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
OpcodeTablePtr->find((unsigned*)Opc);
if (I != OpcodeTablePtr->end())
return true;
@@ -2413,7 +2490,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
if (I == MemOp2RegOpTable.end())
return false;
@@ -2530,7 +2607,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
if (!N->isMachineOpcode())
return false;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
if (I == MemOp2RegOpTable.end())
return false;
@@ -2563,17 +2640,16 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
MachineFunction &MF = DAG.getMachineFunction();
if (FoldedLoad) {
EVT VT = *RC->vt_begin();
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ bool isAligned = (*MMOs.first)->getAlignment() >= 16;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
// Preserve memory reference information.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
}
@@ -2601,8 +2677,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
AddrOps.pop_back();
AddrOps.push_back(SDValue(NewNode, 0));
AddrOps.push_back(Chain);
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ bool isAligned = (*MMOs.first)->getAlignment() >= 16;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
dl, MVT::Other,
@@ -2610,10 +2689,6 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
NewNodes.push_back(Store);
// Preserve memory reference information.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
}
@@ -2623,7 +2698,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)Opc);
if (I == MemOp2RegOpTable.end())
return 0;
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 6eb07d5..c6daa25 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -449,13 +449,41 @@ public:
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// hasLoadFromStackSlot - If the specified machine instruction has
+ /// a load from a stack slot, return true along with the FrameIndex
+ /// of the loaded stack slot. If not, return false. Unlike
+  /// isLoadFromStackSlot, this returns true for any instruction that
+  /// loads from the stack. This is a hint only and may not catch all
+ /// cases.
+ bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// hasStoreToStackSlot - If the specified machine instruction has a
+ /// store to a stack slot, return true along with the FrameIndex of
+  /// the stored stack slot. If not, return false. Unlike
+  /// isStoreToStackSlot, this returns true for any instruction that
+  /// stores to the stack. This is a hint only and may not catch all
+ /// cases.
+ bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const;
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -610,6 +638,11 @@ private:
unsigned OpNum,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
+
+  /// isFrameOperand - Return true and set FrameIndex if the specified
+  /// operand and the operands that follow it form a stack frame reference.
+ bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 9b82e1e..a79f262 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -543,7 +543,7 @@ let neverHasSideEffects = 1 in {
}
// Trap
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int 3", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>;
def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
// PIC base construction. This expands to code that looks like this:
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index be242a0..ee63d56 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -225,9 +225,9 @@ def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
-def movhp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
+def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movlp : PatFrag<(ops node:$lhs, node:$rhs),
@@ -497,7 +497,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
// disregarded.
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
@@ -706,7 +706,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
let neverHasSideEffects = 1 in
def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (loadv4f32 addr:$src))]>;
@@ -715,7 +715,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v4f32 VR128:$src), addr:$dst)]>;
// Intrinsic forms of MOVUPS load and store
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
@@ -735,7 +735,7 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (movhp VR128:$src1,
+ (movlhps VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@@ -760,7 +760,7 @@ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -1256,7 +1256,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
// disregarded.
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
@@ -1494,7 +1494,7 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2f64 (movhp VR128:$src1,
+ (v2f64 (movlhps VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@@ -2085,7 +2085,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
[(set VR128:$dst, (v4i32 (pshufd:$src2
(bc_v4i32(memopv2i64 addr:$src1)),
(undef))))]>;
-}
+}
// SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
@@ -2874,7 +2874,7 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
Requires<[HasSSSE3]>;
-}
+}
def : Pat<(X86pshufb VR128:$src, VR128:$mask),
(PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
@@ -3035,7 +3035,7 @@ def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
let AddedComplexity = 20 in {
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
-def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)),
+def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
@@ -3051,48 +3051,26 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
-// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-
def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
-// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
- addr:$src1),
- (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 62ca47f..0792bdd 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -367,8 +367,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
// Rewrite the call target... so that we don't end up here every time we
// execute the call.
#if defined (X86_64_JIT)
- if (!isStub)
- *(intptr_t *)(RetAddr - 0xa) = NewVal;
+ assert(isStub &&
+ "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
+ " the call instruction varies too much.");
#else
*(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c5ff525..f577fcf 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -392,6 +392,11 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::SP);
Reserved.set(X86::SPL);
+ // Set the instruction pointer register and its aliases as reserved.
+ Reserved.set(X86::RIP);
+ Reserved.set(X86::EIP);
+ Reserved.set(X86::IP);
+
// Set the frame-pointer register and its aliases as reserved if needed.
if (hasFP(MF)) {
Reserved.set(X86::RBP);
@@ -450,12 +455,17 @@ bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool requiresRealignment =
+ RealignStack && (MFI->getMaxAlignment() > StackAlign);
// FIXME: Currently we don't support stack realignment for functions with
- // variable-sized allocas
- return (RealignStack &&
- (MFI->getMaxAlignment() > StackAlign &&
- !MFI->hasVarSizedObjects()));
+ // variable-sized allocas.
+  // FIXME: Temporarily disable the error; it seems to be too conservative.
+ if (0 && requiresRealignment && MFI->hasVarSizedObjects())
+ llvm_report_error(
+ "Stack realignment in presense of dynamic allocas is not supported");
+
+ return (requiresRealignment && !MFI->hasVarSizedObjects());
}
bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
@@ -610,8 +620,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Offset is a 32-bit integer.
int Offset = getFrameIndexOffset(MF, FrameIndex) +
(int)(MI.getOperand(i + 3).getImm());
-
- MI.getOperand(i + 3).ChangeToImmediate(Offset);
+
+ MI.getOperand(i + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
@@ -647,7 +657,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// }
// [EBP]
MFI->CreateFixedObject(-TailCallReturnAddrDelta,
- (-1U*SlotSize)+TailCallReturnAddrDelta);
+ (-1U*SlotSize)+TailCallReturnAddrDelta,
+ true, false);
}
if (hasFP(MF)) {
@@ -659,7 +670,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx = MFI->CreateFixedObject(SlotSize,
-(int)SlotSize +
TFI.getOffsetOfLocalArea() +
- TailCallReturnAddrDelta);
+ TailCallReturnAddrDelta,
+ true, false);
assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
FrameIdx = 0;
@@ -1271,7 +1283,7 @@ unsigned X86RegisterInfo::getRARegister() const {
: X86::EIP; // Should have dwarf #8.
}
-unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? FramePtr : StackPtr;
}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index f635707..f281a3c 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -153,7 +153,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
int getFrameIndexOffset(MachineFunction &MF, int FI) const;
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 9525f04..b901c14 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -18,8 +18,10 @@
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallVector.h"
using namespace llvm;
#if defined(_MSC_VER)
@@ -257,118 +259,6 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
}
-static const char *GetCurrentX86CPU() {
- unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
- if (GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
- return "generic";
- unsigned Family = 0;
- unsigned Model = 0;
- DetectFamilyModel(EAX, Family, Model);
-
- GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- bool Em64T = (EDX >> 29) & 0x1;
- bool HasSSE3 = (ECX & 0x1);
-
- union {
- unsigned u[3];
- char c[12];
- } text;
-
- GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
- if (memcmp(text.c, "GenuineIntel", 12) == 0) {
- switch (Family) {
- case 3:
- return "i386";
- case 4:
- return "i486";
- case 5:
- switch (Model) {
- case 4: return "pentium-mmx";
- default: return "pentium";
- }
- case 6:
- switch (Model) {
- case 1: return "pentiumpro";
- case 3:
- case 5:
- case 6: return "pentium2";
- case 7:
- case 8:
- case 10:
- case 11: return "pentium3";
- case 9:
- case 13: return "pentium-m";
- case 14: return "yonah";
- case 15:
- case 22: // Celeron M 540
- return "core2";
- case 23: // 45nm: Penryn , Wolfdale, Yorkfield (XE)
- return "penryn";
- default: return "i686";
- }
- case 15: {
- switch (Model) {
- case 3:
- case 4:
- case 6: // same as 4, but 65nm
- return (Em64T) ? "nocona" : "prescott";
- case 26:
- return "corei7";
- case 28:
- return "atom";
- default:
- return (Em64T) ? "x86-64" : "pentium4";
- }
- }
-
- default:
- return "generic";
- }
- } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
- // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
- // appears to be no way to generate the wide variety of AMD-specific targets
- // from the information returned from CPUID.
- switch (Family) {
- case 4:
- return "i486";
- case 5:
- switch (Model) {
- case 6:
- case 7: return "k6";
- case 8: return "k6-2";
- case 9:
- case 13: return "k6-3";
- default: return "pentium";
- }
- case 6:
- switch (Model) {
- case 4: return "athlon-tbird";
- case 6:
- case 7:
- case 8: return "athlon-mp";
- case 10: return "athlon-xp";
- default: return "athlon";
- }
- case 15:
- if (HasSSE3) {
- return "k8-sse3";
- } else {
- switch (Model) {
- case 1: return "opteron";
- case 5: return "athlon-fx"; // also opteron
- default: return "athlon64";
- }
- }
- case 16:
- return "amdfam10";
- default:
- return "generic";
- }
- } else {
- return "generic";
- }
-}
-
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
bool is64Bit)
: PICStyle(PICStyles::None)
@@ -395,7 +285,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
// Determine default and user specified characteristics
if (!FS.empty()) {
// If feature string is not empty, parse features string.
- std::string CPU = GetCurrentX86CPU();
+ std::string CPU = sys::getHostCPUName();
ParseSubtargetFeatures(FS, CPU);
// All X86-64 CPUs also have SSE2, however user might request no SSE via
// -mattr, so don't force SSELevel here.
@@ -455,3 +345,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
if (StackAlignment)
stackAlignment = StackAlignment;
}
+
+bool X86Subtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ return OptLevel >= CodeGenOpt::Default;
+}
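The hundred-odd lines of hand-rolled CPUID decoding deleted above move behind a portable entry point, which is why llvm/System/Host.h is now included. A hedged usage sketch, with the return type assumed from the new call site in the constructor:

    #include "llvm/System/Host.h"
    #include <string>

    // Returns a SubtargetFeatures-style CPU name such as "core2" or
    // "k8-sse3", or "generic" when the host cannot be identified.
    std::string detectHostCPU() {
      return llvm::sys::getHostCPUName();
    }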
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index e64b854..23f2841 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -166,11 +166,11 @@ public:
std::string getDataLayout() const {
const char *p;
if (is64Bit())
- p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128";
+ p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
else if (isTargetDarwin())
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128";
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
else
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32";
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";
return std::string(p);
}
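The new -n suffix in each layout string declares the target's native integer widths (i8/i16/i32/i64 on x86-64), which optimizers consult to avoid synthesizing integer types the target cannot handle natively. As a sketch, such a string can be handed straight to TargetData; the string-taking constructor is assumed from this era's API:

    #include "llvm/Target/TargetData.h"

    // x86-64 layout string from the hunk above, including the native
    // integer widths spec "n8:16:32:64".
    llvm::TargetData TD("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128"
                        "-n8:16:32:64");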
@@ -219,10 +219,8 @@ public:
/// enablePostRAScheduler - X86 target is enabling post-alloc scheduling
/// at 'More' optimization level.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& mode) const {
- mode = TargetSubtarget::ANTIDEP_CRITICAL;
- return OptLevel >= CodeGenOpt::Default;
- }
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
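The hook now also reports critical-path register classes: the anti-dependence breaker favors breaking chains in the classes a target lists, and an empty vector (as X86 leaves it) requests plain critical-path breaking. A sketch of how a hypothetical target could use the new out-parameter; MySubtarget, MyTarget, and MyRC are illustrative placeholders, not names from the patch:

    // Hypothetical override; illustrative names only.
    bool MySubtarget::enablePostRAScheduler(
        CodeGenOpt::Level OptLevel,
        TargetSubtarget::AntiDepBreakMode &Mode,
        RegClassVector &CriticalPathRCs) const {
      Mode = TargetSubtarget::ANTIDEP_CRITICAL;  // break critical-path deps
      CriticalPathRCs.clear();
      CriticalPathRCs.push_back(&MyTarget::MyRCRegClass); // prioritized class
      return OptLevel >= CodeGenOpt::Default;    // enable at -O2 and above
    }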
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index a61de1c..0cda8bc 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -22,8 +22,7 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static const MCAsmInfo *createMCAsmInfo(const Target &T,
- const StringRef &TT) {
+static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
case Triple::Darwin:
@@ -186,14 +185,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
// 64-bit JIT places everything in the same buffer except external functions.
- // On Darwin, use small code model but hack the call instruction for
- // externals. Elsewhere, do not assume globals are in the lower 4G.
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTargetDarwin())
- setCodeModel(CodeModel::Small);
- else
+ if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
- }
PM.add(createX86CodeEmitterPass(*this, MCE));
@@ -212,14 +205,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
// 64-bit JIT places everything in the same buffer except external functions.
- // On Darwin, use small code model but hack the call instruction for
- // externals. Elsewhere, do not assume globals are in the lower 4G.
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTargetDarwin())
- setCodeModel(CodeModel::Small);
- else
+ if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
- }
PM.add(createX86JITCodeEmitterPass(*this, JCE));
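Both the machine-code and JIT emitter paths drop the Darwin special case: the 64-bit JIT now always uses CodeModel::Large, since JITed code cannot assume globals live in the low 4 GiB. The surviving logic in each overload reduces to:

    // Post-patch behavior of both addCodeEmitter overloads (sketch).
    if (Subtarget.is64Bit())
      setCodeModel(CodeModel::Large);  // no lower-4G assumption for globals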
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index bc1bbc3..d7106a0 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -361,7 +361,7 @@ void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
return;
}
printInstruction(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 860b72f..da2fb04 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -149,10 +149,7 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr,
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void XCoreDAGToDAGISel::
-InstructionSelect() {
- DEBUG(BB->dump());
-
+void XCoreDAGToDAGISel::InstructionSelect() {
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 5ef56c9..16e68fe 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -860,7 +860,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
}
// Create the frame index object for this incoming parameter...
int FI = MFI->CreateFixedObject(ObjSize,
- LRSaveSize + VA.getLocMemOffset());
+ LRSaveSize + VA.getLocMemOffset(),
+ true, false);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
@@ -884,7 +885,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// address
for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) {
// Create a stack slot
- int FI = MFI->CreateFixedObject(4, offset);
+ int FI = MFI->CreateFixedObject(4, offset, true, false);
if (i == FirstVAReg) {
XFI->setVarArgsFrameIndex(FI);
}
@@ -905,7 +906,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
} else {
// This will point to the next argument passed via stack.
XFI->setVarArgsFrameIndex(
- MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset()));
+ MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
+ true, false));
}
}
@@ -916,6 +918,17 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
+bool XCoreTargetLowering::
+CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_XCore);
+}
+
SDValue
XCoreTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
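CanLowerReturn lets a target reject returning values in registers before lowering commits to it; XCore answers by dry-running the return calling convention. The pattern, condensed as a sketch:

    // Sketch of the CheckReturn pattern above: build a throwaway CCState and
    // ask whether every return value can be assigned under RetCC_XCore.
    // CheckReturn fails if any value would need a memory location.
    SmallVector<CCValAssign, 16> RVLocs;
    CCState Info(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
    bool OK = Info.CheckReturn(OutTys, ArgsFlags, RetCC_XCore);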
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index ef8555e..10631af 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -159,6 +159,12 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG);
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG);
};
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 68e69a2..4ed4ed4 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -617,7 +617,7 @@ defm EXTSP : FU6_LU6_np<"extsp">;
let mayStore = 1 in
defm ENTSP : FU6_LU6_np<"entsp">;
-let isReturn = 1, isTerminator = 1, mayLoad = 1 in {
+let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in {
defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
}
}
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 136a035..c7c8c7b 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -330,9 +330,10 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx;
if (! isVarArg) {
// A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0);
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true, false);
} else {
- FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
+ false);
}
XFI->setUsesLR(FrameIdx);
XFI->setLRSpillSlot(FrameIdx);
@@ -340,13 +341,15 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (requiresRegisterScavenging(MF)) {
// Reserve a slot close to SP or frame pointer.
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
if (hasFP(MF)) {
// A callee save register is used to hold the FP.
// This needs saving / restoring in the epilogue / prologue.
XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
}
@@ -593,7 +596,7 @@ int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
-unsigned XCoreRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
bool FP = hasFP(MF);
return FP ? XCore::R10 : XCore::SP;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index a7df510..8ab1750 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -60,7 +60,7 @@ public:
unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, int *Value = NULL,
RegScavenger *RS = NULL) const;
-
+
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
@@ -71,7 +71,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
//! Return the array of argument passing registers
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 75f2055..267f46a 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -25,7 +25,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
- "i16:16:32-i32:32:32-i64:32:32"),
+ "i16:16:32-i32:32:32-i64:32:32-n32"),
InstrInfo(),
FrameInfo(*this),
TLInfo(*this) {
diff --git a/lib/Transforms/Hello/CMakeLists.txt b/lib/Transforms/Hello/CMakeLists.txt
index b80d15b..917b745 100644
--- a/lib/Transforms/Hello/CMakeLists.txt
+++ b/lib/Transforms/Hello/CMakeLists.txt
@@ -1,3 +1,3 @@
-add_llvm_library( LLVMHello
+add_llvm_loadable_module( LLVMHello
Hello.cpp
)
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 234d0ec..442f2fb 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -20,7 +20,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -245,8 +244,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
return false;
}
-static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx,
- LLVMContext &Context) {
+static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
if (!CI) return 0;
unsigned IdxV = CI->getZExtValue();
@@ -282,8 +280,7 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx,
/// users of the global, cleaning up the obvious ones. This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- LLVMContext &Context) {
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
User *U = *UI++;
@@ -304,11 +301,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = 0;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit);
} else if (CE->getOpcode() == Instruction::BitCast &&
isa<PointerType>(CE->getType())) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, 0, Context);
+ Changed |= CleanupConstantGlobalUsers(CE, 0);
}
if (CE->use_empty()) {
@@ -322,11 +319,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, Context));
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
}
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context);
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
if (GEP->use_empty()) {
GEP->eraseFromParent();
@@ -344,7 +341,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
if (SafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
- CleanupConstantGlobalUsers(V, Init, Context);
+ CleanupConstantGlobalUsers(V, Init);
return true;
}
}
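The long run of GlobalOpt hunks that follows is one mechanical refactor: helpers stop taking an LLVMContext& parameter because any Value can reach its context through its type. A one-line sketch of the accessor being relied on:

    #include "llvm/GlobalVariable.h"

    // Every Value carries its context, so threading a Context parameter
    // through GlobalOpt's helpers was redundant.
    llvm::LLVMContext &contextOf(llvm::GlobalVariable *GV) {
      return GV->getContext();  // equivalent to GV->getType()->getContext()
    }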
@@ -469,8 +466,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
/// behavior of the program in a more fine-grained way. We have determined that
/// this transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
-static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
- LLVMContext &Context) {
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
return 0;
@@ -492,11 +488,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
const StructLayout &Layout = *TD.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::getInt32Ty(Context), i),
- Context);
+ ConstantInt::get(Type::getInt32Ty(STy->getContext()), i));
assert(In && "Couldn't get element of initializer?");
- GlobalVariable *NGV = new GlobalVariable(Context,
- STy->getElementType(i), false,
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
GlobalVariable::InternalLinkage,
In, GV->getName()+"."+Twine(i),
GV->isThreadLocal(),
@@ -527,12 +521,10 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::getInt32Ty(Context), i),
- Context);
+ ConstantInt::get(Type::getInt32Ty(Init->getContext()), i));
assert(In && "Couldn't get element of initializer?");
- GlobalVariable *NGV = new GlobalVariable(Context,
- STy->getElementType(), false,
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
GlobalVariable::InternalLinkage,
In, GV->getName()+"."+Twine(i),
GV->isThreadLocal(),
@@ -554,7 +546,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV);
- Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context));
+ Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
// Loop over all of the uses of the global, replacing the constantexpr geps,
// with smaller constantexpr geps or direct references.
@@ -678,8 +670,7 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) {
return true;
}
-static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
- LLVMContext &Context) {
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
Instruction *I = cast<Instruction>(*UI++);
@@ -712,7 +703,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
Changed |= OptimizeAwayTrappingUsesOfValue(CI,
ConstantExpr::getCast(CI->getOpcode(),
- NewV, CI->getType()), Context);
+ NewV, CI->getType()));
if (CI->use_empty()) {
Changed = true;
CI->eraseFromParent();
@@ -730,7 +721,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
if (Idxs.size() == GEPI->getNumOperands()-1)
Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
- Idxs.size()), Context);
+ Idxs.size()));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -746,8 +737,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
/// value stored into it. If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
reachable with a null, so optimize away the load.
-static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- LLVMContext &Context) {
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
bool Changed = false;
// Keep track of whether we are able to remove all the uses of the global
@@ -758,7 +748,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
User *GlobalUser = *GUI++;
if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
- Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context);
+ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
// If we were able to delete all uses of the loads
if (LI->use_empty()) {
LI->eraseFromParent();
@@ -789,7 +779,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
// nor is the global.
if (AllNonStoreUsesGone) {
DEBUG(errs() << " *** GLOBAL NOW DEAD!\n");
- CleanupConstantGlobalUsers(GV, 0, Context);
+ CleanupConstantGlobalUsers(GV, 0);
if (GV->use_empty()) {
GV->eraseFromParent();
++NumDeleted;
@@ -801,10 +791,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {
+static void ConstantPropUsersOf(Value *V) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
- if (Constant *NewC = ConstantFoldInstruction(I, Context)) {
+ if (Constant *NewC = ConstantFoldInstruction(I)) {
I->replaceAllUsesWith(NewC);
// Advance UI to the next non-I use to avoid invalidating it!
@@ -824,11 +814,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
const Type *AllocTy,
Value* NElems,
- LLVMContext &Context,
TargetData* TD) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
- const Type *IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(GV->getContext());
// CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have
// returned NULL and we would not be here).
@@ -883,10 +872,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
GlobalVariable *InitBool =
- new GlobalVariable(Context, Type::getInt1Ty(Context), false,
+ new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
GlobalValue::InternalLinkage,
- ConstantInt::getFalse(Context), GV->getName()+".init",
- GV->isThreadLocal());
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".init", GV->isThreadLocal());
bool InitBoolUsed = false;
// Loop over all uses of GV, processing them in turn.
@@ -905,8 +894,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
switch (ICI->getPredicate()) {
default: llvm_unreachable("Unknown ICmp Predicate!");
case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT:
- LV = ConstantInt::getFalse(Context); // X < null -> always false
+ case ICmpInst::ICMP_SLT: // X < null -> always false
+ LV = ConstantInt::getFalse(GV->getContext());
break;
case ICmpInst::ICMP_ULE:
case ICmpInst::ICMP_SLE:
@@ -928,7 +917,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
} else {
StoreInst *SI = cast<StoreInst>(GV->use_back());
// The global is initialized when the store to it occurs.
- new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
SI->eraseFromParent();
}
@@ -949,9 +938,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV, Context);
+ ConstantPropUsersOf(NewGV);
if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue, Context);
+ ConstantPropUsersOf(RepValue);
return NewGV;
}
@@ -1153,8 +1142,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
- LLVMContext &Context) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
if (FieldNo >= FieldVals.size())
@@ -1172,7 +1160,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
// a new Load of the scalarized global.
Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
InsertedScalarizedValues,
- PHIsToRewrite, Context),
+ PHIsToRewrite),
LI->getName()+".f"+Twine(FieldNo), LI);
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
@@ -1196,16 +1184,14 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
/// the load, rewrite the derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
- LLVMContext &Context) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
// If this is a comparison against null, handle it.
if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
// If we have a setcc of the loaded pointer, we can use a setcc of any
// field.
Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
- InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ InsertedScalarizedValues, PHIsToRewrite);
Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
Constant::getNullValue(NPtr->getType()),
@@ -1223,8 +1209,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// Load the pointer for this field.
unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
- InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ InsertedScalarizedValues, PHIsToRewrite);
// Create the new GEP idx vector.
SmallVector<Value*, 8> GEPIdx;
@@ -1256,8 +1241,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// users.
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
Instruction *User = cast<Instruction>(*UI++);
- RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
}
@@ -1267,13 +1251,11 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
- LLVMContext &Context) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
UI != E; ) {
Instruction *User = cast<Instruction>(*UI++);
- RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
if (Load->use_empty()) {
@@ -1285,8 +1267,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value* NElems, LLVMContext &Context,
- TargetData *TD) {
+ Value* NElems, TargetData *TD) {
DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
const Type* MAT = getMallocAllocatedType(CI);
const StructType *STy = cast<StructType>(MAT);
@@ -1315,14 +1296,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
FieldGlobals.push_back(NGV);
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
- if (const StructType* ST = dyn_cast<StructType>(FieldTy))
+ if (const StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- const Type* IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems,
CI->getName() + ".f" + Twine(FieldNo));
- FieldMallocs.push_back(NMI);
+ CallInst *NCI = dyn_cast<BitCastInst>(NMI) ?
+ extractMallocCallFromBitCast(NMI) : cast<CallInst>(NMI);
+ FieldMallocs.push_back(NCI);
new StoreInst(NMI, NGV, CI);
}
@@ -1338,15 +1321,15 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// if (F1) { free(F1); F1 = 0; }
// if (F2) { free(F2); F2 = 0; }
// }
- Value *RunningOr = 0;
+ // The malloc can also fail if its argument is too large.
+ Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0);
+ Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1),
+ ConstantZero, "isneg");
for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
Constant::getNullValue(FieldMallocs[i]->getType()),
"isnull");
- if (!RunningOr)
- RunningOr = Cond; // First seteq
- else
- RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
}
// Split the basic block at the old malloc.
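The rewritten guard closes a soundness hole: a heap-SRoA'd allocation can fail not only by returning null but also when the requested size is negative viewed as a signed value (an oversized request), so the skip-the-stores condition is now seeded with `size < 0` instead of starting empty. A sketch of the resulting guard, using IRBuilder for brevity where the patch creates instructions directly:

    // Sketch; assumes the IRBuilder of this era.
    llvm::Value *buildFailureGuard(llvm::IRBuilder<> &B, llvm::Value *Size,
                                   const std::vector<llvm::Value*> &Mallocs) {
      // Seed: the original request was invalid (negative when signed).
      llvm::Value *Bad = B.CreateICmpSLT(
          Size, llvm::ConstantInt::get(Size->getType(), 0), "isneg");
      for (unsigned i = 0, e = Mallocs.size(); i != e; ++i) {
        llvm::Value *IsNull = B.CreateICmpEQ(
            Mallocs[i], llvm::Constant::getNullValue(Mallocs[i]->getType()),
            "isnull");
        Bad = B.CreateOr(Bad, IsNull, "tmp");  // any failure skips the stores
      }
      return Bad;  // branch to malloc_ret_null when true
    }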
@@ -1355,7 +1338,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
- BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",
+ BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
+ "malloc_ret_null",
OrigBB->getParent());
// Remove the uncond branch from OrigBB to ContBB, turning it into a cond
@@ -1370,9 +1354,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
Constant::getNullValue(GVVal->getType()),
"tmp");
- BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",
+ BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
OrigBB->getParent());
- BasicBlock *NextBlock = BasicBlock::Create(Context, "next",
+ BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
OrigBB->getParent());
Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
Cmp, NullPtrBlock);
@@ -1406,8 +1390,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Instruction *User = cast<Instruction>(*UI++);
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
continue;
}
@@ -1438,7 +1421,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *InVal = PN->getIncomingValue(i);
InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
- PHIsToRewrite, Context);
+ PHIsToRewrite);
FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
}
}
@@ -1477,8 +1460,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CallInst *CI,
const Type *AllocTy,
Module::global_iterator &GVI,
- TargetData *TD,
- LLVMContext &Context) {
+ TargetData *TD) {
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1508,15 +1490,14 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// This eliminates dynamic allocation, avoids an indirection accessing the
// data, and exposes the resultant global to further GlobalOpt.
// We cannot optimize the malloc if we cannot determine malloc array size.
- if (Value *NElems = getMallocArraySize(CI, Context, TD)) {
+ if (Value *NElems = getMallocArraySize(CI, TD, true)) {
if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (TD &&
NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems,
- Context, TD);
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD);
return true;
}
@@ -1540,7 +1521,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (const ArrayType *AT =
dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
- const Type *IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
@@ -1551,12 +1532,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
CI = dyn_cast<BitCastInst>(Malloc) ?
- extractMallocCallFromBitCast(Malloc):
- cast<CallInst>(Malloc);
+ extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, Context, TD),
- Context, TD);
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
return true;
}
}
@@ -1569,7 +1548,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// that only one value (besides its initializer) is ever stored to the global.
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
Module::global_iterator &GVI,
- TargetData *TD, LLVMContext &Context) {
+ TargetData *TD) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1585,12 +1564,12 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
- if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
const Type* MallocType = getMallocAllocatedType(CI);
if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
- GVI, TD, Context))
+ GVI, TD))
return true;
}
}
@@ -1602,8 +1581,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
/// two values ever stored into GV are its initializer and OtherVal. See if we
/// can shrink the global into a boolean and select between the two values
/// whenever it is used. This exposes the values to other scalar optimizations.
-static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
- LLVMContext &Context) {
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
const Type *GVElType = GV->getType()->getElementType();
// If GVElType is already i1, it is already shrunk. If the type of the GV is
@@ -1611,7 +1589,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
// between them is very expensive and unlikely to lead to later
// simplification. In these cases, we typically end up with "cond ? v1 : v2"
// where v1 and v2 both require constant pool loads, a big loss.
- if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() ||
+ if (GVElType == Type::getInt1Ty(GV->getContext()) ||
+ GVElType->isFloatingPoint() ||
isa<PointerType>(GVElType) || isa<VectorType>(GVElType))
return false;
@@ -1624,15 +1603,16 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV);
// Create the new global, initializing it to false.
- GlobalVariable *NewGV = new GlobalVariable(Context,
- Type::getInt1Ty(Context), false,
- GlobalValue::InternalLinkage, ConstantInt::getFalse(Context),
+ GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
+ false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
GV->isThreadLocal());
GV->getParent()->getGlobalList().insert(GV, NewGV);
Constant *InitVal = GV->getInitializer();
- assert(InitVal->getType() != Type::getInt1Ty(Context) &&
+ assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
"No reason to shrink to bool!");
// If initialized to zero and storing one into the global, we can use a cast
@@ -1649,7 +1629,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
// Only do this if we weren't storing a loaded value.
Value *StoreVal;
if (StoringOther || SI->getOperand(0) == InitVal)
- StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther);
+ StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
+ StoringOther);
else {
// Otherwise, we are storing a previously loaded copy. To do this,
// change the copy from copying the original value to just copying the
@@ -1708,24 +1689,26 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
#if 0
- cerr << "Global: " << *GV;
- cerr << " isLoaded = " << GS.isLoaded << "\n";
- cerr << " StoredType = ";
+ DEBUG(errs() << "Global: " << *GV);
+ DEBUG(errs() << " isLoaded = " << GS.isLoaded << "\n");
+ DEBUG(errs() << " StoredType = ");
switch (GS.StoredType) {
- case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break;
- case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break;
- case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break;
- case GlobalStatus::isStored: cerr << "stored\n"; break;
+ case GlobalStatus::NotStored: DEBUG(errs() << "NEVER STORED\n"); break;
+ case GlobalStatus::isInitializerStored: DEBUG(errs() << "INIT STORED\n");
+ break;
+ case GlobalStatus::isStoredOnce: DEBUG(errs() << "STORED ONCE\n"); break;
+ case GlobalStatus::isStored: DEBUG(errs() << "stored\n"); break;
}
if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
- cerr << " StoredOnceValue = " << *GS.StoredOnceValue << "\n";
+ DEBUG(errs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n");
if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
- cerr << " AccessingFunction = " << GS.AccessingFunction->getName()
- << "\n";
- cerr << " HasMultipleAccessingFunctions = "
- << GS.HasMultipleAccessingFunctions << "\n";
- cerr << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n";
- cerr << "\n";
+ DEBUG(errs() << " AccessingFunction = " << GS.AccessingFunction->getName()
+ << "\n");
+ DEBUG(errs() << " HasMultipleAccessingFunctions = "
+ << GS.HasMultipleAccessingFunctions << "\n");
+ DEBUG(errs() << " HasNonInstructionUser = "
+ << GS.HasNonInstructionUser<<"\n");
+ DEBUG(errs() << "\n");
#endif
// If this is a first class global and has only one accessing function
@@ -1764,8 +1747,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(),
- GV->getContext());
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
// If the global is dead now, delete it.
if (GV->use_empty()) {
@@ -1780,7 +1762,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -1794,8 +1776,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD,
- GV->getContext())) {
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
GVI = FirstNewGV; // Don't skip the newly produced globals!
return true;
}
@@ -1810,8 +1791,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(),
- GV->getContext());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
if (GV->use_empty()) {
DEBUG(errs() << " *** Substituting initializer allowed us to "
@@ -1828,14 +1808,13 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
- getAnalysisIfAvailable<TargetData>(),
- GV->getContext()))
+ getAnalysisIfAvailable<TargetData>()))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean.
if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
++NumShrunkToBool;
return true;
}
@@ -1987,11 +1966,10 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
/// specified array, returning the new global to use.
static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
- const std::vector<Function*> &Ctors,
- LLVMContext &Context) {
+ const std::vector<Function*> &Ctors) {
// If we made a change, reassemble the initializer list.
std::vector<Constant*> CSVals;
- CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535));
+ CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
CSVals.push_back(0);
// Create the new init list.
@@ -2000,12 +1978,14 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
if (Ctors[i]) {
CSVals[1] = Ctors[i];
} else {
- const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false);
+ const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
+ false);
const PointerType *PFTy = PointerType::getUnqual(FTy);
CSVals[1] = Constant::getNullValue(PFTy);
- CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647);
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
+ 2147483647);
}
- CAList.push_back(ConstantStruct::get(Context, CSVals, false));
+ CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
}
// Create the array initializer.
@@ -2021,8 +2001,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
// Create the new global and insert it next to the existing list.
- GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(),
- GCL->isConstant(),
+ GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
GCL->getLinkage(), CA, "",
GCL->isThreadLocal());
GCL->getParent()->getGlobalList().insert(GCL, NGV);
@@ -2056,7 +2035,7 @@ static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
/// enough for us to understand. In particular, if it is a cast of something,
/// we punt. We basically just support direct accesses to globals and GEP's of
/// globals. This should be kept up to date with CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) {
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
// Conservatively, avoid aggregate types. This is because we don't
// want to worry about them partially overlapping other stores.
if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
@@ -2096,8 +2075,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) {
/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
- ConstantExpr *Addr, unsigned OpNo,
- LLVMContext &Context) {
+ ConstantExpr *Addr, unsigned OpNo) {
// Base case of the recursion.
if (OpNo == Addr->getNumOperands()) {
assert(Val->getType() == Init->getType() && "Type mismatch!");
@@ -2126,10 +2104,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
unsigned Idx = CU->getZExtValue();
assert(Idx < STy->getNumElements() && "Struct index out of range!");
- Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context);
+ Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
// Return the modified struct.
- return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked());
+ return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
+ STy->isPacked());
} else {
ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
const ArrayType *ATy = cast<ArrayType>(Init->getType());
@@ -2152,15 +2131,14 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
assert(CI->getZExtValue() < ATy->getNumElements());
Elts[CI->getZExtValue()] =
- EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context);
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
return ConstantArray::get(ATy, Elts);
}
}
/// CommitValueTo - We have decided that Addr (which satisfies the predicate
/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
-static void CommitValueTo(Constant *Val, Constant *Addr,
- LLVMContext &Context) {
+static void CommitValueTo(Constant *Val, Constant *Addr) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
assert(GV->hasInitializer());
GV->setInitializer(Val);
@@ -2171,7 +2149,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr,
GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
Constant *Init = GV->getInitializer();
- Init = EvaluateStoreInto(Init, Val, CE, 2, Context);
+ Init = EvaluateStoreInto(Init, Val, CE, 2);
GV->setInitializer(Init);
}
@@ -2179,8 +2157,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr,
/// P after the stores reflected by 'memory' have been performed. If we can't
/// decide, return null.
static Constant *ComputeLoadResult(Constant *P,
- const DenseMap<Constant*, Constant*> &Memory,
- LLVMContext &Context) {
+ const DenseMap<Constant*, Constant*> &Memory) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
@@ -2218,8 +2195,6 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
return false;
- LLVMContext &Context = F->getContext();
-
CallStack.push_back(F);
/// Values - As we compute SSA register values, we store their contents here.
@@ -2246,7 +2221,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
if (SI->isVolatile()) return false; // no volatile accesses.
Constant *Ptr = getVal(Values, SI->getOperand(1));
- if (!isSimpleEnoughPointerToCommit(Ptr, Context))
+ if (!isSimpleEnoughPointerToCommit(Ptr))
// If this is too complex for us to commit, reject it.
return false;
Constant *Val = getVal(Values, SI->getOperand(0));
@@ -2280,12 +2255,12 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
if (LI->isVolatile()) return false; // no volatile accesses.
InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
- MutatedMemory, Context);
+ MutatedMemory);
if (InstResult == 0) return false; // Could not evaluate load.
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
const Type *Ty = AI->getType()->getElementType();
- AllocaTmps.push_back(new GlobalVariable(Context, Ty, false,
+ AllocaTmps.push_back(new GlobalVariable(Ty, false,
GlobalValue::InternalLinkage,
UndefValue::get(Ty),
AI->getName()));
@@ -2423,7 +2398,7 @@ static bool EvaluateStaticConstructor(Function *F) {
<< " stores.\n");
for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
E = MutatedMemory.end(); I != E; ++I)
- CommitValueTo(I->second, I->first, F->getContext());
+ CommitValueTo(I->second, I->first);
}
// At this point, we are done interpreting. If we created any 'alloca'
@@ -2480,7 +2455,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (!MadeChange) return false;
- GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext());
+ GCL = InstallGlobalCtors(GCL, Ctors);
return true;
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index ea47366..6918fe8 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,10 +19,11 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -32,6 +33,7 @@
using namespace llvm;
STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");
@@ -336,23 +338,38 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
CallSite CS = CallSites[CSi];
+ Function *Caller = CS.getCaller();
Function *Callee = CS.getCalledFunction();
- // We can only inline direct calls to non-declarations.
- if (Callee == 0 || Callee->isDeclaration()) continue;
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (isInstructionTriviallyDead(CS.getInstruction())) {
+ DEBUG(errs() << " -> Deleting dead call: "
+ << *CS.getInstruction() << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ CS.getInstruction()->eraseFromParent();
+ ++NumCallsDeleted;
+ } else {
+ // We can only inline direct calls to non-declarations.
+ if (Callee == 0 || Callee->isDeclaration()) continue;
- // If the policy determines that we should inline this function,
- // try to do so.
- if (!shouldInline(CS))
- continue;
+ // If the policy determines that we should inline this function,
+ // try to do so.
+ if (!shouldInline(CS))
+ continue;
- Function *Caller = CS.getCaller();
- // Attempt to inline the function...
- if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
- continue;
+ // Attempt to inline the function...
+ if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
+ continue;
+ ++NumInlined;
+ }
- // If we inlined the last possible call site to the function, delete the
- // function body now.
- if (Callee->use_empty() && Callee->hasLocalLinkage() &&
+ // If we inlined or deleted the last possible call site to the function,
+ // delete the function body now.
+ if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
// TODO: Can remove if in SCC now.
!SCCFunctions.count(Callee) &&
@@ -391,7 +408,6 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
}
--CSi;
- ++NumInlined;
Changed = true;
LocalChange = true;
}
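The restructure adds a fast path for a pattern IPSCCP produces: after the result of a call to a readonly function has been propagated, the call itself can be left trivially dead, and deleting it beats inlining it at any cost threshold. This is also why the Transforms/Utils/Local.h include appears: it declares isInstructionTriviallyDead. The fast path in isolation:

    // Sketch of the dead-call branch added above.
    if (isInstructionTriviallyDead(CS.getInstruction())) {
      CG[Caller]->removeCallEdgeFor(CS);     // keep the call graph in sync
      CS.getInstruction()->eraseFromParent();
      ++NumCallsDeleted;                     // tracked apart from NumInlined
    }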
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index fd69aeb..cb81330 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -75,6 +75,10 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (L->getParentLoop())
return false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
DominatorTree &DT = getAnalysis<DominatorTree>();
bool Changed = false;
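Bailing out when the loop is not in LoopSimplify form avoids extracting loops whose CFG lacks the canonical shape later code assumes. Roughly, the predicate requires a preheader, a single latch, and dedicated exits; a hedged approximation, not the real method body:

    // Rough equivalent of Loop::isLoopSimplifyForm() (sketch).
    bool looksSimplified(llvm::Loop *L) {
      return L->getLoopPreheader() &&   // unique out-of-loop predecessor
             L->getLoopLatch() &&       // single backedge source
             L->hasDedicatedExits();    // exits reached only from inside L
    }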
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 4f6369e..0b5e007 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -202,53 +202,35 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
// llvm.dbg.region.end calls, and any globals they point to if now dead.
static bool StripDebugInfo(Module &M) {
+ bool Changed = false;
+
// Remove all of the calls to the debugger intrinsics, and remove them from
// the module.
- Function *FuncStart = M.getFunction("llvm.dbg.func.start");
- Function *StopPoint = M.getFunction("llvm.dbg.stoppoint");
- Function *RegionStart = M.getFunction("llvm.dbg.region.start");
- Function *RegionEnd = M.getFunction("llvm.dbg.region.end");
- Function *Declare = M.getFunction("llvm.dbg.declare");
-
- if (FuncStart) {
- while (!FuncStart->use_empty()) {
- CallInst *CI = cast<CallInst>(FuncStart->use_back());
- CI->eraseFromParent();
- }
- FuncStart->eraseFromParent();
- }
- if (StopPoint) {
- while (!StopPoint->use_empty()) {
- CallInst *CI = cast<CallInst>(StopPoint->use_back());
- CI->eraseFromParent();
- }
- StopPoint->eraseFromParent();
- }
- if (RegionStart) {
- while (!RegionStart->use_empty()) {
- CallInst *CI = cast<CallInst>(RegionStart->use_back());
- CI->eraseFromParent();
- }
- RegionStart->eraseFromParent();
- }
- if (RegionEnd) {
- while (!RegionEnd->use_empty()) {
- CallInst *CI = cast<CallInst>(RegionEnd->use_back());
- CI->eraseFromParent();
- }
- RegionEnd->eraseFromParent();
- }
- if (Declare) {
+ if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
while (!Declare->use_empty()) {
CallInst *CI = cast<CallInst>(Declare->use_back());
CI->eraseFromParent();
}
Declare->eraseFromParent();
+ Changed = true;
}
NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
- if (NMD)
+ if (NMD) {
+ Changed = true;
NMD->eraseFromParent();
+ }
+ MetadataContext &TheMetadata = M.getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+ if (!MDDbgKind)
+ return Changed;
+
+ for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+ for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
+ ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI)
+ TheMetadata.removeMD(MDDbgKind, BI);
return true;
}
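By this revision, per-instruction debug info lives as "dbg" metadata managed by MetadataContext, so stripping must visit every instruction rather than just deleting intrinsic declarations. The triple loop, condensed:

    // Sketch: remove one metadata kind from every instruction in a module,
    // using the MetadataContext API as it existed at this revision.
    void stripKind(llvm::Module &M, llvm::MetadataContext &MD, unsigned Kind) {
      for (llvm::Module::iterator F = M.begin(), FE = M.end(); F != FE; ++F)
        for (llvm::Function::iterator B = F->begin(), BE = F->end();
             B != BE; ++B)
          for (llvm::BasicBlock::iterator I = B->begin(), IE = B->end();
               I != IE; ++I)
            MD.removeMD(Kind, I);  // detach the attachment, if present
    }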
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index c8541d7..e58fa63 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -412,7 +412,9 @@ class ABCD : public FunctionPass {
/// If PN_op1 and PN_op2 are different from NULL, create a constraint
/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
/// with the respective V_op#, if V_op# is a ConstantInt.
- void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, APInt value);
+ void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
+ ConstantInt *V_op1, ConstantInt *V_op2,
+ APInt value);
/// Returns the sigma representing the Instruction I in BasicBlock BB.
/// Returns NULL in case there is no sigma for this Instruction in this
@@ -735,25 +737,27 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
APInt Zero = APInt::getNullValue(width);
CmpInst::Predicate Pred = ICI->getPredicate();
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1);
+ ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2);
switch (Pred) {
case CmpInst::ICMP_SGT: // signed greater than
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, MinusOne);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, Zero);
+ createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne);
+ createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero);
break;
case CmpInst::ICMP_SGE: // signed greater or equal
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, Zero);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, MinusOne);
+ createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero);
+ createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne);
break;
case CmpInst::ICMP_SLT: // signed less than
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, MinusOne);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, Zero);
+ createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne);
+ createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero);
break;
case CmpInst::ICMP_SLE: // signed less or equal
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, Zero);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, MinusOne);
+ createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero);
+ createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne);
break;
default:
@@ -772,6 +776,10 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
/// b->a and c->a with weight 0 in the lower bound graph, and the edges
/// a->b and a->c with weight 0 in the upper bound graph.
void ABCD::createConstraintPHINode(PHINode *PN) {
+ // FIXME: We really want to disallow sigma nodes, but I don't know the best
+ // way to detect them other than this.
+ if (PN->getNumOperands() == 2) return;
+
int32_t width = cast<IntegerType>(PN->getType())->getBitWidth();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
@@ -796,13 +804,11 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth();
inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true);
inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false);
- created.insert(*SIG_op_t);
}
if (*SIG_op_f) {
int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth();
inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true);
inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false);
- created.insert(*SIG_op_f);
}
}
@@ -810,10 +816,17 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
/// with the respective V_op#, if V_op# is a ConstantInt.
void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
+ ConstantInt *V_op1, ConstantInt *V_op2,
APInt value) {
if (SIG_op1 && SIG_op2) {
inequality_graph.addEdge(SIG_op2, SIG_op1, value, true);
inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false);
+ } else if (SIG_op1 && V_op2) {
+ inequality_graph.addEdge(V_op2, SIG_op1, value, true);
+ inequality_graph.addEdge(SIG_op1, V_op2, -value, false);
+ } else if (SIG_op2 && V_op1) {
+ inequality_graph.addEdge(SIG_op2, V_op1, value, true);
+ inequality_graph.addEdge(V_op1, SIG_op2, -value, false);
}
}
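
The effect of the new overload is easiest to see on a concrete branch such as `if (x > 5)`: only `x` receives a sigma node, so the old two-argument form dropped the constraint entirely, while the new form records it against the ConstantInt. A minimal standalone restatement of the case analysis (Node, addEdge, and the print format are illustrative stand-ins, not from the patch):

    #include <cstdio>

    // Stand-ins for the pass's graph vertices (sigma PHIs or ConstantInt
    // operands); all names here are illustrative.
    struct Node { const char *Name; };

    static void addEdge(Node *From, Node *To, int W, bool UpperBound) {
      std::printf("%s -> %s (weight %d, %s bound)\n", From->Name, To->Name,
                  W, UpperBound ? "upper" : "lower");
    }

    // Same case analysis as the new createConstraintSigSig overload.
    static void constrain(Node *Sig1, Node *Sig2, Node *C1, Node *C2, int V) {
      if (Sig1 && Sig2) {
        addEdge(Sig2, Sig1, V, true); addEdge(Sig1, Sig2, -V, false);
      } else if (Sig1 && C2) {        // mixed case, previously dropped
        addEdge(C2, Sig1, V, true);   addEdge(Sig1, C2, -V, false);
      } else if (Sig2 && C1) {        // mixed case, previously dropped
        addEdge(Sig2, C1, V, true);   addEdge(C1, Sig2, -V, false);
      }
    }

    int main() {
      Node SigX = {"sigma(x)"}, Five = {"5"};
      // 'x > 5', true edge: x has a sigma, 5 is a ConstantInt with none,
      // matching the SGT call createConstraintSigSig(SIG_op2_t, SIG_op1_t,
      // CI2, CI1, MinusOne) above.
      constrain(/*Sig1=*/0, /*Sig2=*/&SigX, /*C1=*/&Five, /*C2=*/0, /*V=*/-1);
    }
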
@@ -1036,7 +1049,7 @@ void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
/// Prints the body of the dot file
void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
- DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator begin =
+ DenseMap<Value *, SmallPtrSet<Edge *, 16> >::const_iterator begin =
graph.begin(), end = graph.end();
for (; begin != end ; ++begin) {
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index e048518..5a92399 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -3,7 +3,6 @@ add_llvm_library(LLVMScalarOpts
ADCE.cpp
BasicBlockPlacement.cpp
CodeGenPrepare.cpp
- CondPropagate.cpp
ConstantProp.cpp
DCE.cpp
DeadStoreElimination.cpp
diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp
deleted file mode 100644
index 8a6c556..0000000
--- a/lib/Transforms/Scalar/CondPropagate.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-//===-- CondPropagate.cpp - Propagate Conditional Expressions -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass propagates information about conditional expressions through the
-// program, allowing it to eliminate conditional branches in some cases.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "condprop"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
-
-STATISTIC(NumBrThread, "Number of CFG edges threaded through branches");
-STATISTIC(NumSwThread, "Number of CFG edges threaded through switches");
-
-namespace {
- struct CondProp : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- CondProp() : FunctionPass(&ID) {}
-
- virtual bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(BreakCriticalEdgesID);
- //AU.addRequired<DominanceFrontier>();
- }
-
- private:
- bool MadeChange;
- SmallVector<BasicBlock *, 4> DeadBlocks;
- void SimplifyBlock(BasicBlock *BB);
- void SimplifyPredecessors(BranchInst *BI);
- void SimplifyPredecessors(SwitchInst *SI);
- void RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB);
- bool RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI);
- };
-}
-
-char CondProp::ID = 0;
-static RegisterPass<CondProp> X("condprop", "Conditional Propagation");
-
-FunctionPass *llvm::createCondPropagationPass() {
- return new CondProp();
-}
-
-bool CondProp::runOnFunction(Function &F) {
- bool EverMadeChange = false;
- DeadBlocks.clear();
-
- // While we are simplifying blocks, keep iterating.
- do {
- MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E;)
- SimplifyBlock(BB++);
- EverMadeChange = EverMadeChange || MadeChange;
- } while (MadeChange);
-
- if (EverMadeChange) {
- while (!DeadBlocks.empty()) {
- BasicBlock *BB = DeadBlocks.back(); DeadBlocks.pop_back();
- DeleteDeadBlock(BB);
- }
- }
- return EverMadeChange;
-}
-
-void CondProp::SimplifyBlock(BasicBlock *BB) {
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- // If this is a conditional branch based on a phi node that is defined in
- // this block, see if we can simplify predecessors of this block.
- if (BI->isConditional() && isa<PHINode>(BI->getCondition()) &&
- cast<PHINode>(BI->getCondition())->getParent() == BB)
- SimplifyPredecessors(BI);
-
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (isa<PHINode>(SI->getCondition()) &&
- cast<PHINode>(SI->getCondition())->getParent() == BB)
- SimplifyPredecessors(SI);
- }
-
- // If possible, simplify the terminator of this block.
- if (ConstantFoldTerminator(BB))
- MadeChange = true;
-
- // If this block ends with an unconditional branch and the only successor has
- // only this block as a predecessor, merge the two blocks together.
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- if (BI->isUnconditional() && BI->getSuccessor(0)->getSinglePredecessor() &&
- BB != BI->getSuccessor(0)) {
- BasicBlock *Succ = BI->getSuccessor(0);
-
- // If Succ has any PHI nodes, they are all single-entry PHI's. Eliminate
- // them.
- FoldSingleEntryPHINodes(Succ);
-
- // Remove BI.
- BI->eraseFromParent();
-
- // Move over all of the instructions.
- BB->getInstList().splice(BB->end(), Succ->getInstList());
-
- // Any phi nodes that had entries for Succ now have entries from BB.
- Succ->replaceAllUsesWith(BB);
-
- // Succ is now dead, but we cannot delete it without potentially
- // invalidating iterators elsewhere. Just insert an unreachable
- // instruction in it and delete this block later on.
- new UnreachableInst(BB->getContext(), Succ);
- DeadBlocks.push_back(Succ);
- MadeChange = true;
- }
-}
-
-// SimplifyPredecessors(branches) - We know that BI is a conditional branch
-// based on a PHI node defined in this block. If the phi node contains constant
-// operands, then the blocks corresponding to those operands can be modified to
-// jump directly to the destination instead of going through this block.
-void CondProp::SimplifyPredecessors(BranchInst *BI) {
- // TODO: We currently only handle the most trivial case, where the PHI node has
- // one use (the branch), and is the only instruction besides the branch and dbg
- // intrinsics in the block.
- PHINode *PN = cast<PHINode>(BI->getCondition());
-
- if (PN->getNumIncomingValues() == 1) {
- // Eliminate single-entry PHI nodes.
- FoldSingleEntryPHINodes(PN->getParent());
- return;
- }
-
-
- if (!PN->hasOneUse()) return;
-
- BasicBlock *BB = BI->getParent();
- if (&*BB->begin() != PN)
- return;
- BasicBlock::iterator BBI = BB->begin();
- BasicBlock::iterator BBE = BB->end();
- while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
- if (&*BBI != BI)
- return;
-
- // Ok, we have this really simple case, walk the PHI operands, looking for
- // constants. Walk from the end to remove operands from the end when
- // possible, and to avoid invalidating "i".
- for (unsigned i = PN->getNumIncomingValues(); i != 0; --i) {
- Value *InVal = PN->getIncomingValue(i-1);
- if (!RevectorBlockTo(PN->getIncomingBlock(i-1), InVal, BI))
- continue;
-
- ++NumBrThread;
-
- // If there were two predecessors before this simplification, or if the
- // PHI node contained all the same value except for the one we just
- // substituted, the PHI node may be deleted. Don't iterate through it the
- // last time.
- if (BI->getCondition() != PN) return;
- }
-}
-
-// SimplifyPredecessors(switch) - We know that SI is switch based on a PHI node
-// defined in this block. If the phi node contains constant operands, then the
-// blocks corresponding to those operands can be modified to jump directly to
-// the destination instead of going through this block.
-void CondProp::SimplifyPredecessors(SwitchInst *SI) {
- // TODO: We currently only handle the most trivial case, where the PHI node has
- // one use (the branch), and is the only instruction besides the branch and
- // dbg intrinsics in the block.
- PHINode *PN = cast<PHINode>(SI->getCondition());
- if (!PN->hasOneUse()) return;
-
- BasicBlock *BB = SI->getParent();
- if (&*BB->begin() != PN)
- return;
- BasicBlock::iterator BBI = BB->begin();
- BasicBlock::iterator BBE = BB->end();
- while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
- if (&*BBI != SI)
- return;
-
- // Ok, we have this really simple case, walk the PHI operands, looking for
- // constants. Walk from the end to remove operands from the end when
- // possible, and to avoid invalidating "i".
- for (unsigned i = PN->getNumIncomingValues(); i != 0; --i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValue(i-1))) {
- BasicBlock *PredBB = PN->getIncomingBlock(i-1);
- if (isa<BranchInst>(PredBB->getTerminator())) {
- // If we have a constant, forward the edge from its current to its
- // ultimate destination.
- unsigned DestCase = SI->findCaseValue(CI);
- RevectorBlockTo(PredBB, SI->getSuccessor(DestCase));
- ++NumSwThread;
-
- // If there were two predecessors before this simplification, or if the
- // PHI node contained all the same value except for the one we just
- // substituted, the PHI node may be deleted. Don't iterate through it the
- // last time.
- if (SI->getCondition() != PN) return;
- }
- }
-}
-
-
-// RevectorBlockTo - Revector the unconditional branch at the end of FromBB to
-// the ToBB block, which is one of the successors of its current successor.
-void CondProp::RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB) {
- BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator());
- assert(FromBr->isUnconditional() && "FromBB should end with uncond br!");
-
- // Get the old block we are threading through.
- BasicBlock *OldSucc = FromBr->getSuccessor(0);
-
- // OldSucc had multiple successors. If ToBB has multiple predecessors, then
- // the edge between them would be critical, which we already took care of.
- // If ToBB has single operand PHI node then take care of it here.
- FoldSingleEntryPHINodes(ToBB);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Change FromBr to branch to the new destination.
- FromBr->setSuccessor(0, ToBB);
-
- MadeChange = true;
-}
-
-bool CondProp::RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI){
- BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator());
- if (!FromBr->isUnconditional())
- return false;
-
- // Get the old block we are threading through.
- BasicBlock *OldSucc = FromBr->getSuccessor(0);
-
- // If the condition is a constant, simply revector the unconditional branch at
- // the end of FromBB to one of the successors of its current successor.
- if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond)) {
- BasicBlock *ToBB = BI->getSuccessor(CB->isZero());
-
- // OldSucc had multiple successors. If ToBB has multiple predecessors, then
- // the edge between them would be critical, which we already took care of.
- // If ToBB has single operand PHI node then take care of it here.
- FoldSingleEntryPHINodes(ToBB);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Change FromBr to branch to the new destination.
- FromBr->setSuccessor(0, ToBB);
- } else {
- BasicBlock *Succ0 = BI->getSuccessor(0);
- // Do not perform transform if the new destination has PHI nodes. The
- // transform will add new preds to the PHI's.
- if (isa<PHINode>(Succ0->begin()))
- return false;
-
- BasicBlock *Succ1 = BI->getSuccessor(1);
- if (isa<PHINode>(Succ1->begin()))
- return false;
-
- // Insert the new conditional branch.
- BranchInst::Create(Succ0, Succ1, Cond, FromBr);
-
- FoldSingleEntryPHINodes(Succ0);
- FoldSingleEntryPHINodes(Succ1);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Delete the old branch.
- FromBr->eraseFromParent();
- }
-
- MadeChange = true;
- return true;
-}
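
The pass deleted above threaded CFG edges through blocks whose terminator tests a PHI with constant incoming values. Restated as source-level control flow (a minimal sketch; the function names are illustrative):

    static bool compute() { return false; }  // placeholder PHI input

    // Before threading: the p==true predecessor feeds the constant 'true'
    // into the PHI that the very next branch tests.
    int before(bool p) {
      bool c = p ? true : compute();   // PHI of {true, compute()}
      return c ? 1 : 2;                // conditional branch on the PHI
    }

    // After threading the constant edge, the p==true path branches straight
    // to the 'return 1' successor, which is what RevectorBlockTo implemented.
    int after(bool p) {
      if (p) return 1;
      return compute() ? 1 : 2;
    }
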
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 4fee327..ea20813 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -66,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I, F.getContext())) {
+ if (Constant *C = ConstantFoldInstruction(I)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 90436f4..b0988b5 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -78,19 +78,96 @@ static RegisterPass<DSE> X("dse", "Dead Store Elimination");
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
-/// isValueAtLeastAsBigAs - Return true if V1 is greater than or equal to the
-/// stored size of V2. This returns false if we don't know.
+/// doesClobberMemory - Does this instruction clobber (write without reading)
+/// some memory?
+static bool doesClobberMemory(Instruction *I) {
+ if (isa<StoreInst>(I))
+ return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy:
+ case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true;
+ }
+ }
+ return false;
+}
+
+/// isElidable - If the value of this instruction and the memory it writes to
+/// are unused, may we delete this instruction?
+static bool isElidable(Instruction *I) {
+ assert(doesClobberMemory(I));
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return II->getIntrinsicID() != Intrinsic::lifetime_end;
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return !SI->isVolatile();
+ return true;
+}
+
+/// getPointerOperand - Return the pointer that is being clobbered.
+static Value *getPointerOperand(Instruction *I) {
+ assert(doesClobberMemory(I));
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return MI->getOperand(1);
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getOperand(1);
+ case Intrinsic::lifetime_end:
+ return II->getOperand(2);
+ }
+}
+
+/// getStoreSize - Return the length in bytes of the write by the clobbering
+/// instruction. If variable or unknown, returns -1.
+static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
+ assert(doesClobberMemory(I));
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!TD) return -1u;
+ return TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ }
+
+ Value *Len;
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ Len = MI->getLength();
+ } else {
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return -1u;
+ case Intrinsic::lifetime_end:
+ Len = II->getOperand(1);
+ break;
+ }
+ }
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
+ if (!LenCI->isAllOnesValue())
+ return LenCI->getZExtValue();
+ return -1u;
+}
+
+/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
+/// greater than or equal to the store in I2. This returns false if we don't
+/// know.
///
-static bool isValueAtLeastAsBigAs(Value *V1, Value *V2, const TargetData *TD) {
- const Type *V1Ty = V1->getType(), *V2Ty = V2->getType();
+static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
+ const TargetData *TD) {
+ const Type *I1Ty = getPointerOperand(I1)->getType();
+ const Type *I2Ty = getPointerOperand(I2)->getType();
// Exactly the same type, must have exactly the same size.
- if (V1Ty == V2Ty) return true;
+ if (I1Ty == I2Ty) return true;
- // If we don't have target data, we don't know.
- if (TD == 0) return false;
+ int I1Size = getStoreSize(I1, TD);
+ int I2Size = getStoreSize(I2, TD);
- return TD->getTypeStoreSize(V1Ty) >= TD->getTypeStoreSize(V2Ty);
+ return I1Size != -1 && I2Size != -1 && I1Size >= I2Size;
}
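
Taken together, the three new predicates decide when an earlier clobbering instruction is dead. A rough sketch of how they compose, mirroring the checks in DSE::runOnBasicBlock below (the helper name is hypothetical, it leans on the static functions above, and MemoryDependenceAnalysis must additionally have proven a must-alias 'Def' dependency between the two instructions):

    /// earlierClobberIsDead - Hypothetical helper, not in the patch.
    static bool earlierClobberIsDead(Instruction *Later, Instruction *Earlier,
                                     const TargetData *TD) {
      return doesClobberMemory(Earlier) &&               // writes w/o reading
             isElidable(Earlier) &&                      // not volatile, not
                                                         // a lifetime.end
             isStoreAtLeastAsWideAs(Later, Earlier, TD); // fully overwritten
    }
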
bool DSE::runOnBasicBlock(BasicBlock &BB) {
@@ -104,14 +181,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *Inst = BBI++;
// If we find a store or a free, get its memory dependence.
- if (!isa<StoreInst>(Inst) && !isFreeCall(Inst))
+ if (!doesClobberMemory(Inst) && !isFreeCall(Inst))
continue;
- // Don't molest volatile stores or do queries that will return "clobber".
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- if (SI->isVolatile())
- continue;
-
MemDepResult InstDep = MD.getDependency(Inst);
// Ignore non-local stores.
@@ -124,16 +196,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
continue;
}
- StoreInst *SI = cast<StoreInst>(Inst);
-
// If not a definite must-alias dependency, ignore it.
if (!InstDep.isDef())
continue;
// If this is a store-store dependence, then the previous store is dead so
// long as this store is at least as big as it.
- if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst()))
- if (isValueAtLeastAsBigAs(SI->getOperand(0), DepStore->getOperand(0),TD)){
+ if (doesClobberMemory(InstDep.getInst())) {
+ Instruction *DepStore = InstDep.getInst();
+ if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) &&
+ isElidable(DepStore)) {
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepStore);
NumFastStores++;
@@ -146,37 +218,43 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
--BBI;
continue;
}
+ }
+
+ if (!isElidable(Inst))
+ continue;
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
- if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
- if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad) {
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(BBI);
-
- DeleteDeadInstruction(SI);
-
- if (NextInst == 0) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
- NumFastStores++;
- MadeChange = true;
- continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+ if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+ SI->getOperand(0) == DepLoad) {
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
+ DeleteDeadInstruction(SI);
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ NumFastStores++;
+ MadeChange = true;
+ continue;
+ }
}
}
// If this is a lifetime end marker, we can throw away the store.
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
// Delete the store and now-dead instructions that feed it.
// DeleteDeadInstruction can delete the current instruction. Save BBI
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI);
+ DeleteDeadInstruction(Inst);
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -202,11 +280,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- StoreInst *Dependency = dyn_cast_or_null<StoreInst>(Dep.getInst());
- if (!Dependency || Dependency->isVolatile())
+ Instruction *Dependency = Dep.getInst();
+ if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency))
return false;
- Value *DepPointer = Dependency->getPointerOperand()->getUnderlyingObject();
+ Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
// Check for aliasing.
if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
@@ -251,39 +329,28 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
--BBI;
// If we find a store whose pointer is dead.
- if (StoreInst* S = dyn_cast<StoreInst>(BBI)) {
- if (!S->isVolatile()) {
+ if (doesClobberMemory(BBI)) {
+ if (isElidable(BBI)) {
// See through pointer-to-pointer bitcasts
- Value* pointerOperand = S->getPointerOperand()->getUnderlyingObject();
+ Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject();
// Alloca'd pointers or byval arguments (which are functionally like
// alloca's) are valid candidates for removal.
if (deadPointers.count(pointerOperand)) {
// DCE instructions only used to calculate that store.
+ Instruction *Dead = BBI;
BBI++;
- DeleteDeadInstruction(S, &deadPointers);
+ DeleteDeadInstruction(Dead, &deadPointers);
NumFastStores++;
MadeChange = true;
+ continue;
}
}
- continue;
- }
-
- // We can also remove memcpy's to local variables at the end of a function.
- if (MemCpyInst *M = dyn_cast<MemCpyInst>(BBI)) {
- Value *dest = M->getDest()->getUnderlyingObject();
-
- if (deadPointers.count(dest)) {
- BBI++;
- DeleteDeadInstruction(M, &deadPointers);
- NumFastOther++;
- MadeChange = true;
+ // Because a memcpy or memmove is also a load, we can't skip it if we
+ // didn't remove it.
+ if (!isa<MemTransferInst>(BBI))
continue;
- }
-
- // Because a memcpy is also a load, we can't skip it if we didn't remove
- // it.
}
Value* killPointer = 0;
@@ -304,11 +371,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
killPointer = L->getPointerOperand();
} else if (VAArgInst* V = dyn_cast<VAArgInst>(BBI)) {
killPointer = V->getOperand(0);
- } else if (isa<MemCpyInst>(BBI) &&
- isa<ConstantInt>(cast<MemCpyInst>(BBI)->getLength())) {
- killPointer = cast<MemCpyInst>(BBI)->getSource();
+ } else if (isa<MemTransferInst>(BBI) &&
+ isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) {
+ killPointer = cast<MemTransferInst>(BBI)->getSource();
killPointerSize = cast<ConstantInt>(
- cast<MemCpyInst>(BBI)->getLength())->getZExtValue();
+ cast<MemTransferInst>(BBI)->getLength())->getZExtValue();
} else if (AllocaInst* A = dyn_cast<AllocaInst>(BBI)) {
deadPointers.erase(A);
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0e3f750..a8f39c1 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -443,6 +443,11 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
valueNumbering[C] = e;
return e;
}
+ if (!MD) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
MemDepResult local_dep = MD->getDependency(C);
@@ -624,7 +629,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
- DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
assert(VI != valueNumbering.end() && "Value not numbered?");
return VI->second;
}
@@ -644,7 +649,7 @@ void ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
- for (DenseMap<Value*, uint32_t>::iterator
+ for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
}
@@ -669,10 +674,12 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- GVN(bool nopre = false) : FunctionPass(&ID), NoPRE(nopre) { }
+ explicit GVN(bool nopre = false, bool noloads = false)
+ : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }
private:
bool NoPRE;
+ bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
@@ -682,7 +689,8 @@ namespace {
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
- AU.addRequired<MemoryDependenceAnalysis>();
+ if (!NoLoads)
+ AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<DominatorTree>();
@@ -711,7 +719,9 @@ namespace {
}
// createGVNPass - The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoPRE) { return new GVN(NoPRE); }
+FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
+ return new GVN(NoPRE, NoLoads);
+}
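
With the extra flag, a client can run GVN without any load elimination at all, for example when MemoryDependenceAnalysis is too expensive. A sketch of how a pipeline might request that (the surrounding pass-manager setup is assumed):

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"

    // Illustrative only: with NoLoads set, GVN never requests
    // MemoryDependenceAnalysis and every load-related path bails out early
    // on the 'if (!MD)' checks added in this patch.
    static void addLoadFreeGVN(llvm::PassManager &PM) {
      PM.add(llvm::createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true));
    }
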
static RegisterPass<GVN> X("gvn",
"Global Value Numbering");
@@ -1476,6 +1486,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
+ if (!MD)
+ return false;
+
if (L->isVolatile())
return false;
@@ -1686,7 +1699,7 @@ bool GVN::processInstruction(Instruction *I,
if (constVal) {
p->replaceAllUsesWith(constVal);
- if (isa<PointerType>(constVal->getType()))
+ if (MD && isa<PointerType>(constVal->getType()))
MD->invalidateCachedPointerInfo(constVal);
VN.erase(p);
@@ -1707,7 +1720,7 @@ bool GVN::processInstruction(Instruction *I,
// Remove it!
VN.erase(I);
I->replaceAllUsesWith(repl);
- if (isa<PointerType>(repl->getType()))
+ if (MD && isa<PointerType>(repl->getType()))
MD->invalidateCachedPointerInfo(repl);
toErase.push_back(I);
return true;
@@ -1721,7 +1734,8 @@ bool GVN::processInstruction(Instruction *I,
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
- MD = &getAnalysis<MemoryDependenceAnalysis>();
+ if (!NoLoads)
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
@@ -1793,7 +1807,7 @@ bool GVN::processBlock(BasicBlock *BB) {
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I) {
DEBUG(errs() << "GVN removed: " << **I << '\n');
- MD->removeInstruction(*I);
+ if (MD) MD->removeInstruction(*I);
(*I)->eraseFromParent();
DEBUG(verifyRemoved(*I));
}
@@ -1946,12 +1960,12 @@ bool GVN::performPRE(Function &F) {
localAvail[CurrentBlock]->table[ValNo] = Phi;
CurInst->replaceAllUsesWith(Phi);
- if (isa<PointerType>(Phi->getType()))
+ if (MD && isa<PointerType>(Phi->getType()))
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
- MD->removeInstruction(CurInst);
+ if (MD) MD->removeInstruction(CurInst);
CurInst->eraseFromParent();
DEBUG(verifyRemoved(CurInst));
Changed = true;
@@ -2011,12 +2025,12 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
// Walk through the value number scope to make sure the instruction isn't
// ferreted away in it.
- for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+ for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
const ValueNumberScope *VNS = I->second;
while (VNS) {
- for (DenseMap<uint32_t, Value*>::iterator
+ for (DenseMap<uint32_t, Value*>::const_iterator
II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
assert(II->second != Inst && "Inst still in value numbering scope!");
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index b0bc70c..2912421 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -536,8 +536,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
if (!ExitBlock) return;
- Instruction *InsertPt = ExitBlock->getFirstNonPHI();
BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstNonPHI();
BasicBlock::iterator I = Preheader->getTerminator();
while (I != Preheader->begin()) {
--I;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 7e75cfb..1c48366 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -42,6 +42,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
@@ -283,6 +284,8 @@ namespace {
Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *visitCallInst(CallInst &CI);
Instruction *visitInvokeInst(InvokeInst &II);
+
+ Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
Instruction *visitPHINode(PHINode &PN);
Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
Instruction *visitAllocaInst(AllocaInst &AI);
@@ -380,10 +383,6 @@ namespace {
/// commutative operators.
bool SimplifyCommutative(BinaryOperator &I);
- /// SimplifyCompare - This reorders the operands of a CmpInst to get them in
- /// most-complex to least-complex order.
- bool SimplifyCompare(CmpInst &I);
-
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
@@ -478,6 +477,34 @@ static const Type *getPromotedType(const Type *Ty) {
return Ty;
}
+/// ShouldChangeType - Return true if it is desirable to convert a computation
+/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
+/// type for example, or from a smaller to a larger illegal type.
+static bool ShouldChangeType(const Type *From, const Type *To,
+ const TargetData *TD) {
+ assert(isa<IntegerType>(From) && isa<IntegerType>(To));
+
+ // If we don't have TD, we don't know if the source/dest are legal.
+ if (!TD) return false;
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ bool FromLegal = TD->isLegalInteger(FromWidth);
+ bool ToLegal = TD->isLegalInteger(ToWidth);
+
+ // If the source is a legal integer type and the result would be an illegal
+ // type, don't do the transformation.
+ if (FromLegal && !ToLegal)
+ return false;
+
+ // Otherwise, if both are illegal, do not increase the size of the result. We
+ // do allow things like i160 -> i64, but not i64 -> i160.
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+ return false;
+
+ return true;
+}
+
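
The rule is easier to see with concrete widths. A standalone restatement on plain integers, assuming a target whose TargetData marks i8/i16/i32/i64 as legal (that legality set is an assumption for illustration; isLegal stands in for TargetData::isLegalInteger, and the TargetData-null early-out is omitted):

    #include <cassert>

    static bool isLegal(unsigned W) {      // stand-in for TD->isLegalInteger
      return W == 8 || W == 16 || W == 32 || W == 64;
    }

    static bool shouldChangeWidth(unsigned FromW, unsigned ToW) {
      if (isLegal(FromW) && !isLegal(ToW))
        return false;                            // never legal -> illegal
      if (!isLegal(FromW) && !isLegal(ToW) && ToW > FromW)
        return false;                            // illegal may not grow
      return true;
    }

    int main() {
      assert( shouldChangeWidth(32, 64));    // legal -> legal: fine
      assert(!shouldChangeWidth(32, 1293));  // would make a legal type illegal
      assert( shouldChangeWidth(160, 64));   // i160 -> i64 is allowed
      assert(!shouldChangeWidth(64, 160));   // i64 -> i160 is rejected
      assert( shouldChangeWidth(160, 128));  // illegal may still shrink
    }
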
/// getBitCastOperand - If the specified operand is a CastInst, a constant
/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
/// operand value, otherwise return null.
@@ -584,17 +611,6 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
return Changed;
}
-/// SimplifyCompare - For a CmpInst this function just orders the operands
-/// so that they are listed from right (least complex) to left (most complex).
-/// This puts constants before unary operators before binary operators.
-bool InstCombiner::SimplifyCompare(CmpInst &I) {
- if (getComplexity(I.getOperand(0)) >= getComplexity(I.getOperand(1)))
- return false;
- I.swapOperands();
- // Compare instructions are not associative so there's nothing else we can do.
- return true;
-}
-
// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
// if the LHS is a constant zero (which is the 'negate' form).
//
@@ -4304,25 +4320,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) // X & undef -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // and X, X = X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Op1);
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (isa<VectorType>(I.getType())) {
- if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X & <-1,-1> -> X
- return ReplaceInstUsesWith(I, I.getOperand(0));
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0>
- }
- }
+
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt &AndRHSMask = AndRHS->getValue();
@@ -4443,42 +4449,29 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return NV;
}
- Value *Op0NotVal = dyn_castNotVal(Op0);
- Value *Op1NotVal = dyn_castNotVal(Op1);
-
- if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// (~A & ~B) == (~(A | B)) - De Morgan's Law
- if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- return BinaryOperator::CreateNot(Or);
- }
-
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(Or);
+ }
+
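
The identities this hunk relies on (and the matching visitOr rewrite further down) are plain bitwise facts; a quick standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t A = 0xDEADBEEF, B = 0x01234567;   // arbitrary sample values
      assert((~A & ~B) == ~(A | B));             // De Morgan, visitAnd form
      assert((~A | ~B) == ~(A & B));             // dual form used in visitOr
      assert(((A | B) & ~(A & B)) == (A ^ B));   // (A|B) & ~(A&B) -> A^B
    }
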
{
Value *A = 0, *B = 0, *C = 0, *D = 0;
- if (match(Op0, m_Or(m_Value(A), m_Value(B)))) {
- if (A == Op1 || B == Op1) // (A | ?) & A --> A
- return ReplaceInstUsesWith(I, Op1);
-
- // (A|B) & ~(A&B) -> A^B
- if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- }
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
- if (match(Op1, m_Or(m_Value(A), m_Value(B)))) {
- if (A == Op0 || B == Op0) // A & (A | ?) --> A
- return ReplaceInstUsesWith(I, Op0);
-
- // ~(A&B) & (A|B) -> A^B
- if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- }
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
if (Op0->hasOneUse() &&
match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
@@ -5010,27 +5003,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) // X | undef -> -1
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- // or X, X = X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Op0);
-
+ if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (isa<VectorType>(I.getType())) {
- if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
- return ReplaceInstUsesWith(I, I.getOperand(1));
- }
- }
- // or X, -1 == -1
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
@@ -5063,13 +5044,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *A = 0, *B = 0;
ConstantInt *C1 = 0, *C2 = 0;
- if (match(Op0, m_And(m_Value(A), m_Value(B))))
- if (A == Op1 || B == Op1) // (A & ?) | A --> A
- return ReplaceInstUsesWith(I, Op1);
- if (match(Op1, m_And(m_Value(A), m_Value(B))))
- if (A == Op0 || B == Op0) // A | (A & ?) --> A
- return ReplaceInstUsesWith(I, Op0);
-
// (A | B) | C and A | (B | C) -> bswap if possible.
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
if (match(Op0, m_Or(m_Value(), m_Value())) ||
@@ -5203,23 +5177,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Ret) return Ret;
}
- if ((A = dyn_castNotVal(Op0))) { // ~A | Op1
- if (A == Op1) // ~A | A == -1
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
- } else {
- A = 0;
- }
- // Note, A is still live here!
- if ((B = dyn_castNotVal(Op1))) { // Op0 | ~B
- if (Op0 == B)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- // (~A | ~B) == (~(A & B)) - De Morgan's Law
- if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan");
- return BinaryOperator::CreateNot(And);
- }
- }
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(And);
+ }
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
@@ -5942,28 +5907,25 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
}
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
- bool Changed = SimplifyCompare(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
- // Fold trivial predicates.
- if (I.getPredicate() == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
- if (I.getPredicate() == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// Simplify 'fcmp pred X, X'
if (Op0 == Op1) {
switch (I.getPredicate()) {
default: llvm_unreachable("Unknown predicate!");
- case FCmpInst::FCMP_UEQ: // True if unordered or equal
- case FCmpInst::FCMP_UGE: // True if unordered, greater than, or equal
- case FCmpInst::FCMP_ULE: // True if unordered, less than, or equal
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
- case FCmpInst::FCMP_OGT: // True if ordered and greater than
- case FCmpInst::FCMP_OLT: // True if ordered and less than
- case FCmpInst::FCMP_ONE: // True if ordered and operands are unequal
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
-
case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
case FCmpInst::FCMP_ULT: // True if unordered or less than
case FCmpInst::FCMP_UGT: // True if unordered or greater than
@@ -5984,23 +5946,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
}
- if (isa<UndefValue>(Op1)) // fcmp pred X, undef -> undef
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
-
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
- // If the constant is a nan, see if we can fold the comparison based on it.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->getValueAPF().isNaN()) {
- if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and...
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- assert(FCmpInst::isUnordered(I.getPredicate()) &&
- "Comparison must be either ordered or unordered!");
- // True if unordered.
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- }
- }
-
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
switch (LHSI->getOpcode()) {
case Instruction::PHI:
@@ -6047,26 +5994,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
- bool Changed = SimplifyCompare(I);
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- const Type *Ty = Op0->getType();
-
- // icmp X, X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(),
- I.isTrueWhenEqual()));
-
- if (isa<UndefValue>(Op1)) // X icmp undef -> undef
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
- // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
- // addresses never equal each other! We already know that Op0 != Op1.
- if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
- isa<ConstantPointerNull>(Op0)) &&
- (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
- isa<ConstantPointerNull>(Op1)))
- return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context),
- !I.isTrueWhenEqual()));
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ const Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
if (Ty == Type::getInt1Ty(*Context)) {
@@ -6131,27 +6074,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// If we have an icmp le or icmp ge instruction, turn it into the
// appropriate icmp lt or icmp gt instruction. This allows us to rely on
- // them being folded in the code below.
+ // them being folded in the code below. The SimplifyICmpInst code has
+ // already handled the edge cases for us, so we just assert on them.
switch (I.getPredicate()) {
default: break;
case ICmpInst::ICMP_ULE:
- if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
AddOne(CI));
case ICmpInst::ICMP_SLE:
- if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
AddOne(CI));
case ICmpInst::ICMP_UGE:
- if (CI->isMinValue(false)) // A >=u MIN -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
SubOne(CI));
case ICmpInst::ICMP_SGE:
- if (CI->isMinValue(true)) // A >=s MIN -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
SubOne(CI));
}
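
The canonicalization leans on the edge cases already being gone: once SimplifyICmpInst has folded comparisons against the extreme constants, `A <= C` and `A < C+1` (AddOne), and likewise `A >= C` and `A > C-1` (SubOne), are interchangeable. A small sanity check on plain unsigned values (the constants are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X != 100; ++X) {
        assert((X <= 7u) == (X < 8u));   // icmp ule X, C  ->  icmp ult X, C+1
        assert((X >= 5u) == (X > 4u));   // icmp uge X, C  ->  icmp ugt X, C-1
      }
    }
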
@@ -8083,8 +8023,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V))
- return ConstantExpr::getIntegerCast(C, Ty,
- isSigned /*Sext or ZExt*/);
+ return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// Otherwise, it must be an instruction.
Instruction *I = cast<Instruction>(V);
@@ -8117,8 +8056,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
return I->getOperand(0);
// Otherwise, must be the same type of cast, so just reinsert a new one.
- Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
- Ty);
+ Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),Ty);
break;
case Instruction::Select: {
Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
@@ -8167,9 +8105,15 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
return NV;
// If we are casting a PHI then fold the cast into the PHI
- if (isa<PHINode>(Src))
- if (Instruction *NV = FoldOpIntoPhi(CI))
- return NV;
+ if (isa<PHINode>(Src)) {
+ // We don't do this if it would create a PHI node with an illegal type when
+ // the current type is legal.
+ if (!isa<IntegerType>(Src->getType()) ||
+ !isa<IntegerType>(CI.getType()) ||
+ ShouldChangeType(CI.getType(), Src->getType(), TD))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+ }
return 0;
}
@@ -8289,23 +8233,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return commonCastTransforms(CI);
}
-/// isSafeIntegerType - Return true if this is a basic integer type, not a crazy
-/// type like i42. We don't want to introduce operations on random non-legal
-/// integer types where they don't already exist in the code. In the future,
-/// we should consider making this based off target-data, so that 32-bit targets
-/// won't get i64 operations etc.
-static bool isSafeIntegerType(const Type *Ty) {
- switch (Ty->getPrimitiveSizeInBits()) {
- case 8:
- case 16:
- case 32:
- case 64:
- return true;
- default:
- return false;
- }
-}
-
/// commonIntCastTransforms - This function implements the common transforms
/// for trunc, zext, and sext.
Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
@@ -8334,8 +8261,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
// Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isSafeIntegerType(DestTy->getScalarType()) ||
- !isSafeIntegerType(SrcI->getType()->getScalarType())) &&
+ if ((isa<VectorType>(DestTy) ||
+ ShouldChangeType(SrcI->getType(), DestTy, TD)) &&
CanEvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode(), NumCastsRemoved)) {
// If this cast is a truncate, evaluating in a different type always
@@ -8356,6 +8283,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
break;
case Instruction::ZExt: {
DoXForm = NumCastsRemoved >= 1;
+
if (!DoXForm && 0) {
// If it's unnecessary to issue an AND to clear the high bits, it's
// always profitable to do this xform.
@@ -8522,7 +8450,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return BinaryOperator::CreateLShr(V1, V2);
}
}
-
+
return 0;
}
@@ -10880,9 +10808,10 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
}
-// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
-// operator and they all are only used by the PHI, PHI together their
-// inputs, and do the operation once, to the result of the PHI.
+
+/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+/// operator and they all are only used by the PHI, PHI together their
+/// inputs, and do the operation once, to the result of the PHI.
Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
@@ -10900,6 +10829,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
+
+ // Be careful about transforming integer PHIs. We don't want to pessimize
+ // the code by turning an i32 into an i1293.
+ if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) {
+ if (!ShouldChangeType(PN.getType(), CastSrcTy, TD))
+ return 0;
+ }
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
// Can fold binop, compare or shift here if the RHS is a constant,
// otherwise call FoldPHIArgBinOpIntoPHI.
@@ -11012,6 +10948,222 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
}
+namespace {
+struct PHIUsageRecord {
+ unsigned PHIId; // The ID # of the PHI (something deterministic to sort on)
+ unsigned Shift; // The amount shifted.
+ Instruction *Inst; // The trunc instruction.
+
+ PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+ : PHIId(pn), Shift(Sh), Inst(User) {}
+
+ bool operator<(const PHIUsageRecord &RHS) const {
+ if (PHIId < RHS.PHIId) return true;
+ if (PHIId > RHS.PHIId) return false;
+ if (Shift < RHS.Shift) return true;
+ if (Shift > RHS.Shift) return false;
+ return Inst->getType()->getPrimitiveSizeInBits() <
+ RHS.Inst->getType()->getPrimitiveSizeInBits();
+ }
+};
+
+struct LoweredPHIRecord {
+ PHINode *PN; // The PHI that was lowered.
+ unsigned Shift; // The amount shifted.
+ unsigned Width; // The width extracted.
+
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
+ : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+ // Ctor form used by DenseMap.
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(0, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(0, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ static bool isPod() { return true; }
+ };
+}
+
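
The specialization above follows the general DenseMapInfo contract: a custom key type must supply reserved empty and tombstone keys that can never equal a real key (here a null PHI with shifts 0 and 1), plus hashing and equality. A minimal sketch of the same pattern for some other key type (the Key struct is hypothetical; real keys would have to avoid the reserved A == ~0U values):

    #include "llvm/ADT/DenseMap.h"

    struct Key { unsigned A, B; };     // hypothetical key type

    namespace llvm {
      template<> struct DenseMapInfo<Key> {
        static inline Key getEmptyKey()     { Key K = {~0U, 0}; return K; }
        static inline Key getTombstoneKey() { Key K = {~0U, 1}; return K; }
        static unsigned getHashValue(const Key &V) { return V.A*37U ^ V.B; }
        static bool isEqual(const Key &L, const Key &R) {
          return L.A == R.A && L.B == R.B;
        }
        static bool isPod() { return true; }
      };
    }
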
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
+/// so, we split the PHI into the various pieces being extracted. This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+ // nodes which are extracted from. PHIsInspected is a set we use to avoid
+ // revisiting PHIs; PHIsToSlice is an ordered list of PHIs that we need to
+ // check the uses of (to ensure they are all extracts).
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHIsInspected.insert(UserPN))
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(User)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ continue;
+ }
+
+ // Otherwise it must be a lshr which can only be used by one trunc.
+ if (User->getOpcode() != Instruction::LShr ||
+ !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return 0;
+
+ unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+ }
+ }
+
+ // If we have no users, they must all be self-uses; just nuke the PHI.
+ if (PHIUsers.empty())
+ return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+ // If this phi node is transformable, create new PHIs for all the pieces
+ // extracted out of it. First, sort the users by their offset and size.
+ array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+ DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
+ );
+
+ // PredValues - This is a temporary used when rewriting PHI nodes. It is
+ // hoisted out here to avoid construction/destruction thrashing.
+ DenseMap<BasicBlock*, Value*> PredValues;
+
+ // ExtractedVals - Each new PHI we introduce is saved here so we don't
+ // introduce redundant PHIs.
+ DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+ for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+ unsigned PHIId = PHIUsers[UserI].PHIId;
+ PHINode *PN = PHIsToSlice[PHIId];
+ unsigned Offset = PHIUsers[UserI].Shift;
+ const Type *Ty = PHIUsers[UserI].Inst->getType();
+
+ PHINode *EltPHI;
+
+ // If we've already lowered a user like this, reuse the previously lowered
+ // value.
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+
+ // Otherwise, create the new PHI node for this user.
+ EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
+ assert(EltPHI->getType() != PN->getType() &&
+ "Truncate didn't shrink phi?");
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *&PredVal = PredValues[Pred];
+
+ // If we already have a value for this predecessor, reuse it.
+ if (PredVal) {
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ // Handle the PHI self-reuse case.
+ Value *InVal = PN->getIncomingValue(i);
+ if (InVal == PN) {
+ PredVal = EltPHI;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ } else if (PHINode *InPHI = dyn_cast<PHINode>(InVal)) {
+ // If the incoming value was a PHI, and if it was one of the PHIs we
+ // already rewrote, just use the lowered value.
+ if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+ PredVal = Res;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+ }
+
+ // Otherwise, do an extract in the predecessor.
+ Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Value *Res = InVal;
+ if (Offset)
+ Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Offset), "extract");
+ Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ PredVal = Res;
+ EltPHI->addIncoming(Res, Pred);
+
+ // If the incoming value was a PHI, and if it was one of the PHIs we are
+ // rewriting, we will ultimately delete the code we inserted. This
+ // means we need to revisit that PHI to make sure we extract out the
+ // needed piece.
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHIsInspected.count(OldInVal)) {
+ unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+ OldInVal)-PHIsToSlice.begin();
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ cast<Instruction>(Res)));
+ ++UserE;
+ }
+ }
+ PredValues.clear();
+
+ DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ << *EltPHI << '\n');
+ ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+ }
+
+ // Replace the use of this piece with the PHI node.
+ ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+ }
+
+ // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+ // with undefs.
+ Value *Undef = UndefValue::get(FirstPhi.getType());
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
+ return ReplaceInstUsesWith(FirstPhi, Undef);
+}
+
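
What the slicing computes, restated on plain integers: a wide PHI whose only uses are `trunc` and `trunc(lshr)` carries independent narrow lanes, so each lane can become its own PHI (named ".off"+Shift above). An illustrative 64-bit stand-in for the illegal-typed value:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Wide = 0x1122334455667788ULL; // stands in for the wide PHI
      uint32_t Lo = (uint32_t)Wide;          // trunc           -> PHI ".off0"
      uint32_t Hi = (uint32_t)(Wide >> 32);  // lshr 32 + trunc -> PHI ".off32"
      assert(Lo == 0x55667788U && Hi == 0x11223344U);
    }
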
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
@@ -11117,6 +11269,15 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
}
}
+ // If this is an integer PHI and we know that it has an illegal type, see if
+ // it is only used by trunc or trunc(lshr) operations. If so, we split the
+ // PHI into the various pieces being extracted. This sort of thing is
+ // introduced when SROA promotes an aggregate to a single large integer type.
+ if (isa<IntegerType>(PN.getType()) && TD &&
+ !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+ return Res;
+
return 0;
}
@@ -12210,6 +12371,47 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return ExtractValueInst::Create(IV->getInsertedValueOperand(),
exti, exte);
}
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+ // We're extracting from an intrinsic; see if we're the only user, which
+ // allows us to simplify multiple-result intrinsics to simpler things that
+ // just produce one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
// Can't simplify extracts from other values. Note that nested extracts are
// already simplified implicitly by the above (extract ( extract (insert) )
// will be translated into extract ( insert ( extract ) ) first and then just
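A minimal standalone sketch of the simplification above, assuming a compiler that provides GCC/Clang's __builtin_uadd_overflow (the function name here is illustrative, not from the patch): when only the arithmetic result of a 'with overflow' operation is consumed, it degenerates to the plain binary operation.

    unsigned addIgnoringOverflow(unsigned A, unsigned B) {
      unsigned Sum;
      bool Overflowed = __builtin_uadd_overflow(A, B, &Sum);
      (void)Overflowed;  // The i1 overflow result is dead...
      return Sum;        // ...so this is equivalent to a plain A + B.
    }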
@@ -12715,29 +12917,33 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isa<UndefValue>(RHS)) {
std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
- std::vector<unsigned> NewMask;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i)
- if (Mask[i] >= 2*e)
- NewMask.push_back(2*e);
- else
- NewMask.push_back(LHSMask[Mask[i]]);
+ if (LHSMask.size() == Mask.size()) {
+ std::vector<unsigned> NewMask;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i)
+ if (Mask[i] >= 2*e)
+ NewMask.push_back(2*e);
+ else
+ NewMask.push_back(LHSMask[Mask[i]]);
- // If the result mask is equal to the src shuffle or this shuffle mask, do
- // the replacement.
- if (NewMask == LHSMask || NewMask == Mask) {
- unsigned LHSInNElts =
- cast<VectorType>(LHSSVI->getOperand(0)->getType())->getNumElements();
- std::vector<Constant*> Elts;
- for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
- if (NewMask[i] >= LHSInNElts*2) {
- Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
- } else {
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i]));
+ // If the result mask is equal to the src shuffle or this
+ // shuffle mask, do the replacement.
+ if (NewMask == LHSMask || NewMask == Mask) {
+ unsigned LHSInNElts =
+ cast<VectorType>(LHSSVI->getOperand(0)->getType())->
+ getNumElements();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+ if (NewMask[i] >= LHSInNElts*2) {
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
+ } else {
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
+ NewMask[i]));
+ }
}
+ return new ShuffleVectorInst(LHSSVI->getOperand(0),
+ LHSSVI->getOperand(1),
+ ConstantVector::get(Elts));
}
- return new ShuffleVectorInst(LHSSVI->getOperand(0),
- LHSSVI->getOperand(1),
- ConstantVector::get(Elts));
}
}
}
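The mask arithmetic above can be sketched in isolation (a hypothetical helper in plain C++; 2*e encodes an undef lane, matching getShuffleMask's convention, and any out-of-range index is conservatively treated as undef here):

    #include <vector>

    static std::vector<unsigned>
    composeMasks(const std::vector<unsigned> &LHSMask,
                 const std::vector<unsigned> &Mask) {
      // The new guard: without equal sizes, LHSMask[Mask[i]] is not meaningful.
      if (LHSMask.size() != Mask.size())
        return std::vector<unsigned>();
      unsigned e = Mask.size();
      std::vector<unsigned> NewMask;
      for (unsigned i = 0; i != e; ++i)
        NewMask.push_back(Mask[i] >= LHSMask.size() ? 2*e : LHSMask[Mask[i]]);
      return NewMask;
    }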
@@ -12824,7 +13030,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
@@ -12846,8 +13052,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
if (!FoldedConstants.insert(CE))
continue;
- Constant *NewC =
- ConstantFoldConstantExpression(CE, BB->getContext(), TD);
+ Constant *NewC = ConstantFoldConstantExpression(CE, TD);
if (NewC && NewC != CE) {
*i = NewC;
MadeIRChange = true;
@@ -12954,7 +13159,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
@@ -13065,7 +13270,7 @@ bool InstCombiner::runOnFunction(Function &F) {
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
IRBuilder<true, TargetFolder, InstCombineIRInserter>
- TheBuilder(F.getContext(), TargetFolder(TD, F.getContext()),
+ TheBuilder(F.getContext(), TargetFolder(TD),
InstCombineIRInserter(Worklist));
Builder = &TheBuilder;
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 10c9ec6..5864113 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -16,7 +16,8 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -40,6 +41,12 @@ Threshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
+// Turn on use of LazyValueInfo.
+static cl::opt<bool>
+EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
+
+
+
namespace {
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
@@ -59,6 +66,7 @@ namespace {
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
@@ -69,20 +77,31 @@ namespace {
JumpThreading() : FunctionPass(&ID) {}
bool runOnFunction(Function &F);
- void FindLoopHeaders(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ if (EnableLVI)
+ AU.addRequired<LazyValueInfo>();
+ }
+
+ void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
- bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB);
+ bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BasicBlock *PredBB);
-
- BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
+
+ typedef SmallVectorImpl<std::pair<ConstantInt*,
+ BasicBlock*> > PredValueInfo;
+
+ bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
+ PredValueInfo &Result);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
+
+
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessJumpOnPHI(PHINode *PN);
- bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd);
- bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
};
@@ -100,6 +119,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
FindLoopHeaders(F);
@@ -109,6 +129,7 @@ bool JumpThreading::runOnFunction(Function &F) {
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
BasicBlock *BB = I;
+ // Thread all of the branches we can over this block.
while (ProcessBlock(BB))
Changed = true;
@@ -123,6 +144,29 @@ bool JumpThreading::runOnFunction(Function &F) {
LoopHeaders.erase(BB);
DeleteDeadBlock(BB);
Changed = true;
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ BasicBlock::iterator BBI = BB->getFirstNonPHI();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ // If the terminator is the only non-phi instruction, try to nuke it.
+ if (BBI->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward in the rare case when the block isn't deleted.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ Changed = true;
+ else if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
+ }
+ }
}
}
AnotherIteration = Changed;
@@ -139,6 +183,10 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I = BB->getFirstNonPHI();
+ // FIXME: THREADING will delete values that are just used to compute the
+ // branch, so they shouldn't count against the duplication cost.
+
+
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
@@ -173,8 +221,6 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
return Size;
}
-
-
/// FindLoopHeaders - We do not want jump threading to turn proper loop
/// structures into irreducible loops. Doing this breaks up the loop nesting
/// hierarchy and pessimizes later transformations. To prevent this from
@@ -198,29 +244,181 @@ void JumpThreading::FindLoopHeaders(Function &F) {
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
-
-/// FactorCommonPHIPreds - If there are multiple preds with the same incoming
-/// value for the PHI, factor them together so we get one block to thread for
-/// the whole group.
-/// This is important for things like "phi i1 [true, true, false, true, x]"
-/// where we only need to clone the block for the true blocks once.
+/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
+/// if we can infer that the value is a known ConstantInt in any of our
+/// predecessors. If so, return the known list of value and pred BB in the
+/// result vector. If a value is known to be undef, it is returned as null.
+///
+/// This returns true if there were any known values.
///
-BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
- SmallVector<BasicBlock*, 16> CommonPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Val)
- CommonPreds.push_back(PN->getIncomingBlock(i));
-
- if (CommonPreds.size() == 1)
- return CommonPreds[0];
+bool JumpThreading::
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ // If V is a ConstantInt, then it is known in all predecessors.
+ if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
- DEBUG(errs() << " Factoring out " << CommonPreds.size()
- << " common predecessors.\n");
- return SplitBlockPredecessors(PN->getParent(),
- &CommonPreds[0], CommonPreds.size(),
- ".thr_comm", this);
-}
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CI, *PI));
+ return true;
+ }
+
+ // If V is a non-instruction value, or an instruction in a different block,
+ // then it can't be derived from a PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || I->getParent() != BB) {
+
+ // Okay, if this is a live-in value, see if it has a known value at the end
+ // of any of our predecessors.
+ //
+ // FIXME: This should be an edge property, not a block end property.
+ /// TODO: Per PR2563, we could infer value range information about a
+ /// predecessor based on its terminator.
+ //
+ if (LVI) {
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB);
+ if (PredCst == 0 ||
+ (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
+ continue;
+
+ Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI));
+ }
+
+ return !Result.empty();
+ }
+
+ return false;
+ }
+
+ /// If I is a PHI node, then we know the incoming values for any constants.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
+ Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ }
+ }
+ return !Result.empty();
+ }
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals;
+
+ // Handle some boolean conditions.
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ // X | true -> true
+ // X & false -> false
+ if (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
+
+ if (LHSVals.empty() && RHSVals.empty())
+ return false;
+
+ ConstantInt *InterestingVal;
+ if (I->getOpcode() == Instruction::Or)
+ InterestingVal = ConstantInt::getTrue(I->getContext());
+ else
+ InterestingVal = ConstantInt::getFalse(I->getContext());
+
+ // Scan for the sentinel.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
+ if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0)
+ Result.push_back(LHSVals[i]);
+ for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
+ if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0)
+ Result.push_back(RHSVals[i]);
+ return !Result.empty();
+ }
+
+ // Handle the NOT form of XOR.
+ if (I->getOpcode() == Instruction::Xor &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isOne()) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
+ if (Result.empty())
+ return false;
+
+ // Invert the known values.
+ for (unsigned i = 0, e = Result.size(); i != e; ++i)
+ if (Result[i].first)
+ Result[i].first =
+ cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+ return true;
+ }
+ }
+ // Handle compare with phi operand, where the PHI is defined in this block.
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
+ if (PN && PN->getParent() == BB) {
+ // We can do this simplification if any comparisons fold to true or false.
+ // See if any do.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ Value *LHS = PN->getIncomingValue(i);
+ Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
+
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+ if (Res == 0) {
+ if (!LVI || !isa<Constant>(RHS))
+ continue;
+
+ LazyValueInfo::Tristate
+ ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ cast<Constant>(RHS), PredBB, BB);
+ if (ResT == LazyValueInfo::Unknown)
+ continue;
+ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ }
+
+ if (isa<UndefValue>(Res))
+ Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res))
+ Result.push_back(std::make_pair(CI, PredBB));
+ }
+
+ return !Result.empty();
+ }
+
+
+ // If comparing a live-in value against a constant, see if we know the
+ // live-in value on any predecessors.
+ if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
+ Cmp->getType()->isInteger() && // Not vector compare.
+ (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate
+ Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, *PI, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
+
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+ }
+
+ return !Result.empty();
+ }
+ }
+ return false;
+}
+
+
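The boolean cases above can be modeled with a small standalone sketch (hypothetical types; ints stand in for predecessor blocks, and an unset optional plays the role of the null ConstantInt that marks "known undef"):

    #include <optional>
    #include <utility>
    #include <vector>

    using PredBool = std::pair<std::optional<bool>, int>; // (known value, pred)

    // 'xor X, true' on i1: invert every known value; undef stays undef.
    static std::vector<PredBool> invertKnown(std::vector<PredBool> Known) {
      for (auto &KV : Known)
        if (KV.first)
          KV.first = !*KV.first;
      return Known;
    }

    // 'or X, Y': only operand entries equal to the interesting value (true
    // for 'or'), plus undefs, can decide the result -- the sentinel scan above.
    static std::vector<PredBool> orSentinel(const std::vector<PredBool> &LHS,
                                            const std::vector<PredBool> &RHS) {
      std::vector<PredBool> Result;
      for (const auto &KV : LHS)
        if (!KV.first || *KV.first)
          Result.push_back(KV);
      for (const auto &KV : RHS)
        if (!KV.first || *KV.first)
          Result.push_back(KV);
      return Result;
    }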
/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
/// in an undefined jump, decide which block is best to revector to.
@@ -251,7 +449,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
- if (BasicBlock *SinglePred = BB->getSinglePredecessor())
+ if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
SinglePred != BB) {
// If SinglePred was a loop header, BB becomes one.
@@ -267,10 +465,10 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
BB->moveBefore(&BB->getParent()->getEntryBlock());
return true;
}
-
- // See if this block ends with a branch or switch. If so, see if the
- // condition is a phi node. If so, and if an entry of the phi node is a
- // constant, we can thread the block.
+ }
+
+ // Look to see if the terminator is a branch or switch; if not, we can't
+ // thread it.
Value *Condition;
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
// Can't thread an unconditional jump.
@@ -301,7 +499,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
TerminatorInst *BBTerm = BB->getTerminator();
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
- BBTerm->getSuccessor(i)->removePredecessor(BB);
+ RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
}
DEBUG(errs() << " In block '" << BB->getName()
@@ -318,7 +516,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// br COND, BBX, BBY
// BBX:
// br COND, BBZ, BBW
- if (!Condition->hasOneUse() && // Multiple uses.
+ if (!LVI &&
+ !Condition->hasOneUse() && // Multiple uses.
(CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
if (isa<BranchInst>(BB->getTerminator())) {
@@ -338,52 +537,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
// All the rest of our checks depend on the condition being an instruction.
- if (CondInst == 0)
+ if (CondInst == 0) {
+ // FIXME: Unify this with code below.
+ if (LVI && ProcessThreadableEdges(Condition, BB))
+ return true;
return false;
+ }
+
// See if this is a phi node in the current block.
if (PHINode *PN = dyn_cast<PHINode>(CondInst))
if (PN->getParent() == BB)
return ProcessJumpOnPHI(PN);
- // If this is a conditional branch whose condition is and/or of a phi, try to
- // simplify it.
- if ((CondInst->getOpcode() == Instruction::And ||
- CondInst->getOpcode() == Instruction::Or) &&
- isa<BranchInst>(BB->getTerminator()) &&
- ProcessBranchOnLogical(CondInst, BB,
- CondInst->getOpcode() == Instruction::And))
- return true;
-
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
- if (isa<PHINode>(CondCmp->getOperand(0))) {
- // If we have "br (phi != 42)" and the phi node has any constant values
- // as operands, we can thread through this block.
- //
- // If we have "br (cmp phi, x)" and the phi node contains x such that the
- // comparison uniquely identifies the branch target, we can thread
- // through this block.
-
- if (ProcessBranchOnCompare(CondCmp, BB))
- return true;
- }
-
- // If we have a comparison, loop over the predecessors to see if there is
- // a condition with the same value.
- pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- for (; PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- if (PBI->isConditional() && *PI != BB) {
- if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
- if (CI->getOperand(0) == CondCmp->getOperand(0) &&
- CI->getOperand(1) == CondCmp->getOperand(1) &&
- CI->getPredicate() == CondCmp->getPredicate()) {
- // TODO: Could handle things like (x != 4) --> (x == 17)
- if (ProcessBranchOnDuplicateCond(*PI, BB))
- return true;
+ if (!LVI &&
+ (!isa<PHINode>(CondCmp->getOperand(0)) ||
+ cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // If we have a comparison, loop over the predecessors to see if there is
+ // a condition with a lexically identical value.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ for (; PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI->isConditional() && *PI != BB) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
+ if (CI->getOperand(0) == CondCmp->getOperand(0) &&
+ CI->getOperand(1) == CondCmp->getOperand(1) &&
+ CI->getPredicate() == CondCmp->getPredicate()) {
+ // TODO: Could handle things like (x != 4) --> (x == 17)
+ if (ProcessBranchOnDuplicateCond(*PI, BB))
+ return true;
+ }
}
}
- }
+ }
}
// Check for some cases that are worth simplifying. Right now we want to look
@@ -398,10 +585,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
+ // TODO: There are other places where load PRE would be profitable, such as
+ // more complex comparisons.
if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
if (SimplifyPartiallyRedundantLoad(LI))
return true;
+
+ // Handle a variety of cases where we are branching on something derived from
+ // a PHI node in the current block. If we can prove that any predecessors
+ // compute a predictable value based on a PHI node, thread those predecessors.
+ //
+ if (ProcessThreadableEdges(CondInst, BB))
+ return true;
+
+
// TODO: If we have: "br (X > 0)" and we have a predecessor where we know
// "(X == 4)" thread through this block.
@@ -459,8 +657,11 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
// Next, figure out which successor we are threading to.
BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
+ SmallVector<BasicBlock*, 2> Preds;
+ Preds.push_back(PredBB);
+
// Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ return ThreadEdge(BB, Preds, SuccBB);
}
/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
@@ -553,7 +754,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
- // FIXME: Could do PHI translation, that would be fun :)
+ // TODO: Could do simple PHI translation, that would be fun :)
if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
if (PtrOp->getParent() == LoadBB)
return false;
@@ -562,8 +763,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// the entry to its block.
BasicBlock::iterator BBIt = LI;
- if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB,
- BBIt, 6)) {
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
@@ -646,7 +847,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-split", this);
+ "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
@@ -655,7 +856,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr",
+ Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LI->getAlignment(),
UnavailablePred->getTerminator());
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
@@ -690,55 +892,183 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
return true;
}
-
-/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
-/// the current block. See if there are any simplifications we can do based on
-/// inputs to the phi node.
-///
-bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
- BasicBlock *BB = PN->getParent();
+/// FindMostPopularDest - The specified list contains multiple possible
+/// threadable destinations. Pick the one that occurs the most frequently in
+/// the list.
+static BasicBlock *
+FindMostPopularDest(BasicBlock *BB,
+ const SmallVectorImpl<std::pair<BasicBlock*,
+ BasicBlock*> > &PredToDestList) {
+ assert(!PredToDestList.empty());
+
+ // Determine popularity. If there are multiple possible destinations, we
+ // explicitly choose to ignore 'undef' destinations. We prefer to thread
+ // blocks with known and real destinations to threading undef. We'll handle
+ // them later if interesting.
+ DenseMap<BasicBlock*, unsigned> DestPopularity;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second)
+ DestPopularity[PredToDestList[i].second]++;
+
+ // Find the most popular dest.
+ DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
+ BasicBlock *MostPopularDest = DPI->first;
+ unsigned Popularity = DPI->second;
+ SmallVector<BasicBlock*, 4> SamePopularity;
+
+ for (++DPI; DPI != DestPopularity.end(); ++DPI) {
+ // If the popularity of this entry isn't higher than the popularity we've
+ // seen so far, ignore it.
+ if (DPI->second < Popularity)
+ ; // ignore.
+ else if (DPI->second == Popularity) {
+ // If it is the same as what we've seen so far, keep track of it.
+ SamePopularity.push_back(DPI->first);
+ } else {
+ // If it is more popular, remember it.
+ SamePopularity.clear();
+ MostPopularDest = DPI->first;
+ Popularity = DPI->second;
+ }
+ }
- // See if the phi node has any constant integer or undef values. If so, we
- // can determine where the corresponding predecessor will branch.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *PredVal = PN->getIncomingValue(i);
-
- // Check to see if this input is a constant integer. If so, the direction
- // of the branch is predictable.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) {
- // Merge any common predecessors that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI);
+ // Okay, now we know the most popular destination. If there is more than
+ // one destination, we need to choose one. This is arbitrary, but we need
+ // to make a deterministic decision. Pick the first one that appears in the
+ // successor list.
+ if (!SamePopularity.empty()) {
+ SamePopularity.push_back(MostPopularDest);
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0; ; ++i) {
+ assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
- BasicBlock *SuccBB;
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- SuccBB = BI->getSuccessor(CI->isZero());
- else {
- SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
- SuccBB = SI->getSuccessor(SI->findCaseValue(CI));
- }
+ if (std::find(SamePopularity.begin(), SamePopularity.end(),
+ TI->getSuccessor(i)) == SamePopularity.end())
+ continue;
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ MostPopularDest = TI->getSuccessor(i);
+ break;
}
+ }
+
+ // Okay, we have finally picked the most popular destination.
+ return MostPopularDest;
+}
+
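A compact model of the two-step scheme above (a sketch with ints standing in for BasicBlock*, not the actual DenseMap-based implementation): count popularity while ignoring undef entries, then break ties by successor order so the choice is deterministic.

    #include <algorithm>
    #include <map>
    #include <vector>

    static int mostPopularDest(const std::vector<int> &Dests,     // -1 == undef
                               const std::vector<int> &SuccOrder) {
      std::map<int, unsigned> Popularity;
      unsigned Best = 0;
      for (int D : Dests)
        if (D != -1)
          Best = std::max(Best, ++Popularity[D]);
      for (int S : SuccOrder)  // first successor reaching the maximum wins
        if (Popularity.count(S) && Popularity[S] == Best)
          return S;
      return -1;               // every input was undef; caller picks a dest
    }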
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
+ // If threading this would thread across a loop header, don't even try to
+ // thread the edge.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+ return false;
+ assert(!PredValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ DEBUG(errs() << "IN BB: " << *BB;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ errs() << " BB '" << BB->getName() << "': FOUND condition = ";
+ if (PredValues[i].first)
+ errs() << *PredValues[i].first;
+ else
+ errs() << "UNDEF";
+ errs() << " for pred '" << PredValues[i].second->getName()
+ << "'.\n";
+ });
+
+ // Decide what we want to thread through. Convert our list of known values to
+ // a list of known destinations for each pred. This also discards duplicate
+ // predecessors and keeps track of the undefined inputs (which are represented
+ // as a null dest in the PredToDestList).
+ SmallPtrSet<BasicBlock*, 16> SeenPreds;
+ SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
+
+ BasicBlock *OnlyDest = 0;
+ BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ BasicBlock *Pred = PredValues[i].second;
+ if (!SeenPreds.insert(Pred))
+ continue; // Duplicate predecessor entry.
- // If the input is an undef, then it doesn't matter which way it will go.
- // Pick an arbitrary dest and thread the edge.
- if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) {
- // Merge any common predecessors that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV);
- BasicBlock *SuccBB =
- BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB));
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ // If the predecessor ends with an indirect goto, we can't change its
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
+ continue;
+
+ ConstantInt *Val = PredValues[i].first;
+
+ BasicBlock *DestBB;
+ if (Val == 0) // Undef.
+ DestBB = 0;
+ else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ DestBB = BI->getSuccessor(Val->isZero());
+ else {
+ SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
+ DestBB = SI->getSuccessor(SI->findCaseValue(Val));
}
+
+ // If we have exactly one destination, remember it for efficiency below.
+ if (i == 0)
+ OnlyDest = DestBB;
+ else if (OnlyDest != DestBB)
+ OnlyDest = MultipleDestSentinel;
+
+ PredToDestList.push_back(std::make_pair(Pred, DestBB));
}
- // If the incoming values are all variables, we don't know the destination of
- // any predecessors. However, if any of the predecessor blocks end in an
- // unconditional branch, we can *duplicate* the jump into that block in order
- // to further encourage jump threading and to eliminate cases where we have
- // branch on a phi of an icmp (branch on icmp is much better).
+ // If all edges were unthreadable, we fail.
+ if (PredToDestList.empty())
+ return false;
+
+ // Determine which is the most common successor. If we have many inputs and
+ // this block is a switch, we want to start by threading the batch that goes
+ // to the most popular destination first. If we only know about one
+ // threadable destination (the common case) we can avoid this.
+ BasicBlock *MostPopularDest = OnlyDest;
+
+ if (MostPopularDest == MultipleDestSentinel)
+ MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+
+ // Now that we know what the most popular destination is, factor all
+ // predecessors that will jump to it into a single predecessor.
+ SmallVector<BasicBlock*, 16> PredsToFactor;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second == MostPopularDest) {
+ BasicBlock *Pred = PredToDestList[i].first;
+
+ // This predecessor may be a switch or something else that has multiple
+ // edges to the block. Factor each of these edges by listing them
+ // according to # occurrences in PredsToFactor.
+ TerminatorInst *PredTI = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
+ if (PredTI->getSuccessor(i) == BB)
+ PredsToFactor.push_back(Pred);
+ }
+
+ // If the threadable edges are branching on an undefined value, we get to pick
+ // the destination that these predecessors should get to.
+ if (MostPopularDest == 0)
+ MostPopularDest = BB->getTerminator()->
+ getSuccessor(GetBestDestForJumpOnUndef(BB));
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+}
+
+/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
+/// the current block. See if there are any simplifications we can do based on
+/// inputs to the phi node.
+///
+bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
+ BasicBlock *BB = PN->getParent();
+
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the jump into that block in order to further encourage jump
+ // threading and to eliminate cases where we have branch on a phi of an icmp
+ // (branch on icmp is much better).
// We don't want to do this transformation for switches, because we don't
// really want to duplicate a switch.
@@ -759,137 +1089,6 @@ bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
}
-/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch
-/// whose condition is an AND/OR where one side is PN. If PN has constant
-/// operands that permit us to evaluate the condition for some operand, thread
-/// through the block. For example with:
-/// br (and X, phi(Y, Z, false))
-/// the predecessor corresponding to the 'false' will always jump to the false
-/// destination of the branch.
-///
-bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
- bool isAnd) {
- // If this is a binary operator tree of the same AND/OR opcode, check the
- // LHS/RHS.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
- if ((isAnd && BO->getOpcode() == Instruction::And) ||
- (!isAnd && BO->getOpcode() == Instruction::Or)) {
- if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd))
- return true;
- if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd))
- return true;
- }
-
- // If this isn't a PHI node, we can't handle it.
- PHINode *PN = dyn_cast<PHINode>(V);
- if (!PN || PN->getParent() != BB) return false;
-
- // We can only do the simplification for phi nodes of 'false' with AND or
- // 'true' with OR. See if we have any entries in the phi for this.
- unsigned PredNo = ~0U;
- ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- !isAnd);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- if (PN->getIncomingValue(i) == PredCst) {
- PredNo = i;
- break;
- }
- }
-
- // If no match, bail out.
- if (PredNo == ~0U)
- return false;
-
- // If so, we can actually do this threading. Merge any common predecessors
- // that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
-
- // Next, figure out which successor we are threading to. If this was an AND,
- // the constant must be FALSE, and we must be targeting the 'false' block.
- // If this is an OR, the constant must be TRUE, and we must be targeting the
- // 'true' block.
- BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
-}
-
-/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
-/// hand sides of the compare instruction, try to determine the result. If the
-/// result can not be determined, a null pointer is returned.
-static Constant *GetResultOfComparison(CmpInst::Predicate pred,
- Value *LHS, Value *RHS,
- LLVMContext &Context) {
- if (Constant *CLHS = dyn_cast<Constant>(LHS))
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantExpr::getCompare(pred, CLHS, CRHS);
-
- if (LHS == RHS)
- if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
- return ICmpInst::isTrueWhenEqual(pred) ?
- ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context);
-
- return 0;
-}
-
-/// ProcessBranchOnCompare - We found a branch on a comparison between a phi
-/// node and a value. If we can identify when the comparison is true between
-/// the phi inputs and the value, we can fold the compare for that edge and
-/// thread through it.
-bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
- PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
- Value *RHS = Cmp->getOperand(1);
-
- // If the phi isn't in the current block, an incoming edge to this block
- // doesn't control the destination.
- if (PN->getParent() != BB)
- return false;
-
- // We can do this simplification if any comparisons fold to true or false.
- // See if any do.
- Value *PredVal = 0;
- bool TrueDirection = false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- PredVal = PN->getIncomingValue(i);
-
- Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal,
- RHS, Cmp->getContext());
- if (!Res) {
- PredVal = 0;
- continue;
- }
-
- // If this folded to a constant expr, we can't do anything.
- if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
- TrueDirection = ResC->getZExtValue();
- break;
- }
- // If this folded to undef, just go the false way.
- if (isa<UndefValue>(Res)) {
- TrueDirection = false;
- break;
- }
-
- // Otherwise, we can't fold this input.
- PredVal = 0;
- }
-
- // If no match, bail out.
- if (PredVal == 0)
- return false;
-
- // If so, we can actually do this threading. Merge any common predecessors
- // that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);
-
- // Next, get our successor.
- BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
-}
-
-
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
@@ -914,10 +1113,11 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
}
}
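A sketch of the remapping rule this helper applies (hypothetical, with ints standing in for Values): an incoming value that was cloned into the new predecessor must be looked up in the clone's value map, while values defined elsewhere pass through unchanged.

    #include <map>

    static int mappedIncoming(const std::map<int, int> &ValueMap, int OldVal) {
      auto It = ValueMap.find(OldVal);
      return It == ValueMap.end() ? OldVal : It->second;
    }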
-/// ThreadEdge - We have decided that it is safe and profitable to thread an
-/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this
-/// change.
-bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
+/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
+/// across BB. Transform the IR to reflect this change.
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinitely loop.
if (SuccBB == BB) {
@@ -929,8 +1129,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DEBUG(errs() << " Not threading from '" << PredBB->getName()
- << "' across loop header BB '" << BB->getName()
+ DEBUG(errs() << " Not threading across loop header BB '" << BB->getName()
<< "' to dest BB '" << SuccBB->getName()
<< "' - it might create an irreducible loop!\n");
return false;
@@ -943,6 +1142,17 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
return false;
}
+ // And finally, do it! Start by factoring the predecessors if needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(errs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
// And finally, do it!
DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
@@ -1034,7 +1244,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
TerminatorInst *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
- BB->removePredecessor(PredBB);
+ RemovePredecessorAndSimplify(BB, PredBB, TD);
PredTerm->setSuccessor(i, NewBB);
}
@@ -1044,9 +1254,12 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
BI = NewBB->begin();
for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
- Inst->replaceAllUsesWith(C);
- Inst->eraseFromParent();
+
+ if (Value *V = SimplifyInstruction(Inst, TD)) {
+ WeakVH BIHandle(BI);
+ ReplaceAndSimplifyAllUses(Inst, V, TD);
+ if (BIHandle == 0)
+ BI = NewBB->begin();
continue;
}
@@ -1164,7 +1377,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
- BB->removePredecessor(PredBB);
+ RemovePredecessorAndSimplify(BB, PredBB, TD);
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
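For intuition, the overall effect of the pass can be sketched as two equivalent C++ functions (illustrative only): once a predecessor's path determines the branch condition, that path can jump straight to the right successor instead of re-testing the condition in the join block.

    // Before: every path funnels through a block that just re-tests 'cond'.
    int joined(bool cond, int a, int b) {
      int v = cond ? a : b;
      return cond ? v + 1 : v - 1;
    }

    // After threading, each predecessor's path proceeds directly.
    int threaded(bool cond, int a, int b) {
      if (cond)
        return a + 1;
      return b - 1;
    }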
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 756fbf3..104c873 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -263,7 +263,6 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Get the preheader block to move instructions into...
Preheader = L->getLoopPreheader();
- assert(Preheader&&"Preheader insertion pass guarantees we have a preheader!");
// Loop over the body of this loop, looking for calls, invokes, and stores.
// Because subloops have already been incorporated into AST, we skip blocks in
@@ -286,12 +285,14 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
//
- SinkRegion(DT->getNode(L->getHeader()));
- HoistRegion(DT->getNode(L->getHeader()));
+ if (L->hasDedicatedExits())
+ SinkRegion(DT->getNode(L->getHeader()));
+ if (Preheader)
+ HoistRegion(DT->getNode(L->getHeader()));
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can...
- if (!DisablePromotion)
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits())
PromoteValuesInLoop();
// Clear out loops state information for the next iteration
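The transformations being guarded here amount to classic invariant hoisting; a small before/after sketch in C++ (illustrative): HoistRegion needs a preheader block to hold the hoisted computation, which is why it is now skipped when none exists.

    // Before hoisting: 'a * b' is recomputed every iteration.
    int sumBefore(const int *v, int n, int a, int b) {
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += v[i] + a * b;
      return s;
    }

    // After hoisting: the invariant product lives in the preheader.
    int sumAfter(const int *v, int n, int a, int b) {
      const int ab = a * b;  // conceptually placed in the loop preheader
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += v[i] + ab;
      return s;
    }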
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 866d8b4..48817ab 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -115,6 +115,10 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
if (!preheader)
return false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->hasDedicatedExits())
+ return false;
+
// We can't remove loops that contain subloops. If the subloops were dead,
// they would already have been removed in earlier executions of this pass.
if (L->begin() != L->end())
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 920d85c..8b6a233 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -209,6 +209,10 @@ bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
L = IncomingLoop;
LPM = &LPM_Ref;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
// FIXME - Nested loops make dominator info updates tricky.
if (!L->getSubLoops().empty())
return false;
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 7a4bb35..5004483 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -15,7 +15,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -49,6 +48,7 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<DominanceFrontier>();
@@ -104,17 +104,18 @@ bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
L = Lp;
- OrigHeader = L->getHeader();
OrigPreHeader = L->getLoopPreheader();
+ if (!OrigPreHeader) return false;
+
OrigLatch = L->getLoopLatch();
+ if (!OrigLatch) return false;
+
+ OrigHeader = L->getHeader();
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
- assert(OrigHeader && OrigLatch && OrigPreHeader &&
- "Loop is not in canonical form");
-
// If the loop header is not one of the loop exiting blocks then
// either this loop is already rotated or it is not
// suitable for loop rotation transformations.
@@ -287,7 +288,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
"bb.nph",
OrigHeader->getParent(),
NewHeader);
- LoopInfo &LI = LPM.getAnalysis<LoopInfo>();
+ LoopInfo &LI = getAnalysis<LoopInfo>();
if (Loop *PL = LI.getLoopFor(OrigPreHeader))
PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
BranchInst::Create(NewHeader, NewPreHeader);
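Rotation reshapes a top-tested loop into a guarded bottom-tested one, which is why a preheader and a latch must exist (the new early-exit checks above). A C++ sketch of the equivalent source-level transformation:

    // Top-tested form: the header holds the exit test.
    void visitWhile(int *p, int *end) {
      while (p != end)
        ++p;
    }

    // Rotated form: the old test guards entry; the latch now exits the loop.
    void visitRotated(int *p, int *end) {
      if (p != end) {
        do
          ++p;
        while (p != end);
      }
    }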
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index e20fb16..564c7ac 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -51,6 +51,7 @@ STATISTIC(NumEliminated, "Number of strides eliminated");
STATISTIC(NumShadow, "Number of Shadow IVs optimized");
STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
+STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero");
static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
cl::init(false),
@@ -107,7 +108,7 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
+ explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
LoopPass(&ID), TLI(tli) {
}
@@ -131,12 +132,10 @@ namespace {
}
private:
- ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV *const * &CondStride);
-
void OptimizeIndvars(Loop *L);
- void OptimizeLoopCountIV(Loop *L);
+
+ /// OptimizeLoopTermCond - Change loop terminating condition to use the
+ /// postinc iv when possible.
void OptimizeLoopTermCond(Loop *L);
/// OptimizeShadowIV - If IV is used in a int-to-float cast
@@ -148,8 +147,28 @@ namespace {
ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse);
+ /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for
+ /// deciding when to exit the loop is used only for that purpose, try to
+ /// rearrange things so it counts down to a test against zero.
+ bool OptimizeLoopCountIV(Loop *L);
+ bool OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse, Loop *L);
+
+ /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
+ /// single stride of IV. All of the users may have different starting
+ /// values, and this may not be the only stride.
+ void StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L);
+ void StrengthReduceIVUsers(Loop *L);
+
+ ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass = false);
+
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV *const * &CondStride);
+ const SCEV* &CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&,
IVExpr&, const Type*,
@@ -164,6 +183,7 @@ namespace {
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess);
+ bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc);
bool ShouldUseFullStrengthReductionMode(
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
@@ -188,9 +208,7 @@ namespace {
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter);
- void StrengthReduceStridedIVUsers(const SCEV *const &Stride,
- IVUsersOfOneStride &Uses,
- Loop *L);
+
void DeleteTriviallyDeadInstructions();
};
}
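The new OptimizeLoopCountIV hook implements the familiar count-toward-zero rewrite; sketched in C++ (illustrative, and only legal when the counting IV's sole remaining use is the exit test, as the comment above says):

    int sumUp(const int *v, unsigned n) {
      int s = 0;
      for (unsigned i = 0; i != n; ++i)  // exit test compares against 'n'
        s += v[i];
      return s;
    }

    int sumDown(const int *v, unsigned n) {
      int s = 0;
      const int *p = v;
      for (unsigned i = n; i != 0; --i)  // rearranged: test against zero
        s += *p++;                       // addressing via a separate pointer IV
      return s;
    }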
@@ -208,11 +226,11 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
/// their operands subsequently dead.
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
if (DeadInsts.empty()) return;
-
+
while (!DeadInsts.empty()) {
Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
DeadInsts.pop_back();
-
+
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
@@ -223,14 +241,14 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
DeadInsts.push_back(U);
}
}
-
+
I->eraseFromParent();
Changed = true;
}
}
-/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
-/// subexpression that is an AddRec from a loop other than L. An outer loop
+/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
+/// subexpression that is an AddRec from a loop other than L. An outer loop
/// of L is OK, but not an inner loop nor a disjoint loop.
static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
// This is very common, put it first.
@@ -256,7 +274,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
containsAddRecFromDifferentLoop(DE->getRHS(), L);
#if 0
- // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
+ // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
// need this when it is.
if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
@@ -328,7 +346,7 @@ namespace {
/// field to the Imm field (below). BasedUser values are sorted by this
/// field.
const SCEV *Base;
-
+
/// Inst - The instruction using the induction variable.
Instruction *Inst;
@@ -352,11 +370,11 @@ namespace {
// instruction for a loop and uses outside the loop that are dominated by
// the loop.
bool isUseOfPostIncrementedValue;
-
+
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- Imm(SE->getIntegerSCEV(0, Base->getType())),
+ Imm(SE->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
// Once we rewrite the code to insert the new IVs we want, update the
@@ -367,8 +385,8 @@ namespace {
SCEVExpander &Rewriter, Loop *L, Pass *P,
LoopInfo &LI,
SmallVectorImpl<WeakVH> &DeadInsts);
-
- Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+
+ Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -383,7 +401,7 @@ void BasedUser::dump() const {
errs() << " Inst: " << *Inst;
}
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -393,10 +411,10 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
// want to insert this expression before the user, we'd rather pull it out as
// many loops as possible.
Instruction *BaseInsertPt = IP;
-
+
// Figure out the most-nested loop that IP is in.
Loop *InsertLoop = LI.getLoopFor(IP->getParent());
-
+
// If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
// the preheader of the outer-most loop where NewBase is not loop invariant.
if (L->contains(IP->getParent()))
@@ -404,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
InsertLoop = InsertLoop->getParentLoop();
}
-
+
Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
const SCEV *NewValSCEV = SE->getUnknown(Base);
@@ -430,7 +448,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
-
+
// However, if the Operand is itself an instruction, the (potentially
// complex) inserted code may be shared by many users. Because of this, we
// want to emit code for the computation of the operand right before its old
@@ -442,7 +460,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
//
// If this is a use outside the loop (which means after, since it is based
// on a loop indvar) we use the post-incremented value, so that we don't
- // artificially make the preinc value live out the bottom of the loop.
+ // artificially make the preinc value live out the bottom of the loop.
if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) {
if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
InsertPt = NewBasePt;
@@ -477,7 +495,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
if (PN->getIncomingValue(i) == OperandValToReplace) {
// If the original expression is outside the loop, put the replacement
// code in the same place as the original expression,
- // which need not be an immediate predecessor of this PHI. This way we
+ // which need not be an immediate predecessor of this PHI. This way we
// need only one copy of it even if it is referenced multiple times in
// the PHI. We don't do this when the original expression is inside the
// loop because multiple copies sometimes do useful sinking of code in
@@ -490,6 +508,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
// is the canonical backedge for this loop, as this can make some
// inserted code be in an illegal position.
if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(PHIPred->getTerminator()) &&
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
@@ -572,11 +591,11 @@ static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy,
static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
Loop *L, ScalarEvolution *SE) {
if (Val->isLoopInvariant(L)) return; // Nothing to do.
-
+
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
if (!SAE->getOperand(i)->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
@@ -594,7 +613,7 @@ static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
-
+
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
@@ -617,11 +636,11 @@ static void MoveImmediateValues(const TargetLowering *TLI,
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
const SCEV *NewOp = SAE->getOperand(i);
MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);
-
+
if (!NewOp->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
// field of the expression.
@@ -640,7 +659,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
-
+
if (Start != SARE->getStart()) {
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
@@ -656,8 +675,8 @@ static void MoveImmediateValues(const TargetLowering *TLI,
const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
const SCEV *NewOp = SME->getOperand(1);
MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
-
- // If we extracted something out of the subexpressions, see if we can
+
+ // If we extracted something out of the subexpressions, see if we can
// simplify this!
if (NewOp != SME->getOperand(1)) {
// Scale SubImm up by "8". If the result is a target constant, we are
@@ -666,7 +685,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
// Accumulate the immediate.
Imm = SE->getAddExpr(Imm, SubImm);
-
+
// Update what is left of 'Val'.
Val = SE->getMulExpr(SME->getOperand(0), NewOp);
return;
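For context, the effect of MoveImmediateValues is a partition of an add expression: constant terms the target's addressing mode can fold for free migrate into the Imm field, everything else stays in the base. A standalone sketch of that partition, with a stand-in Term type and a made-up immediate range (not any real target's limit):

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct Term { int64_t Value; bool IsLoopInvariantConstant; };

    // Made-up addressing-mode immediate range, for illustration only.
    static bool fitsAddressingMode(int64_t C) {
      return C >= -4095 && C <= 4095;
    }

    // Move foldable constants out of Base and accumulate them into Imm.
    static void moveImmediates(std::vector<Term> &Base, int64_t &Imm) {
      std::vector<Term> Kept;
      for (const Term &T : Base) {
        if (T.IsLoopInvariantConstant && fitsAddressingMode(T.Value))
          Imm += T.Value;          // folded for free by the address mode
        else
          Kept.push_back(T);       // must remain in the base expression
      }
      Base = std::move(Kept);
    }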
@@ -714,7 +733,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
-
+
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
@@ -724,7 +743,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
}
}
-// This is logically local to the following function, but C++ says we have
+// This is logically local to the following function, but C++ says we have
// to make it file scope.
struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
@@ -762,7 +781,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// an addressing mode "for free"; such expressions are left within the loop.
// struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
-
+
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
SmallVector<const SCEV *, 16> UniqueSubExprs;
@@ -779,7 +798,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
if (!L->contains(Uses[i].Inst->getParent()))
continue;
NumUsesInsideLoop++;
-
+
// If the base is zero (which is common), return zero now, there are no
// CSEs we can find.
if (Uses[i].Base == Zero) return Zero;
@@ -811,13 +830,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubexprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(UniqueSubExprs[i]);
assert(I != SubExpressionUseData.end() && "Entry not found?");
- if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
+ if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
if (I->second.notAllUsesAreFree)
Result = SE->getAddExpr(Result, I->first);
- else
+ else
FreeResult = SE->getAddExpr(FreeResult, I->first);
} else
// Remove non-cse's from SubExpressionUseData.
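The counting scheme above boils down to: tally every subexpression across all in-loop users, then treat one as a common subexpression only when its count equals the number of in-loop users. A standalone sketch of the tally, with strings standing in for SCEV pointers and assuming each subexpression appears at most once per user's base:

    #include <map>
    #include <string>
    #include <vector>

    static std::vector<std::string>
    findCommonSubExprs(const std::vector<std::vector<std::string>> &Bases) {
      std::map<std::string, unsigned> UseCount;
      for (const auto &Base : Bases)
        for (const auto &Sub : Base)
          ++UseCount[Sub];

      std::vector<std::string> Common;
      for (const auto &Entry : UseCount)
        if (Entry.second == Bases.size())  // present in every user: a CSE
          Common.push_back(Entry.first);
      return Common;
    }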
@@ -849,13 +868,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// If we found no CSE's, return now.
if (Result == Zero) return Result;
-
+
// If we still have a FreeResult, remove its subexpressions from
// SubExpressionUseData. This means they will remain in the use Bases.
if (FreeResult != Zero) {
SeparateSubExprs(SubExprs, FreeResult, SE);
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(SubExprs[j]);
SubExpressionUseData.erase(I);
}
@@ -882,7 +901,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
SubExprs.erase(SubExprs.begin()+j);
--j; --e;
}
-
+
// Finally, add the non-shared expressions together.
if (SubExprs.empty())
Uses[i].Base = Zero;
@@ -890,11 +909,11 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
Uses[i].Base = SE->getAddExpr(SubExprs);
SubExprs.clear();
}
-
+
return Result;
}
-/// ValidScale - Check whether the given Scale is valid for all loads and
+/// ValidScale - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
///
bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
@@ -911,7 +930,7 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
AccessTy = getAccessType(UsersToProcess[i].Inst);
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
-
+
TargetLowering::AddrMode AM;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
@@ -983,13 +1002,13 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
/// reuse is possible. Factors can be negative on some targets, e.g. ARM.
///
/// If all uses are outside the loop, we don't require that all multiplies
-/// be folded into the addressing mode, nor even that the factor be constant;
-/// a multiply (executed once) outside the loop is better than another IV
+/// be folded into the addressing mode, nor even that the factor be constant;
+/// a multiply (executed once) outside the loop is better than another IV
/// within. Well, usually.
const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
bool AllUsesAreOutsideLoop,
- const SCEV *const &Stride,
+ const SCEV *const &Stride,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (StrideNoReuse.count(Stride))
@@ -999,11 +1018,16 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
StrideNoReuse.count(SI->first))
continue;
+ // If the other stride has no uses, don't reuse it.
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI =
+ IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
+ if (UI->second->Users.empty())
+ continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
(unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0))
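The empty-Users check added above keeps the pass from "reusing" a stride whose users have all migrated elsewhere, which would resurrect an IV nothing needs. A standalone sketch of the scan, with integer keys standing in for SCEV strides and assuming Stride > 0:

    #include <cstdint>
    #include <cstdlib>
    #include <map>
    #include <vector>

    static int64_t pickReusableStride(
        const std::vector<int64_t> &StrideOrder,
        const std::map<int64_t, std::vector<int>> &UsersByStride,
        int64_t Stride) {
      for (int64_t S : StrideOrder) {
        auto It = UsersByStride.find(S);
        if (It == UsersByStride.end() || It->second.empty())
          continue;                 // dead stride: nothing would share it
        if (S != Stride && std::llabs(S) >= Stride &&
            std::llabs(S) % Stride == 0)
          return S;                 // an exact, not-smaller multiple
      }
      return 0;                     // no reuse candidate
    }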
@@ -1052,7 +1076,7 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// an existing IV if we can.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
@@ -1072,9 +1096,9 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// -1*old.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end())
+ if (SI == IVsByStride.end())
continue;
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
@@ -1104,18 +1128,18 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
static bool isNonConstantNegative(const SCEV *const &Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
-
+
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
-
+
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
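As a usage note for isNonConstantNegative above: in a canonicalized SCEV product the constant factor comes first, so detecting a negative expression reduces to inspecting the leading factor. A standalone sketch of the same shape test, with a stand-in Factor type:

    #include <cstdint>
    #include <vector>

    struct Factor { bool IsConstant; int64_t Value; };

    // Mirrors the logic above: a product like (-42 * V) is "negative"
    // exactly when its leading constant factor is negative.
    static bool isNegativeProduct(const std::vector<Factor> &Mul) {
      if (Mul.size() < 2 || !Mul.front().IsConstant)
        return false;               // no leading constant factor
      return Mul.front().Value < 0;
    }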
/// CollectIVUsers - Transform our list of users and offsets to a bit more
-/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
-/// of the strided accesses, as well as the old information from Uses. We
+/// complex table. In this new vector, each 'BasedUser' contains 'Base', the
+/// base of the strided accesses, as well as the old information from Uses. We
/// progressively move information from the Base field to the Imm field, until
/// we eventually have the full access expression to rewrite the use.
const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
@@ -1145,7 +1169,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
- // possible. Start by identifying the common expressions in the base values
+ // possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
@@ -1165,11 +1189,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
if (!L->contains(UsersToProcess[i].Inst->getParent())) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
- UsersToProcess[i].Base =
+ UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Not all uses are outside the loop.
- AllUsesAreOutsideLoop = false;
+ AllUsesAreOutsideLoop = false;
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
@@ -1183,11 +1207,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
if (isAddress)
HasAddress = true;
-
+
// If this use isn't an address, then not all uses are addresses.
if (!isAddress && !isPHI)
AllUsesAreAddresses = false;
-
+
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
@@ -1198,7 +1222,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
-
+
// There are no in-loop address uses.
if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
AllUsesAreAddresses = false;
@@ -1491,12 +1515,13 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
return true;
}
-/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
+/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride.
-void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
- IVUsersOfOneStride &Uses,
- Loop *L) {
+void
+LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L) {
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
@@ -1518,8 +1543,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
// Sort the UsersToProcess array so that users with common bases are
// next to each other.
@@ -1588,12 +1613,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
IVExpr ReuseIV(SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
- SE->getIntegerSCEV(0,
+ SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
0);
- /// Choose a strength-reduction strategy and prepare for it by creating
- /// the necessary PHIs and adjusting the bookkeeping.
+ // Choose a strength-reduction strategy and prepare for it by creating
+ // the necessary PHIs and adjusting the bookkeeping.
if (ShouldUseFullStrengthReductionMode(UsersToProcess, L,
AllUsesAreAddresses, Stride)) {
PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L,
@@ -1606,7 +1631,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// If all uses are addresses, check if it is possible to reuse an IV. The
// new IV must have a stride that is a multiple of the old stride; the
// multiple must be a number that can be encoded in the scale field of the
- // target addressing mode; and we must have a valid instruction after this
+ // target addressing mode; and we must have a valid instruction after this
// substitution, including the immediate field, if any.
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
@@ -1649,7 +1674,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
- PreInsertPt);
+ PreInsertPt);
}
}
@@ -1723,7 +1748,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
"Unexpected lengthening conversion!");
- typedBase = SE->getTruncateExpr(ReuseIV.Base,
+ typedBase = SE->getTruncateExpr(ReuseIV.Base,
RewriteExpr->getType());
}
RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
@@ -1775,11 +1800,29 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// different starting values, into different PHIs.
}
+void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) {
+ // Note: this processes each stride/type pair individually. All users
+ // passed into StrengthReduceIVUsersOfStride have the same type AND stride.
+ // Also, note that we iterate over IVUsesByStride indirectly by using
+ // StrideOrder. This extra layer of indirection makes the ordering of
+ // strides deterministic - not dependent on map order.
+ for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) {
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ StrengthReduceIVUsersOfStride(SI->first, *SI->second, L);
+ }
+}
+
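The StrideOrder indirection the new driver relies on is a general trick: pair an associative map with a vector of keys kept in insertion order, and iterate the vector, so results never depend on the map's internal key ordering. A standalone sketch:

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    static void processInOrder(const std::vector<std::string> &Order,
                               const std::map<std::string, int> &ByKey) {
      for (const std::string &Key : Order) {
        auto It = ByKey.find(Key);
        assert(It != ByKey.end() && "Key doesn't exist!");
        (void)It;  // ... process It->second here, in recorded order ...
      }
    }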
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
-bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV *const * &CondStride) {
+bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond,
+ IVStrideUse *&CondUse,
+ const SCEV* &CondStride) {
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e && !CondUse; ++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
@@ -1793,12 +1836,12 @@ bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = UI;
- CondStride = &SI->first;
+ CondStride = SI->first;
return true;
}
}
return false;
-}
+}
namespace {
// Constant strides come first, which in turn are sorted by their absolute
@@ -1851,8 +1894,9 @@ namespace {
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV *const* &CondStride) {
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass) {
// If there's only one stride in the loop, there's nothing to do here.
if (IU->StrideOrder.size() < 2)
return Cond;
@@ -1860,23 +1904,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
// trying to change the condition because the stride will still
// remain.
std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
- IU->IVUsesByStride.find(*CondStride);
- if (I == IU->IVUsesByStride.end() ||
- I->second->Users.size() != 1)
+ IU->IVUsesByStride.find(CondStride);
+ if (I == IU->IVUsesByStride.end())
return Cond;
+ if (I->second->Users.size() > 1) {
+ for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(),
+ EE = I->second->Users.end(); II != EE; ++II) {
+ if (II->getUser() == Cond)
+ continue;
+ if (!isInstructionTriviallyDead(II->getUser()))
+ return Cond;
+ }
+ }
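The relaxation above replaces a strict "exactly one user" test: other registered users of the stride are now tolerated as long as each is trivially dead and about to be cleaned up, leaving the compare as the only live user. A standalone sketch with a stand-in RegisteredUser type:

    #include <vector>

    struct RegisteredUser { bool IsTheCompare; bool TriviallyDead; };

    static bool compareIsOnlyLiveUser(
        const std::vector<RegisteredUser> &Users) {
      for (const RegisteredUser &U : Users)
        if (!U.IsTheCompare && !U.TriviallyDead)
          return false;             // some other live user still exists
      return true;
    }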
// Only handle constant strides for now.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride);
if (!SC) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
- unsigned BitWidth = SE->getTypeSizeInBits((*CondStride)->getType());
+ unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType());
uint64_t SignBit = 1ULL << (BitWidth-1);
const Type *CmpTy = Cond->getOperand(0)->getType();
const Type *NewCmpTy = NULL;
unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
unsigned NewTyBits = 0;
- const SCEV **NewStride = NULL;
+ const SCEV *NewStride = NULL;
Value *NewCmpLHS = NULL;
Value *NewCmpRHS = NULL;
int64_t Scale = 1;
@@ -1885,16 +1937,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
int64_t CmpVal = C->getValue().getSExtValue();
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine())
+ return Cond;
+
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
// Check the stride constant and the comparison constant signs to detect
// overflow.
- if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
- return Cond;
+ if (StartC) {
+ if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) ||
+ (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0))
+ return Cond;
+ } else {
+ // More restrictive check for the other cases.
+ if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
+ return Cond;
+ }
// Look for a suitable stride / iv as replacement.
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
- if (!isa<SCEVConstant>(SI->first))
+ if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty())
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SSInt == CmpSSInt ||
@@ -1904,6 +1971,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Scale = SSInt / CmpSSInt;
int64_t NewCmpVal = CmpVal * Scale;
+
+ // If the old icmp value fits in the icmp immediate field but the new one
+ // doesn't, try something else.
+ if (TLI &&
+ TLI->isLegalICmpImmediate(CmpVal) &&
+ !TLI->isLegalICmpImmediate(NewCmpVal))
+ continue;
+
APInt Mul = APInt(BitWidth*2, CmpVal, true);
Mul = Mul * APInt(BitWidth*2, Scale, true);
// Check for overflow.
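The APInt arithmetic above detects overflow by performing the multiply at twice the bit width and checking whether the product still fits in the original width. The same idea in standalone form, using __int128 (a GCC/Clang extension) in place of a 2*BitWidth APInt:

    #include <cstdint>

    static bool mulOverflows64(int64_t CmpVal, int64_t Scale,
                               int64_t &Product) {
      __int128 Wide = static_cast<__int128>(CmpVal) * Scale;
      if (Wide > INT64_MAX || Wide < INT64_MIN)
        return true;                // needs more than 64 bits: overflow
      Product = static_cast<int64_t>(Wide);
      return false;
    }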
@@ -1918,8 +1993,6 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
(CmpVal & SignBit) != (NewCmpVal & SignBit))
continue;
- if (NewCmpVal == CmpVal)
- continue;
// Pick the best iv to use trying to avoid a cast.
NewCmpLHS = NULL;
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
@@ -1969,19 +2042,21 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
continue;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- continue;
+ if (!PostPass) {
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ // Avoid rewriting the compare instruction with an iv of new stride
+ // if it's likely the new stride uses will be rewritten using the
+ // stride of the compare instruction.
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
+ continue;
+ }
// Avoid rewriting the compare instruction with an iv which has
// implicit extension or truncation built into it.
@@ -1994,7 +2069,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
- NewStride = &IU->StrideOrder[i];
+ NewStride = IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
@@ -2031,13 +2106,16 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
L->getHeader()->getName() + ".termcond");
+ DEBUG(errs() << " Change compare stride in Inst " << *OldCond);
+ DEBUG(errs() << " to " << *Cond << '\n');
+
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(CondUse->getOperandValToReplace());
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
- IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
- CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
+ IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
+ CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
Changed = true;
@@ -2180,7 +2258,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
-
+
for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
@@ -2199,13 +2277,13 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
/* If the shadow use is an int->float cast, then insert a second IV
to eliminate this cast.
- for (unsigned i = 0; i < n; ++i)
+ for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
double d = 0.0;
- for (unsigned i = 0; i < n; ++i, ++d)
+ for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
@@ -2227,7 +2305,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
- if (Mantissa == -1) continue;
+ if (Mantissa == -1) continue;
if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
continue;
@@ -2239,12 +2317,12 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
Entry = 1;
Latch = 0;
}
-
+
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
- BinaryOperator *Incr =
+ BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
@@ -2271,7 +2349,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
- BinaryOperator *NewIncr =
+ BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
@@ -2297,237 +2375,385 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
OptimizeShadowIV(L);
}
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
+bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L,
+ bool CheckPreInc) {
+ int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
+ const SCEV *Share = SI->first;
+ if (!isa<SCEVConstant>(SI->first) || Share == Stride)
+ continue;
+ int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
+ if (SSInt == SInt)
+ return true; // This can definitely be reused.
+ if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
+ continue;
+ int64_t Scale = SSInt / SInt;
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) {
+ if (!CheckPreInc)
+ return true;
+ // Any pre-inc iv use?
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (!I->isUseOfPostIncrementedValue())
+ return true;
+ }
+ }
+ }
+ return false;
+}
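The arithmetic filter inside StrideMightBeShared amounts to: another constant stride can share this IV only if it is the same stride, or an exact not-smaller multiple of it, in which case the quotient becomes the reuse scale. A standalone distillation, assuming Stride > 0:

    #include <cstdint>
    #include <cstdlib>

    static bool mightShare(int64_t Other, int64_t Stride, int64_t &Scale) {
      if (Other == Stride) {
        Scale = 1;                  // identical stride: trivially shareable
        return true;
      }
      if (std::llabs(Other) < Stride || Other % Stride != 0)
        return false;               // not an exact, not-smaller multiple
      Scale = Other / Stride;
      return true;
    }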
+
+/// isUsedByExitBranch - Return true if the icmp is used by a loop-terminating
+/// conditional branch, or is and'ed / or'ed with other conditions before being
+/// used as the branch condition.
+static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) {
+ BasicBlock *CondBB = Cond->getParent();
+ if (!L->isLoopExiting(CondBB))
+ return false;
+ BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator());
+ if (!TermBr || !TermBr->isConditional())
+ return false;
+
+ Value *User = *Cond->use_begin();
+ Instruction *UserInst = dyn_cast<Instruction>(User);
+ while (UserInst &&
+ (UserInst->getOpcode() == Instruction::And ||
+ UserInst->getOpcode() == Instruction::Or)) {
+ if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB)
+ return false;
+ User = *User->use_begin();
+ UserInst = dyn_cast<Instruction>(User);
+ }
+ return User == TermBr;
+}
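isUsedByExitBranch tolerates the compare feeding a chain of and/or instructions before the branch, provided every link in the chain has a single use and stays in the compare's block. A standalone sketch of that walk, with a stand-in Node type whose SoleUser field plays the role of the unique user:

    struct Node {
      bool IsAndOr;       // analogue of the And/Or opcode test
      bool HasOneUse;
      bool InCondBlock;
      Node *SoleUser;     // next user in the chain, maybe the branch
    };

    static bool reachesExitBranch(Node *User, const Node *TermBr) {
      while (User && User->IsAndOr) {
        if (!User->HasOneUse || !User->InCondBlock)
          return false;   // chain escapes the block: be conservative
        User = User->SoleUser;
      }
      return User == TermBr;
    }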
+
+static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse,
+ ScalarEvolution *SE, Loop *L,
+ const TargetLowering *TLI = 0) {
+ if (!L->contains(Cond->getParent()))
+ return false;
+
+ if (!isa<SCEVConstant>(CondUse->getOffset()))
+ return false;
+
+ // Handle only tests for equality for the moment.
+ if (!Cond->isEquality() || !Cond->hasOneUse())
+ return false;
+ if (!isUsedByExitBranch(Cond, L))
+ return false;
+
+ Value *CondOp0 = Cond->getOperand(0);
+ const SCEV *IV = SE->getSCEV(CondOp0);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine())
+ return false;
+
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!SC || SC->getValue()->getSExtValue() < 0)
+ // If it's already counting down, don't do anything.
+ return false;
+
+ // If the RHS of the comparison is not loop invariant, the rewrite
+ // cannot be done. Also bail out if it's already comparing against zero.
+ // If we are checking this before the cmp stride optimization, check if
+ // it's comparing against an already legal immediate.
+ Value *RHS = Cond->getOperand(1);
+ ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
+ if (!L->isLoopInvariant(RHS) ||
+ (RHSC && RHSC->isZero()) ||
+ (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue())))
+ return false;
+
+ // Make sure the IV is only used for counting. Value may be preinc or
+ // postinc; 2 uses in either case.
+ if (!CondOp0->hasNUses(2))
+ return false;
+
+ return true;
+}
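Collected in one place, the guards above amount to a single predicate: attempt the count-to-zero rewrite only for an equality exit test of an affine IV with a positive constant step, against a loop-invariant bound that is neither zero nor already a cheap compare immediate. A standalone distillation, all fields stand-ins:

    #include <cstdint>

    struct ExitTest {
      bool IsEquality;
      bool IVIsAffine;
      int64_t Step;               // constant step; 0 if not constant
      bool RHSIsLoopInvariant;
      bool RHSIsZero;
      bool RHSIsCheapImmediate;   // analogue of isLegalICmpImmediate
    };

    static bool shouldCountToZero(const ExitTest &T) {
      return T.IsEquality && T.IVIsAffine && T.Step > 0 &&
             T.RHSIsLoopInvariant && !T.RHSIsZero &&
             !T.RHSIsCheapImmediate;
    }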
+
+/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
- // Finally, get the terminating condition for the loop if possible. If we
- // can, we want to change it to use a post-incremented version of its
- // induction variable, to allow coalescing the live ranges for the IV into
- // one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
- BasicBlock *ExitingBlock = L->getExitingBlock();
-
- if (!ExitingBlock)
- // Multiple exits, just look at the exit in the latch block if there is one.
- ExitingBlock = LatchBlock;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!TermBr)
- return;
- if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
- return;
+ bool LatchExit = L->isLoopExiting(LatchBlock);
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
- // Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
- const SCEV *const *CondStride = 0;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
- if (!FindIVUserForCond(Cond, CondUse, CondStride))
- return; // setcc doesn't use the IV.
-
- if (ExitingBlock != LatchBlock) {
- if (!Cond->hasOneUse())
- // See below, we don't want the condition to be cloned.
- return;
-
- // If exiting block is the latch block, we know it's safe and profitable to
- // transform the icmp to use post-inc iv. Otherwise do so only if it would
- // not reuse another iv and its iv would be reused by other uses. We are
- // optimizing for the case where the icmp is the only use of the iv.
- IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride];
- for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
- E = StrideUses.Users.end(); I != E; ++I) {
- if (I->getUser() == Cond)
- continue;
- if (!I->isUseOfPostIncrementedValue())
- return;
- }
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
- // FIXME: This is expensive, and worse still ChangeCompareStride does a
- // similar check. Can we perform all the icmp related transformations after
- // StrengthReduceStridedIVUsers?
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
- int64_t SInt = SC->getValue()->getSExtValue();
- for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
- ++NewStride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
- if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
- continue;
- int64_t SSInt =
- cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
- if (SSInt == SInt)
- return; // This can definitely be reused.
- if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
- continue;
- int64_t Scale = SSInt / SInt;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- return;
- }
- }
+ // Finally, get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
- StrideNoReuse.insert(*CondStride);
- }
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+ // FIXME: Overly conservative; the termination condition could be an 'or' etc.
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ continue;
- // If the trip count is computed in terms of a max (due to ScalarEvolution
- // being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT or ULT instead of NE.
- Cond = OptimizeMax(L, Cond, CondUse);
-
- // If possible, change stride and operands of the compare instruction to
- // eliminate one stride.
- if (ExitingBlock == LatchBlock)
- Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
-
- // It's possible for the setcc instruction to be anywhere in the loop, and
- // possible for it to have multiple users. If it is not immediately before
- // the latch block branch, move it.
- if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
- if (Cond->hasOneUse()) { // Condition has a single use, just move it.
- Cond->moveBefore(TermBr);
- } else {
- // Otherwise, clone the terminating condition and insert into the loopend.
- Cond = cast<ICmpInst>(Cond->clone());
- Cond->setName(L->getHeader()->getName() + ".termcond");
- LatchBlock->getInstList().insert(TermBr, Cond);
-
- // Clone the IVUse, as the old use still exists!
- IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
- CondUse->getOperandValToReplace());
- CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ const SCEV *CondStride = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse, CondStride))
+ continue;
+
+ // If the latch block is exiting and it's not a single block loop, it's
+ // not safe to use postinc iv in other exiting blocks. FIXME: overly
+ // conservative? How about icmp stride optimization?
+ bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock);
+ if (UsePostInc && ExitingBlock != LatchBlock) {
+ if (!Cond->hasOneUse())
+ // See below, we don't want the condition to be cloned.
+ UsePostInc = false;
+ else {
+ // If exiting block is the latch block, we know it's safe and profitable
+ // to transform the icmp to use post-inc iv. Otherwise do so only if it
+ // would not reuse another iv and its iv would be reused by other uses.
+ // We are optimizing for the case where the icmp is the only use of the
+ // iv.
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (I->getUser() == Cond)
+ continue;
+ if (!I->isUseOfPostIncrementedValue()) {
+ UsePostInc = false;
+ break;
+ }
+ }
+ }
+
+ // If the iv for this stride might be shared and any use of the pre-inc
+ // iv might still be live, then it's not safe to use the post-inc iv.
+ if (UsePostInc &&
+ isa<SCEVConstant>(CondStride) &&
+ StrideMightBeShared(CondStride, L, true))
+ UsePostInc = false;
}
- }
- // If we get to here, we know that we can transform the setcc instruction to
- // use the post-incremented version of the IV, allowing us to coalesce the
- // live ranges for the IV correctly.
- CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride));
- CondUse->setIsUseOfPostIncrementedValue(true);
- Changed = true;
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ Cond = OptimizeMax(L, Cond, CondUse);
+
+ // If possible, change stride and operands of the compare instruction to
+ // eliminate one stride. However, avoid rewriting the compare instruction
+ // with an iv of new stride if it's likely the new stride uses will be
+ // rewritten using the stride of the compare instruction.
+ if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
+ // If the condition stride is a constant and it's the only use, we might
+ // want to optimize it first by turning it to count toward zero.
+ if (!StrideMightBeShared(CondStride, L, false) &&
+ !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
+ Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+ }
- ++NumLoopCond;
-}
+ if (!UsePostInc)
+ continue;
-/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-/// when to exit the loop is used only for that purpose, try to rearrange things
-/// so it counts down to a test against zero.
-void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+ DEBUG(errs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
- // If the number of times the loop is executed isn't computable, give up.
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- return;
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
+ if (Cond->hasOneUse()) { // Condition has a single use, just move it.
+ Cond->moveBefore(TermBr);
+ } else {
+ // Otherwise, clone the terminating condition and insert into the
+ // loopend.
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
+ CondUse->getOperandValToReplace());
+ CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
+ }
+ }
- // Get the terminating condition for the loop if possible (this isn't
- // necessarily in the latch, or a block that's a predecessor of the header).
- if (!L->getExitBlock())
- return; // More than one loop exit blocks.
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
+ CondUse->setIsUseOfPostIncrementedValue(true);
+ Changed = true;
- // Okay, there is one exit block. Try to find the condition that causes the
- // loop to be exited.
- BasicBlock *ExitingBlock = L->getExitingBlock();
- if (!ExitingBlock)
- return; // More than one block exiting!
+ ++NumLoopCond;
+ }
+}
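At the source level, the post-inc rewrite this function performs corresponds to testing the already-incremented IV (the use's offset is adjusted by -stride), so the pre-increment value dies at the add and the IV needs only one live range across the backedge. A hand-written sketch of the two shapes; 'body' is a placeholder:

    static void beforeRewrite(unsigned N, void body(unsigned)) {
      for (unsigned I = 0; I != N; ++I)
        body(I);             // exit test reads the pre-inc value of I
    }

    static void afterRewrite(unsigned N, void body(unsigned)) {
      if (N == 0) return;    // guard: the rotated loop runs at least once
      unsigned I = 0;
      do {
        body(I);
        I += 1;              // increment first...
      } while (I != N);      // ...then test the post-incremented value
    }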
- // Okay, we've computed the exiting block. See what condition causes us to
- // exit.
- //
- // FIXME: we should be able to handle switch instructions (with a single exit)
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (TermBr == 0) return;
- assert(TermBr->isConditional() && "If unconditional, it can't be in loop!");
- if (!isa<ICmpInst>(TermBr->getCondition()))
- return;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse,
+ Loop *L) {
+ // If the only use is an icmp of a loop exiting conditional branch, then
+ // attempt the optimization.
+ BasedUser User = BasedUser(*CondUse, SE);
+ assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!");
+ ICmpInst *Cond = cast<ICmpInst>(User.Inst);
+
+ // Less strict check now that compare stride optimization is done.
+ if (!ShouldCountToZero(Cond, CondUse, SE, L))
+ return false;
- // Handle only tests for equality for the moment, and only stride 1.
- if (Cond->getPredicate() != CmpInst::ICMP_EQ)
- return;
- const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
- const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
- if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
- return;
- // If the RHS of the comparison is defined inside the loop, the rewrite
- // cannot be done.
- if (Instruction *CR = dyn_cast<Instruction>(Cond->getOperand(1)))
- if (L->contains(CR->getParent()))
- return;
+ Value *CondOp0 = Cond->getOperand(0);
+ PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0);
+ Instruction *Incr;
+ if (!PHIExpr) {
+ // Value tested is postinc. Find the phi node.
+ Incr = dyn_cast<BinaryOperator>(CondOp0);
+ // FIXME: Just use User.OperandValToReplace here?
+ if (!Incr || Incr->getOpcode() != Instruction::Add)
+ return false;
- // Make sure the IV is only used for counting. Value may be preinc or
- // postinc; 2 uses in either case.
- if (!Cond->getOperand(0)->hasNUses(2))
- return;
- PHINode *phi = dyn_cast<PHINode>(Cond->getOperand(0));
- Instruction *incr;
- if (phi && phi->getParent()==L->getHeader()) {
- // value tested is preinc. Find the increment.
- // A CmpInst is not a BinaryOperator; we depend on this.
- Instruction::use_iterator UI = phi->use_begin();
- incr = dyn_cast<BinaryOperator>(UI);
- if (!incr)
- incr = dyn_cast<BinaryOperator>(++UI);
- // 1 use for postinc value, the phi. Unnecessarily conservative?
- if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add)
- return;
- } else {
- // Value tested is postinc. Find the phi node.
- incr = dyn_cast<BinaryOperator>(Cond->getOperand(0));
- if (!incr || incr->getOpcode()!=Instruction::Add)
- return;
-
- Instruction::use_iterator UI = Cond->getOperand(0)->use_begin();
- phi = dyn_cast<PHINode>(UI);
- if (!phi)
- phi = dyn_cast<PHINode>(++UI);
+ PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0));
+ if (!PHIExpr)
+ return false;
// 1 use for preinc value, the increment.
- if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse())
- return;
+ if (!PHIExpr->hasOneUse())
+ return false;
+ } else {
+ assert(isa<PHINode>(CondOp0) &&
+ "Unexpected loop exiting counting instruction sequence!");
+ PHIExpr = cast<PHINode>(CondOp0);
+ // Value tested is preinc. Find the increment.
+ // A CmpInst is not a BinaryOperator; we depend on this.
+ Instruction::use_iterator UI = PHIExpr->use_begin();
+ Incr = dyn_cast<BinaryOperator>(UI);
+ if (!Incr)
+ Incr = dyn_cast<BinaryOperator>(++UI);
+ // One use for postinc value, the phi. Unnecessarily conservative?
+ if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add)
+ return false;
}
// Replace the increment with a decrement.
- BinaryOperator *decr =
- BinaryOperator::Create(Instruction::Sub, incr->getOperand(0),
- incr->getOperand(1), "tmp", incr);
- incr->replaceAllUsesWith(decr);
- incr->eraseFromParent();
+ DEBUG(errs() << "LSR: Examining use ");
+ DEBUG(WriteAsOperand(errs(), CondOp0, /*PrintType=*/false));
+ DEBUG(errs() << " in Inst: " << *Cond << '\n');
+ BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
+ Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
+ Incr->replaceAllUsesWith(Decr);
+ Incr->eraseFromParent();
// Substitute endval-startval for the original startval, and 0 for the
- // original endval. Since we're only testing for equality this is OK even
+ // original endval. Since we're only testing for equality this is OK even
// if the computation wraps around.
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
- int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0;
- Value *startVal = phi->getIncomingValue(inBlock);
- Value *endVal = Cond->getOperand(1);
- // FIXME check for case where both are constant
+ unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
+ Value *StartVal = PHIExpr->getIncomingValue(InBlock);
+ Value *EndVal = Cond->getOperand(1);
+ DEBUG(errs() << " Optimize loop counting iv to count down ["
+ << *EndVal << " .. " << *StartVal << "]\n");
+
+ // FIXME: check for case where both are constant.
Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
- BinaryOperator *NewStartVal =
- BinaryOperator::Create(Instruction::Sub, endVal, startVal,
- "tmp", PreInsertPt);
- phi->setIncomingValue(inBlock, NewStartVal);
+ BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub,
+ EndVal, StartVal, "tmp", PreInsertPt);
+ PHIExpr->setIncomingValue(InBlock, NewStartVal);
Cond->setOperand(1, Zero);
+ DEBUG(errs() << " New icmp: " << *Cond << "\n");
+
+ int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
+ const SCEV *NewStride = 0;
+ bool Found = false;
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ const SCEV *OldStride = IU->StrideOrder[i];
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride))
+ if (SC->getValue()->getSExtValue() == -SInt) {
+ Found = true;
+ NewStride = OldStride;
+ break;
+ }
+ }
+
+ if (!Found)
+ NewStride = SE->getIntegerSCEV(-SInt, Stride->getType());
+ IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0));
+ IU->IVUsesByStride[Stride]->removeUser(CondUse);
+
+ CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
+ Stride = NewStride;
- Changed = true;
+ ++NumCountZero;
+
+ return true;
}
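Seen at the source level, the rewrite above makes the start endval-startval, the bound zero, and the increment a decrement, so the exit test becomes a compare against zero, which is typically free off the flags of the subtract on many targets. A hand-written sketch in unsigned arithmetic, where the equality test stays correct even under wraparound; 'body' is a placeholder:

    static void countUp(unsigned Start, unsigned End, void body()) {
      for (unsigned I = Start; I != End; ++I)
        body();              // I is used only by the test and increment
    }

    static void countDown(unsigned Start, unsigned End, void body()) {
      for (unsigned I = End - Start; I != 0; --I)
        body();              // same trip count, now testing against zero
    }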
-bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
+/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+/// when to exit the loop is used only for that purpose, try to rearrange things
+/// so it counts down to a test against zero.
+bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+ bool ThisChanged = false;
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ const SCEV *Stride = IU->StrideOrder[i];
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(Stride);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ // If stride is a constant and it has an icmpinst use, check if we can
+ // optimize the loop to count down.
+ if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) {
+ Instruction *User = SI->second->Users.begin()->getUser();
+ if (!isa<ICmpInst>(User))
+ continue;
+ const SCEV *CondStride = Stride;
+ IVStrideUse *Use = &*SI->second->Users.begin();
+ if (!OptimizeLoopCountIVOfStride(CondStride, Use, L))
+ continue;
+ ThisChanged = true;
+ // Now check if it's possible to reuse this iv for other stride uses.
+ for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) {
+ const SCEV *SStride = IU->StrideOrder[j];
+ if (SStride == CondStride)
+ continue;
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII =
+ IU->IVUsesByStride.find(SStride);
+ assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SII->first->isLoopInvariant(L))
+ continue;
+ // FIXME: Rewrite other stride using CondStride.
+ }
+ }
+ }
+
+ Changed |= ThisChanged;
+ return ThisChanged;
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
Changed = false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->getLoopPreheader() || !L->getLoopLatch())
+ return false;
+
if (!IU->IVUsesByStride.empty()) {
DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
<< "\" ";
@@ -2545,7 +2771,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// Change loop terminating condition to use the postinc iv when possible
// and optimize loop terminating compare. FIXME: Move this after
- // StrengthReduceStridedIVUsers?
+ // StrengthReduceIVUsersOfStride?
OptimizeLoopTermCond(L);
// FIXME: We can shrink overlarge IV's here. e.g. if the code has
@@ -2561,26 +2787,12 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// IVsByStride keeps IVs for one particular loop.
assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
- // Note: this processes each stride/type pair individually. All users
- // passed into StrengthReduceStridedIVUsers have the same type AND stride.
- // Also, note that we iterate over IVUsesByStride indirectly by using
- // StrideOrder. This extra layer of indirection makes the ordering of
- // strides deterministic - not dependent on map order.
- for (unsigned Stride = 0, e = IU->StrideOrder.size();
- Stride != e; ++Stride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
- // FIXME: Generalize to non-affine IV's.
- if (!SI->first->isLoopInvariant(L))
- continue;
- StrengthReduceStridedIVUsers(SI->first, *SI->second, L);
- }
- }
+ StrengthReduceIVUsers(L);
- // After all sharing is done, see if we can adjust the loop to test against
- // zero instead of counting up to a maximum. This is usually faster.
- OptimizeLoopCountIV(L);
+ // After all sharing is done, see if we can adjust the loop to test against
+ // zero instead of counting up to a maximum. This is usually faster.
+ OptimizeLoopCountIV(L);
+ }
// We're done analyzing this loop; release all the state we built up for it.
IVsByStride.clear();
diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp
deleted file mode 100644
index 837ec59..0000000
--- a/lib/Transforms/Scalar/LoopUnroll.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass implements a simple loop unroller. It works best when loops have
-// been canonicalized by the -indvars pass, allowing it to determine the trip
-// counts of loops easily.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loop-unroll"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include <climits>
-
-using namespace llvm;
-
-static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
- cl::desc("The cut-off point for automatic loop unrolling"));
-
-static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::init(0), cl::Hidden,
- cl::desc("Use this unroll count for all loops, for testing purposes"));
-
-static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
- cl::desc("Allows loops to be partially unrolled until "
- "-unroll-threshold loop size is reached."));
-
-namespace {
- class LoopUnroll : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(&ID) {}
-
- /// A magic value for use with the Threshold parameter to indicate
- /// that the loop unroll should be performed regardless of how much
- /// code expansion would result.
- static const unsigned NoThreshold = UINT_MAX;
-
- bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- ///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addRequired<LoopInfo>();
- AU.addPreservedID(LCSSAID);
- AU.addPreserved<LoopInfo>();
- // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
- // If loop unroll does not preserve dom info then LCSSA pass on next
- // loop will receive invalid dom info.
- // For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
- }
- };
-}
-
-char LoopUnroll::ID = 0;
-static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
-
-Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
-
-/// ApproximateLoopSize - Approximate the size of the loop.
-static unsigned ApproximateLoopSize(const Loop *L) {
- unsigned Size = 0;
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- Instruction *Term = BB->getTerminator();
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (isa<PHINode>(I) && BB == L->getHeader()) {
- // Ignore PHI nodes in the header.
- } else if (I->hasOneUse() && I->use_back() == Term) {
- // Ignore instructions only used by the loop terminator.
- } else if (isa<DbgInfoIntrinsic>(I)) {
- // Ignore debug instructions
- } else if (isa<GetElementPtrInst>(I) && I->hasOneUse()) {
- // Ignore GEP as they generally are subsumed into a load or store.
- } else if (isa<CallInst>(I)) {
- // Estimate size overhead introduced by call instructions which
- // is higher than other instructions. Here 3 and 10 are magic
- // numbers that help one isolated test case from PR2067 without
- // negatively impacting measured benchmarks.
- Size += isa<IntrinsicInst>(I) ? 3 : 10;
- } else {
- ++Size;
- }
-
- // TODO: Ignore expressions derived from PHI and constants if inval of phi
- // is a constant, or if operation is associative. This will get induction
- // variables.
- }
- }
-
- return Size;
-}
-
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
- assert(L->isLCSSAForm());
- LoopInfo *LI = &getAnalysis<LoopInfo>();
-
- BasicBlock *Header = L->getHeader();
- DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName()
- << "] Loop %" << Header->getName() << "\n");
- (void)Header;
-
- // Find trip count
- unsigned TripCount = L->getSmallConstantTripCount();
- unsigned Count = UnrollCount;
-
- // Automatically select an unroll count.
- if (Count == 0) {
- // Conservative heuristic: if we know the trip count, see if we can
- // completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
- // threshold value.
- if (TripCount == 0)
- return false;
- Count = TripCount;
- }
-
- // Enforce the threshold.
- if (UnrollThreshold != NoThreshold) {
- unsigned LoopSize = ApproximateLoopSize(L);
- DEBUG(errs() << " Loop Size = " << LoopSize << "\n");
- uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > UnrollThreshold) {
- DEBUG(errs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << UnrollThreshold << "\n");
- if (!UnrollAllowPartial) {
- DEBUG(errs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- return false;
- }
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = UnrollThreshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
- }
- if (Count < 2) {
- DEBUG(errs() << " could not unroll partially\n");
- return false;
- }
- DEBUG(errs() << " partially unrolling with count: " << Count << "\n");
- }
- }
-
- // Unroll the loop.
- Function *F = L->getHeader()->getParent();
- if (!UnrollLoop(L, Count, LI, &LPM))
- return false;
-
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- if (DT) {
- DT->runOnFunction(*F);
- DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
- if (DF)
- DF->runOnFunction(*F);
- }
- return true;
-}
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index c7b00da..38d267a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,7 +32,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -407,6 +406,10 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
initLoopData();
Function *F = loopHeader->getParent();
+ // If LoopSimplify was unable to form a preheader, don't do any unswitching.
+ if (!loopPreheader)
+ return false;
+
// If the condition is trivial, always unswitch. There is no code growth for
// this case.
if (!IsTrivialUnswitchCondition(LoopCond)) {
@@ -957,7 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Worklist.pop_back();
// Simple constant folding.
- if (Constant *C = ConstantFoldInstruction(I, I->getContext())) {
+ if (Constant *C = ConstantFoldInstruction(I)) {
ReplaceUsesOfWith(I, C, Worklist, L, LPM);
continue;
}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index af29f97..8466918 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -27,7 +27,6 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
@@ -198,8 +197,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
static Instruction *LowerNegateToMultiply(Instruction *Neg,
- std::map<AssertingVH<>, unsigned> &ValueRankMap,
- LLVMContext &Context) {
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
Constant *Cst = Constant::getAllOnesValue(Neg->getType());
Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
@@ -255,7 +253,6 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
std::vector<ValueEntry> &Ops) {
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
unsigned Opcode = I->getOpcode();
- LLVMContext &Context = I->getContext();
// First step, linearize the expression if it is in ((A+B)+(C+D)) form.
BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
@@ -265,13 +262,11 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
// transform them into multiplies by -1 so they can be reassociated.
if (I->getOpcode() == Instruction::Mul) {
if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
- LHS = LowerNegateToMultiply(cast<Instruction>(LHS),
- ValueRankMap, Context);
+ LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
LHSBO = isReassociableOp(LHS, Opcode);
}
if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
- RHS = LowerNegateToMultiply(cast<Instruction>(RHS),
- ValueRankMap, Context);
+ RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
RHSBO = isReassociableOp(RHS, Opcode);
}
}
@@ -373,7 +368,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// version of the value is returned, and BI is left pointing at the instruction
// that should be processed next by the reassociation pass.
//
-static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
+static Value *NegateValue(Value *V, Instruction *BI) {
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
// expression chain as possible, to expose the add instructions. In practice,
@@ -386,8 +381,8 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
// Push the negates through the add.
- I->setOperand(0, NegateValue(Context, I->getOperand(0), BI));
- I->setOperand(1, NegateValue(Context, I->getOperand(1), BI));
+ I->setOperand(0, NegateValue(I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI));
// We must move the add instruction here, because the neg instructions do
// not dominate the old add instruction in general. By moving it, we are
@@ -407,7 +402,7 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
/// X-Y into (X + -Y).
-static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
+static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
if (BinaryOperator::isNeg(Sub))
return false;
@@ -431,7 +426,7 @@ static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
-static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
+static Instruction *BreakUpSubtract(Instruction *Sub,
std::map<AssertingVH<>, unsigned> &ValueRankMap) {
// Convert a subtract into an add and a neg instruction... so that sub
// instructions can be commuted with other add instructions...
@@ -439,7 +434,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
// Calculate the negative value of Operand 1 of the sub instruction...
// and set it as the RHS of the add instruction we just made...
//
- Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub);
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
Instruction *New =
BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
New->takeName(Sub);
@@ -457,8 +452,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
static Instruction *ConvertShiftToMul(Instruction *Shl,
- std::map<AssertingVH<>, unsigned> &ValueRankMap,
- LLVMContext &Context) {
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
@@ -781,13 +775,11 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
/// ReassociateBB - Inspect all of the instructions in this basic block,
/// reassociating them as we go.
void Reassociate::ReassociateBB(BasicBlock *BB) {
- LLVMContext &Context = BB->getContext();
-
for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
Instruction *BI = BBI++;
if (BI->getOpcode() == Instruction::Shl &&
isa<ConstantInt>(BI->getOperand(1)))
- if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap, Context)) {
+ if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
MadeChange = true;
BI = NI;
}
@@ -800,8 +792,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
// If this is a subtract instruction which is not already in negate form,
// see if we can convert it to X+-Y.
if (BI->getOpcode() == Instruction::Sub) {
- if (ShouldBreakUpSubtract(Context, BI)) {
- BI = BreakUpSubtract(Context, BI, ValueRankMap);
+ if (ShouldBreakUpSubtract(BI)) {
+ BI = BreakUpSubtract(BI, ValueRankMap);
MadeChange = true;
} else if (BinaryOperator::isNeg(BI)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
@@ -809,7 +801,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
(!BI->hasOneUse() ||
!isReassociableOp(BI->use_back(), Instruction::Mul))) {
- BI = LowerNegateToMultiply(BI, ValueRankMap, Context);
+ BI = LowerNegateToMultiply(BI, ValueRankMap);
MadeChange = true;
}
}
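
The Reassociate changes above only drop the now-unused LLVMContext parameter; the transforms themselves are unchanged. A minimal arithmetic sketch of what they compute (plain integers, not the LLVM API):

    #include <cassert>

    // BreakUpSubtract: X - Y becomes X + (0 - Y), so subtracts can
    // commute with other adds during reassociation.
    int breakUpSubtract(int X, int Y) { return X + (0 - Y); }

    // NegateValue pushes a negation through an add:
    // -(A + B) becomes (-A) + (-B), exposing the adds.
    int negatePushedThroughAdd(int A, int B) { return (0 - A) + (0 - B); }

    // ConvertShiftToMul: X << C becomes X * (1 << C).
    int shiftToMul(int X, unsigned C) { return X * (1 << C); }

    int main() {
      assert(breakUpSubtract(10, 4) == 10 - 4);
      assert(negatePushedThroughAdd(3, 4) == -(3 + 4));
      assert(shiftToMul(5, 3) == 5 << 3);
      return 0;
    }
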
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 509a6db..c202a2c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -795,9 +795,14 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
return markOverdefined(&EVI);
Value *AggVal = EVI.getAggregateOperand();
- unsigned i = *EVI.idx_begin();
- LatticeVal EltVal = getStructValueState(AggVal, i);
- mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ if (isa<StructType>(AggVal->getType())) {
+ unsigned i = *EVI.idx_begin();
+ LatticeVal EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ } else {
+ // Otherwise, must be extracting from an array.
+ return markOverdefined(&EVI);
+ }
}
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
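
The SCCP fix above reflects that the solver only tracks per-element lattice values for struct aggregates; extracting from an array now conservatively yields overdefined instead of querying struct state the solver does not have. A toy model of the decision (not the LLVM lattice types):

    #include <cstdio>

    enum Lattice { Undefined, Constant, Overdefined };

    // Struct extracts consult the tracked element state; array
    // extracts are conservatively overdefined.
    Lattice visitExtract(bool FromStruct, Lattice ElementState) {
      return FromStruct ? ElementState : Overdefined;
    }

    int main() {
      std::printf("%d\n", visitExtract(false, Constant)); // 2
      return 0;
    }
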
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
index c047fca..001267a 100644
--- a/lib/Transforms/Scalar/SCCVN.cpp
+++ b/lib/Transforms/Scalar/SCCVN.cpp
@@ -507,7 +507,7 @@ void ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
- for (DenseMap<Value*, uint32_t>::iterator
+ for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
}
@@ -629,9 +629,6 @@ bool SCCVN::runOnFunction(Function& F) {
}
}
- // FIXME: This code is commented out for now, because it can lead to the
- // insertion of a lot of redundant PHIs being inserted by SSAUpdater.
-#if 0
// Perform a forward data-flow to compute availability at all points on
// the CFG.
do {
@@ -709,7 +706,6 @@ bool SCCVN::runOnFunction(Function& F) {
CurInst->eraseFromParent();
}
}
-#endif
VT.clear();
for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 5669da0..b54565c 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -26,10 +26,6 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCFGSimplificationPass());
}
-void LLVMAddCondPropagationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCondPropagationPass());
-}
-
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createDeadStoreEliminationPass());
}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 575c93b..611505e 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -100,7 +100,7 @@ public:
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
- void EmitPutChar(Value *Char, IRBuilder<> &B);
+ Value *EmitPutChar(Value *Char, IRBuilder<> &B);
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
@@ -252,18 +252,20 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
+Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
Module *M = Caller->getParent();
Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context),
Type::getInt32Ty(*Context), NULL);
CallInst *CI = B.CreateCall(PutChar,
B.CreateIntCast(Char,
Type::getInt32Ty(*Context),
+ /*isSigned*/true,
"chari"),
"putchar");
if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
+ return CI;
}
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
@@ -302,7 +304,8 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
Type::getInt32Ty(*Context),
Type::getInt32Ty(*Context),
File->getType(), NULL);
- Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari");
+ Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true,
+ "chari");
CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
@@ -955,6 +958,17 @@ struct MemCmpOpt : public LibCallOptimization {
return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType());
}
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ std::string LHSStr, RHSStr;
+ if (GetConstantStringInfo(LHS, LHSStr) &&
+ GetConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.length() || Len > RHSStr.length())
+ return 0;
+ uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ return ConstantInt::get(CI->getType(), Ret);
+ }
+
return 0;
}
};
@@ -1314,11 +1328,13 @@ struct PrintFOpt : public LibCallOptimization {
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), 0);
- // printf("x") -> putchar('x'), even for '%'.
+ // printf("x") -> putchar('x'), even for '%'. Return the result of putchar
+ // in case there is an error writing to stdout.
if (FormatStr.size() == 1) {
- EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 1);
+ Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
+ FormatStr[0]), B);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("foo\n") --> puts("foo")
@@ -1339,9 +1355,10 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%c", chr) --> putchar(*(i8*)dst)
if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
isa<IntegerType>(CI->getOperand(2)->getType())) {
- EmitPutChar(CI->getOperand(2), B);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 1);
+ Value *Res = EmitPutChar(CI->getOperand(2), B);
+
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("%s\n", str) --> puts(str)
@@ -2479,10 +2496,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// lround, lroundf, lroundl:
// * lround(cnst) -> cnst'
//
-// memcmp:
-// * memcmp(x,y,l) -> cnst
-// (if all arguments are constant and strlen(x) <= l and strlen(y) <= l)
-//
// pow, powf, powl:
// * pow(exp(x),y) -> exp(x*y)
// * pow(sqrt(x),y) -> pow(x,y*0.5)
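
With the memcmp fold implemented in MemCmpOpt above, its entry comes off this TODO list. A standalone sketch of the fold, using std::string in place of the constant-string query:

    #include <cstdio>
    #include <cstring>
    #include <string>

    // Fold memcmp(x, y, l) to a constant when both strings are known
    // and l is in bounds (mirrors the length check added above).
    bool tryFoldMemCmp(const std::string &LHS, const std::string &RHS,
                       std::size_t Len, int &Result) {
      if (Len > LHS.length() || Len > RHS.length())
        return false; // would read out-of-bounds memory
      Result = std::memcmp(LHS.data(), RHS.data(), Len);
      return true;
    }

    int main() {
      int R;
      if (tryFoldMemCmp("foo", "foa", 3, R))
        std::printf("memcmp folds to %d\n", R); // positive: 'o' > 'a'
      return 0;
    }
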
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 4864e23..b06ae3d 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -359,8 +359,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
Instruction *Inst = BI++;
if (isInstructionTriviallyDead(Inst))
Inst->eraseFromParent();
- else if (Constant *C = ConstantFoldInstruction(Inst,
- Inst->getContext())) {
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
Inst->replaceAllUsesWith(C);
Inst->eraseFromParent();
}
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index b56e170..4119cb9 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -25,7 +25,7 @@
// unlikely, that the return returns something else (like constant 0), and
// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
// the function return the exact same value.
-// 4. If it can prove that callees do not access theier caller stack frame,
+// 4. If it can prove that callees do not access their caller stack frame,
// they are marked as eligible for tail call elimination (by the code
// generator).
//
@@ -58,6 +58,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -75,7 +76,7 @@ namespace {
private:
bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- std::vector<PHINode*> &ArgumentPHIs,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail);
bool CanMoveAboveCall(Instruction *I, CallInst *CI);
Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
@@ -90,7 +91,6 @@ FunctionPass *llvm::createTailCallEliminationPass() {
return new TailCallElim();
}
-
/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
/// callees of this function. We only do very simple analysis right now, this
/// could be expanded in the future to use mod/ref information for particular
@@ -100,7 +100,7 @@ static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
return true;
}
-/// FunctionContainsAllocas - Scan the specified basic block for alloca
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
/// instructions. If it contains any that might be accessed by calls, return
/// true.
static bool CheckForEscapingAllocas(BasicBlock *BB,
@@ -127,7 +127,7 @@ bool TailCallElim::runOnFunction(Function &F) {
BasicBlock *OldEntry = 0;
bool TailCallsAreMarkedTail = false;
- std::vector<PHINode*> ArgumentPHIs;
+ SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
bool FunctionContainsEscapingAllocas = false;
@@ -154,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) {
/// happen. This bug is PR962.
if (FunctionContainsEscapingAllocas)
return false;
-
// Second pass, change any tail calls to loops.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -204,7 +203,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
- if (LoadInst* L = dyn_cast<LoadInst>(I)) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
// Loads may always be moved above calls without side effects.
if (CI->mayHaveSideEffects()) {
// Non-volatile loads may be moved above a call with side effects if it
@@ -235,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// We currently handle static constants and arguments that are not modified as
// part of the recursion.
//
-static bool isDynamicConstant(Value *V, CallInst *CI) {
+static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
if (isa<Constant>(V)) return true; // Static constants are always dyn consts
// Check to see if this is an immutable argument, if so, the value
@@ -253,6 +252,15 @@ static bool isDynamicConstant(Value *V, CallInst *CI) {
if (CI->getOperand(ArgNo+1) == Arg)
return true;
}
+
+ // Switch cases are always constant integers. If the value is being switched
+ // on and the return is only reachable from one of its cases, it's
+ // effectively constant.
+ if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor())
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator()))
+ if (SI->getCondition() == V)
+ return SI->getDefaultDest() != RI->getParent();
+
// Not a constant or immutable argument, we can't safely transform.
return false;
}
@@ -265,10 +273,6 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
Function *F = TheRI->getParent()->getParent();
Value *ReturnedValue = 0;
- // TODO: Handle multiple value ret instructions;
- if (isa<StructType>(F->getReturnType()))
- return 0;
-
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
if (RI != TheRI) {
@@ -278,7 +282,7 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
// evaluatable at the start of the initial invocation of the function,
// instead of at the end of the evaluation.
//
- if (!isDynamicConstant(RetOp, CI))
+ if (!isDynamicConstant(RetOp, CI, RI))
return 0;
if (ReturnedValue && RetOp != ReturnedValue)
@@ -315,7 +319,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- std::vector<PHINode*> &ArgumentPHIs,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail) {
BasicBlock *BB = Ret->getParent();
Function *F = BB->getParent();
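
The switch-case reasoning added to isDynamicConstant enables TRE in functions like the hypothetical one below: the `return N` is only reachable from `case 0`, where the switched-on value is provably 0, so the differing return no longer blocks turning the recursion into a loop.

    #include <cstdio>

    // Hypothetical example. 'return N' sits in a block reachable only
    // from 'case 0', so N is effectively constant there, and the tail
    // call in the default case can be rewritten as a loop.
    int countDown(int N) {
      switch (N) {
      case 0:
        return N;
      default:
        return countDown(N - 1);
      }
    }

    int main() {
      std::printf("%d\n", countDown(5)); // prints 0
      return 0;
    }
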
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index c728c0b..2974592 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -275,8 +275,6 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
/// SplitEdge - Split the edge connecting specified block. Pass P must
/// not be NULL.
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
- assert(!isa<IndirectBrInst>(BB->getTerminator()) &&
- "Cannot split an edge from an IndirectBrInst");
TerminatorInst *LatchTerm = BB->getTerminator();
unsigned SuccNum = 0;
#ifndef NDEBUG
@@ -386,6 +384,12 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
bool IsLoopEntry = !!L;
bool SplitMakesNewLoopHeader = false;
for (unsigned i = 0; i != NumPreds; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+
Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
if (LI) {
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index fd8862c..162d7b3 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -20,6 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -322,8 +323,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
/// mapping its operands through ValueMap if they are available.
Constant *PruningFunctionCloner::
ConstantFoldMappedInstruction(const Instruction *I) {
- LLVMContext &Context = I->getContext();
-
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
@@ -333,9 +332,8 @@ ConstantFoldMappedInstruction(const Instruction *I) {
return 0; // All operands not constant!
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Ops[0], Ops.size(),
- Context, TD);
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
@@ -346,7 +344,28 @@ ConstantFoldMappedInstruction(const Instruction *I) {
CE);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
- Ops.size(), Context, TD);
+ Ops.size(), TD);
+}
+
+static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD,
+ LLVMContext &Context) {
+ DILocation ILoc(InsnMD);
+ if (ILoc.isNull()) return InsnMD;
+
+ DILocation CallLoc(TheCallMD);
+ if (CallLoc.isNull()) return InsnMD;
+
+ DILocation OrigLocation = ILoc.getOrigLocation();
+ MDNode *NewLoc = TheCallMD;
+ if (!OrigLocation.isNull())
+ NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD, Context);
+
+ SmallVector<Value *, 4> MDVs;
+ MDVs.push_back(InsnMD->getElement(0)); // Line
+ MDVs.push_back(InsnMD->getElement(1)); // Col
+ MDVs.push_back(InsnMD->getElement(2)); // Scope
+ MDVs.push_back(NewLoc);
+ return MDNode::get(Context, MDVs.data(), MDVs.size());
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -361,7 +380,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
- const TargetData *TD) {
+ const TargetData *TD,
+ Instruction *TheCall) {
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
@@ -400,19 +420,52 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// references as we go. This uses ValueMap to do all the hard work.
//
BasicBlock::iterator I = NewBB->begin();
+
+ LLVMContext &Context = OldFunc->getContext();
+ unsigned DbgKind = Context.getMetadata().getMDKind("dbg");
+ MDNode *TheCallMD = NULL;
+ SmallVector<Value *, 4> MDVs;
+ if (TheCall && TheCall->hasMetadata())
+ TheCallMD = Context.getMetadata().getMD(DbgKind, TheCall);
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
// Skip over all PHI nodes, remembering them for later.
BasicBlock::const_iterator OldI = BI->begin();
- for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
+ for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
+ if (I->hasMetadata()) {
+ if (TheCallMD) {
+ if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) {
+ MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context);
+ Context.getMetadata().addMD(DbgKind, NewMD, I);
+ }
+ } else {
+ // The cloned instruction has dbg info but the call instruction
+ // does not have dbg info. Remove dbg info from cloned instruction.
+ Context.getMetadata().removeMD(DbgKind, I);
+ }
+ }
PHIToResolve.push_back(cast<PHINode>(OldI));
+ }
}
// Otherwise, remap the rest of the instructions normally.
- for (; I != NewBB->end(); ++I)
+ for (; I != NewBB->end(); ++I) {
+ if (I->hasMetadata()) {
+ if (TheCallMD) {
+ if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) {
+ MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context);
+ Context.getMetadata().addMD(DbgKind, NewMD, I);
+ }
+ } else {
+ // The cloned instruction has dbg info but the call instruction
+ // does not have dbg info. Remove dbg info from cloned instruction.
+ Context.getMetadata().removeMD(DbgKind, I);
+ }
+ }
RemapInstruction(I, ValueMap);
+ }
}
// Defer PHI resolution until rest of function is resolved, PHI resolution
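
UpdateInlinedAtInfo above rewrites an instruction's debug location so that the innermost "inlined at" link points at the call being inlined, recursing through any existing chain. An illustrative model of that recursion (plain structs, not the LLVM metadata API; allocations are leaked for brevity):

    #include <cstdio>

    struct Loc {
      unsigned Line, Col;
      const Loc *InlinedAt; // null when not already inlined
    };

    // Rebuild L's chain so its innermost InlinedAt is CallSite.
    const Loc *chainInlinedAt(const Loc *L, const Loc *CallSite) {
      if (!L)
        return CallSite;
      return new Loc{L->Line, L->Col,
                     chainInlinedAt(L->InlinedAt, CallSite)};
    }

    int main() {
      Loc CallSite{10, 3, nullptr}; // location of the inlined call
      Loc InstLoc{42, 7, nullptr};  // location inside the callee
      const Loc *N = chainInlinedAt(&InstLoc, &CallSite);
      std::printf("line %u inlined at line %u\n",
                  N->Line, N->InlinedAt->Line); // line 42 ... line 10
      return 0;
    }
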
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 20f5a4a..043046c 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -386,7 +386,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
- &InlinedFunctionInfo, TD);
+ &InlinedFunctionInfo, TD, TheCall);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 56e662e..590d667 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -50,7 +50,6 @@ namespace {
LCSSA() : LoopPass(&ID) {}
// Cached analysis information for the current function.
- LoopInfo *LI;
DominatorTree *DT;
std::vector<BasicBlock*> LoopBlocks;
PredIteratorCache PredCache;
@@ -64,6 +63,9 @@ namespace {
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+
+ // LCSSA doesn't actually require LoopSimplify, but the PassManager
+ // doesn't know how to schedule LoopSimplify by itself.
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredTransitive<LoopInfo>();
@@ -121,7 +123,6 @@ static bool BlockDominatesAnExit(BasicBlock *BB,
bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
L = TheLoop;
- LI = &LPM.getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
// Get the set of exiting blocks.
@@ -216,7 +217,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
SSAUpdate.Initialize(Inst);
// Insert the LCSSA phi's into all of the exit blocks dominated by the
- // value., and add them to the Phi's map.
+ // value, and add them to the Phi's map.
for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
BasicBlock *ExitBB = *BBI;
@@ -230,8 +231,17 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
// Add inputs from inside the loop for this PHI.
- for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI)
+ for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
PN->addIncoming(Inst, *PI);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!inLoop(*PI))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(
+ PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+ }
// Remember that this phi makes the value alive in this block.
SSAUpdate.AddAvailableValue(ExitBB, PN);
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 543ddf1..aef0f5f 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -24,10 +24,14 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -236,7 +240,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
//===----------------------------------------------------------------------===//
-// Local dead code elimination...
+// Local dead code elimination.
//
/// isInstructionTriviallyDead - Return true if the result produced by the
@@ -248,6 +252,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
// We don't want debug info removed by anything this general.
if (isa<DbgInfoIntrinsic>(I)) return false;
+ // Likewise for memory use markers.
+ if (isa<MemoryUseIntrinsic>(I)) return false;
+
if (!I->mayHaveSideEffects()) return true;
// Special case intrinsics that "may have side effects" but can be deleted
@@ -323,9 +330,53 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
}
//===----------------------------------------------------------------------===//
-// Control Flow Graph Restructuring...
+// Control Flow Graph Restructuring.
//
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// .. and delete the predecessor corresponding to the '1', this will attempt to
+/// recursively fold the and to 0.
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ TargetData *TD) {
+ // This only adjusts blocks with PHI nodes.
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
+ // them down. This will leave us with single entry phi nodes and other phis
+ // that can be removed.
+ BB->removePredecessor(Pred, true);
+
+ WeakVH PhiIt = &BB->front();
+ while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
+ PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+
+ Value *PNV = PN->hasConstantValue();
+ if (PNV == 0) continue;
+
+ // If we're able to simplify the phi to a single value, substitute the new
+ // value into all of its uses.
+ assert(PNV != PN && "hasConstantValue broken");
+
+ ReplaceAndSimplifyAllUses(PN, PNV, TD);
+
+ // If recursive simplification ended up deleting the next PHI node we would
+ // iterate to, then our iterator is invalid, restart scanning from the top
+ // of the block.
+ if (PhiIt == 0) PhiIt = &BB->front();
+ }
+}
+
+
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
/// predecessor is known to have one successor (DestBB!). Eliminate the edge
/// between them, moving the instructions in the predecessor into DestBB and
@@ -362,6 +413,174 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
PredBB->eraseFromParent();
}
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+///
+/// Assumption: Succ is the single successor for BB.
+///
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ DEBUG(errs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
+ // Shortcut, if there is only a single predecessor it must be BB and merging
+ // is always safe
+ if (Succ->getSinglePredecessor()) return true;
+
+ // Make a list of the predecessors of BB
+ typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
+ BlockSet BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Use that list to make another list of common predecessors of BB and Succ
+ BlockSet CommonPreds;
+ for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
+ PI != PE; ++PI)
+ if (BBPreds.count(*PI))
+ CommonPreds.insert(*PI);
+
+ // Shortcut, if there are no common predecessors, merging is always safe
+ if (CommonPreds.empty())
+ return true;
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ if (BBPN->getIncomingValueForBlock(*PI)
+ != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
+ if (Val != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with regard to common "
+ << "predecessor " << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potential debug intrinsics and the branch. If possible, eliminate BB by
+/// rewriting all the predecessors to branch to the successor block and return
+/// true. If we can't transform, return false.
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+ // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+ if (PN->getIncomingBlock(UI) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ PN->addIncoming(OldValPN->getIncomingValue(i),
+ OldValPN->getIncomingBlock(i));
+ } else {
+ // Add an incoming value for each of the new incoming values.
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
+ PN->addIncoming(OldVal, BBPreds[i]);
+ }
+ }
+ }
+
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+ Succ->getInstList().splice(Succ->begin(),
+ BB->getInstList(), BB->begin());
+ } else {
+ // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
+ }
+ }
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ if (!Succ->hasName()) Succ->takeName(BB);
+ BB->eraseFromParent(); // Delete the old basic block.
+ return true;
+}
+
+
+
/// OnlyUsedByDbgIntrinsics - Return true if the instruction I is only used
/// by DbgIntrinsics. If DbgInUses is specified then the vector is filled
/// with the DbgInfoIntrinsic that use the instruction I.
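
RemovePredecessorAndSimplify's comment gives the motivating case: after dropping the predecessor that contributed the 1, x = phi(0, 0, 0) collapses to 0 and y = and x, z folds to 0. A toy integer version of the collapse test:

    #include <cassert>
    #include <optional>
    #include <vector>

    // A phi "has a constant value" when every remaining incoming
    // value is the same (toy model over plain ints).
    std::optional<int> phiConstantValue(const std::vector<int> &In) {
      if (In.empty())
        return std::nullopt;
      for (int V : In)
        if (V != In.front())
          return std::nullopt;
      return In.front();
    }

    int main() {
      std::vector<int> Phi{1, 0, 0, 0};
      Phi.erase(Phi.begin()); // entry for the deleted predecessor
      auto C = phiConstantValue(Phi);
      assert(C && *C == 0);
      int Z = 0xff;
      assert((*C & Z) == 0); // y = and x, z simplifies to 0
      return 0;
    }
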
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index cd8d952..2ab0972 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -23,6 +23,11 @@
//
// This pass also guarantees that loops will have exactly one backedge.
//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
// Note that the simplifycfg pass will clean up blocks which are split out but
// end up being unnecessary, so usage of this pass should not pessimize
// generated code.
@@ -81,17 +86,15 @@ namespace {
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
}
- /// verifyAnalysis() - Verify loop nest.
- void verifyAnalysis() const {
- assert(L->isLoopSimplifyForm() && "LoopSimplify form not preserved!");
- }
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const;
private:
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
BasicBlock *InsertPreheaderForLoop(Loop *L);
Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
- void InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
+ BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
void PlaceSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock*> &SplitPreds,
Loop *L);
@@ -160,8 +163,10 @@ ReprocessLoop:
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
Preheader = InsertPreheaderForLoop(L);
- NumInserted++;
- Changed = true;
+ if (Preheader) {
+ NumInserted++;
+ Changed = true;
+ }
}
// Next, check to make sure that all exit nodes of the loop only have
@@ -180,21 +185,22 @@ ReprocessLoop:
// Must be exactly this loop: no subloops, parent loops, or non-loop preds
// allowed.
if (!L->contains(*PI)) {
- RewriteLoopExitBlock(L, ExitBlock);
- NumInserted++;
- Changed = true;
+ if (RewriteLoopExitBlock(L, ExitBlock)) {
+ NumInserted++;
+ Changed = true;
+ }
break;
}
}
// If the header has more than two predecessors at this point (from the
// preheader and from multiple backedges), we must adjust the loop.
- unsigned NumBackedges = L->getNumBackEdges();
- if (NumBackedges != 1) {
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
// If this is really a nested loop, rip it out into a child loop. Don't do
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
- if (NumBackedges < 8) {
+ if (L->getNumBackEdges() < 8) {
if (SeparateNestedLoop(L, LPM)) {
++NumNested;
// This is a big restructuring change, reprocess the whole loop.
@@ -207,9 +213,11 @@ ReprocessLoop:
// If we either couldn't, or didn't want to, identify nesting of the loops,
// insert a new block that all backedges target, then make it jump to the
// loop header.
- InsertUniqueBackedgeBlock(L, Preheader);
- NumInserted++;
- Changed = true;
+ LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
+ if (LoopLatch) {
+ NumInserted++;
+ Changed = true;
+ }
}
// Scan over the PHI nodes in the loop header. Since they now have only two
@@ -233,7 +241,14 @@ ReprocessLoop:
// loop-invariant instructions out of the way to open up more
// opportunities, and the disadvantage of having the responsibility
// to preserve dominator information.
- if (ExitBlocks.size() > 1 && L->getUniqueExitBlock()) {
+ bool UniqueExit = true;
+ if (!ExitBlocks.empty())
+ for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] != ExitBlocks[0]) {
+ UniqueExit = false;
+ break;
+ }
+ if (UniqueExit) {
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
@@ -251,7 +266,8 @@ ReprocessLoop:
Instruction *Inst = I++;
if (Inst == CI)
continue;
- if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator())) {
+ if (!L->makeLoopInvariant(Inst, Changed,
+ Preheader ? Preheader->getTerminator() : 0)) {
AllInvariant = false;
break;
}
@@ -303,8 +319,15 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
SmallVector<BasicBlock*, 8> OutsideBlocks;
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
PI != PE; ++PI)
- if (!L->contains(*PI)) // Coming in from outside the loop?
- OutsideBlocks.push_back(*PI); // Keep track of it...
+ if (!L->contains(*PI)) { // Coming in from outside the loop?
+ // If the loop is branched to from an indirect branch, we won't
+ // be able to fully transform the loop, because it prohibits
+ // edge splitting.
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) return 0;
+
+ // Keep track of it.
+ OutsideBlocks.push_back(*PI);
+ }
// Split out the loop pre-header.
BasicBlock *NewBB =
@@ -324,8 +347,12 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
SmallVector<BasicBlock*, 8> LoopBlocks;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I)
- if (L->contains(*I))
+ if (L->contains(*I)) {
+ // Don't do this if the loop is exited via an indirect branch.
+ if (isa<IndirectBrInst>((*I)->getTerminator())) return 0;
+
LoopBlocks.push_back(*I);
+ }
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
@@ -519,13 +546,18 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
/// backedges to target a new basic block and have that block branch to the loop
/// header. This ensures that loops have exactly one backedge.
///
-void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+BasicBlock *
+LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
// Get information about the loop
BasicBlock *Header = L->getHeader();
Function *F = Header->getParent();
+ // Unique backedge insertion currently depends on having a preheader.
+ if (!Preheader)
+ return 0;
+
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I)
@@ -612,4 +644,40 @@ void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
DT->splitBlock(BEBlock);
if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>())
DF->splitBlock(BEBlock);
+
+ return BEBlock;
+}
+
+void LoopSimplify::verifyAnalysis() const {
+ // It used to be possible to just assert L->isLoopSimplifyForm(), however
+ // with the introduction of indirectbr, there are now cases where it's
+ // not possible to transform a loop as necessary. We can at least check
+ // that there is an indirectbr near any time there's trouble.
+
+ // Indirectbr can interfere with preheader and unique backedge insertion.
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+ bool HasIndBrPred = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PE = pred_end(L->getHeader()); PI != PE; ++PI)
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ HasIndBrPred = true;
+ break;
+ }
+ assert(HasIndBrPred &&
+ "LoopSimplify has no excuse for missing loop header info!");
+ }
+
+ // Indirectbr can interfere with exit block canonicalization.
+ if (!L->hasDedicatedExits()) {
+ bool HasIndBrExiting = false;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i)
+ if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+ HasIndBrExiting = true;
+ break;
+ }
+ assert(HasIndBrExiting &&
+ "LoopSimplify has no excuse for missing exit block info!");
+ }
}
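
The rewritten exit test in ProcessLoop above no longer calls getUniqueExitBlock(); it simply verifies that every collected exit block is the same one, treating a loop with no recorded exits as trivially unique. The same scan over plain values:

    #include <cassert>
    #include <vector>

    // True when all entries match the first (vacuously true if empty),
    // mirroring the UniqueExit scan above.
    template <typename T>
    bool allSame(const std::vector<T> &Xs) {
      for (std::size_t i = 1; i < Xs.size(); ++i)
        if (Xs[i] != Xs[0])
          return false;
      return true;
    }

    int main() {
      assert(allSame(std::vector<int>{7, 7, 7}));
      assert(!allSame(std::vector<int>{7, 8}));
      assert(allSame(std::vector<int>{})); // no exits: vacuously unique
      return 0;
    }
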
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index d68427a..6232f32 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -108,8 +108,19 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
assert(L->isLCSSAForm());
- BasicBlock *Header = L->getHeader();
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(errs() << " Can't unroll; loop preheader-insertion failed.\n");
+ return false;
+ }
+
BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock) {
+ DEBUG(errs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *Header = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
if (!BI || BI->isUnconditional()) {
@@ -351,8 +362,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
- else if (Constant *C = ConstantFoldInstruction(Inst,
- Header->getContext())) {
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
Inst->replaceAllUsesWith(C);
(*BB)->getInstList().erase(Inst);
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 8e1fb98..8dbc808 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -78,166 +78,6 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
}
-/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into succ.
-///
-/// Assumption: Succ is the single successor for BB.
-///
-static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
- assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
-
- DEBUG(errs() << "Looking to fold " << BB->getName() << " into "
- << Succ->getName() << "\n");
- // Shortcut, if there is only a single predecessor it must be BB and merging
- // is always safe
- if (Succ->getSinglePredecessor()) return true;
-
- // Make a list of the predecessors of BB
- typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
- BlockSet BBPreds(pred_begin(BB), pred_end(BB));
-
- // Use that list to make another list of common predecessors of BB and Succ
- BlockSet CommonPreds;
- for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
- PI != PE; ++PI)
- if (BBPreds.count(*PI))
- CommonPreds.insert(*PI);
-
- // Shortcut, if there are no common predecessors, merging is always safe
- if (CommonPreds.empty())
- return true;
-
- // Look at all the phi nodes in Succ, to see if they present a conflict when
- // merging these blocks
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
-
- // If the incoming value from BB is again a PHINode in
- // BB which has the same incoming value for *PI as PN does, we can
- // merge the phi nodes and then the blocks can still be merged
- PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
- if (BBPN && BBPN->getParent() == BB) {
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- if (BBPN->getIncomingValueForBlock(*PI)
- != PN->getIncomingValueForBlock(*PI)) {
- DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with "
- << BBPN->getName() << " with regard to common predecessor "
- << (*PI)->getName() << "\n");
- return false;
- }
- }
- } else {
- Value* Val = PN->getIncomingValueForBlock(BB);
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- // See if the incoming value for the common predecessor is equal to the
- // one for BB, in which case this phi node will not prevent the merging
- // of the block.
- if (Val != PN->getIncomingValueForBlock(*PI)) {
- DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getName() << "\n");
- return false;
- }
- }
- }
- }
-
- return true;
-}
-
-/// TryToSimplifyUncondBranchFromEmptyBlock - BB contains an unconditional
-/// branch to Succ, and contains no instructions other than PHI nodes and the
-/// branch. If possible, eliminate BB.
-static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
- BasicBlock *Succ) {
- // Check to see if merging these blocks would cause conflicts for any of the
- // phi nodes in BB or Succ. If not, we can safely merge.
- if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
-
- // Check for cases where Succ has multiple predecessors and a PHI node in BB
- // has uses which will not disappear when the PHI nodes are merged. It is
- // possible to handle such cases, but difficult: it requires checking whether
- // BB dominates Succ, which is non-trivial to calculate in the case where
- // Succ has multiple predecessors. Also, it requires checking whether
- // constructing the necessary self-referential PHI node doesn't intoduce any
- // conflicts; this isn't too difficult, but the previous code for doing this
- // was incorrect.
- //
- // Note that if this check finds a live use, BB dominates Succ, so BB is
- // something like a loop pre-header (or rarely, a part of an irreducible CFG);
- // folding the branch isn't profitable in that case anyway.
- if (!Succ->getSinglePredecessor()) {
- BasicBlock::iterator BBI = BB->begin();
- while (isa<PHINode>(*BBI)) {
- for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
- UI != E; ++UI) {
- if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
- if (PN->getIncomingBlock(UI) != BB)
- return false;
- } else {
- return false;
- }
- }
- ++BBI;
- }
- }
-
- DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
-
- if (isa<PHINode>(Succ->begin())) {
- // If there is more than one pred of succ, and there are PHI nodes in
- // the successor, then we need to add incoming edges for the PHI nodes
- //
- const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
-
- // Loop over all of the PHI nodes in the successor of BB.
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- Value *OldVal = PN->removeIncomingValue(BB, false);
- assert(OldVal && "No entry in PHI for Pred BB!");
-
- // If this incoming value is one of the PHI nodes in BB, the new entries
- // in the PHI node are the entries from the old PHI.
- if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
- PHINode *OldValPN = cast<PHINode>(OldVal);
- for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
- // Note that, since we are merging phi nodes and BB and Succ might
- // have common predecessors, we could end up with a phi node with
- // identical incoming branches. This will be cleaned up later (and
- // will trigger asserts if we try to clean it up now, without also
- // simplifying the corresponding conditional branch).
- PN->addIncoming(OldValPN->getIncomingValue(i),
- OldValPN->getIncomingBlock(i));
- } else {
- // Add an incoming value for each of the new incoming values.
- for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
- PN->addIncoming(OldVal, BBPreds[i]);
- }
- }
- }
-
- while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- if (Succ->getSinglePredecessor()) {
- // BB is the only predecessor of Succ, so Succ will end up with exactly
- // the same predecessors BB had.
- Succ->getInstList().splice(Succ->begin(),
- BB->getInstList(), BB->begin());
- } else {
- // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
- assert(PN->use_empty() && "There shouldn't be any uses here!");
- PN->eraseFromParent();
- }
- }
-
- // Everything that jumped to BB now goes to Succ.
- BB->replaceAllUsesWith(Succ);
- if (!Succ->hasName()) Succ->takeName(BB);
- BB->eraseFromParent(); // Delete the old basic block.
- return true;
-}
/// GetIfCondition - Given a basic block (BB) with two predecessors (and
/// presumably PHI nodes in it), check to see if the merge at this block is due
@@ -1217,7 +1057,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
}
// Check for trivial simplification.
- if (Constant *C = ConstantFoldInstruction(N, BB->getContext())) {
+ if (Constant *C = ConstantFoldInstruction(N)) {
TranslateMap[BBI] = C;
delete N; // Constant folded away, don't need actual inst
} else {
@@ -1983,13 +1823,11 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
if (BI->isUnconditional()) {
BasicBlock::iterator BBI = BB->getFirstNonPHI();
- BasicBlock *Succ = BI->getSuccessor(0);
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(BBI))
++BBI;
- if (BBI->isTerminator() && // Terminator is the only non-phi instruction!
- Succ != BB) // Don't hurt infinite loops!
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB, Succ))
+ if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
} else { // Conditional branch
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 9a803a1..82d7914 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -1238,7 +1238,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
return;
}
- if (V->getValueID() == Value::PseudoSourceValueVal) {
+ if (V->getValueID() == Value::PseudoSourceValueVal ||
+ V->getValueID() == Value::FixedStackPseudoSourceValueVal) {
V->print(Out);
return;
}
@@ -1497,8 +1498,8 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT,
case GlobalValue::AvailableExternallyLinkage:
Out << "available_externally ";
break;
- case GlobalValue::GhostLinkage:
- llvm_unreachable("GhostLinkage not allowed in AsmWriter!");
+ // This is invalid syntax and just a debugging aid.
+ case GlobalValue::GhostLinkage: Out << "ghost "; break;
}
}
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 000a063..c622558 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -318,7 +318,7 @@ Constant* ConstantInt::get(const Type* Ty, const APInt& V) {
return C;
}
-ConstantInt* ConstantInt::get(const IntegerType* Ty, const StringRef& Str,
+ConstantInt* ConstantInt::get(const IntegerType* Ty, StringRef Str,
uint8_t radix) {
return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
}
@@ -362,7 +362,7 @@ Constant* ConstantFP::get(const Type* Ty, double V) {
}
-Constant* ConstantFP::get(const Type* Ty, const StringRef& Str) {
+Constant* ConstantFP::get(const Type* Ty, StringRef Str) {
LLVMContext &Context = Ty->getContext();
APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
@@ -508,7 +508,7 @@ Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
/// Otherwise, the length parameter specifies how much of the string to use
/// and it won't be null terminated.
///
-Constant* ConstantArray::get(LLVMContext &Context, const StringRef &Str,
+Constant* ConstantArray::get(LLVMContext &Context, StringRef Str,
bool AddNull) {
std::vector<Constant*> ElementVals;
for (unsigned i = 0; i < Str.size(); ++i)
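
The StringRef signature changes in this file (and in InlineAsm, Module, Pass, PassManager, and the symbol tables below) all follow one rule: StringRef is a two-word (pointer, length) value type, so passing it by value costs the same as a reference and drops an indirection. A minimal illustration with a hypothetical helper:

#include "llvm/ADT/StringRef.h"
using llvm::StringRef;

// Hypothetical helper: a by-value StringRef copies just a pointer and a length.
static unsigned countDots(StringRef S) {
  unsigned N = 0;
  for (size_t i = 0, e = S.size(); i != e; ++i)
    if (S[i] == '.')
      ++N;
  return N;
}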
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 1a34180..78cd4dc 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1860,8 +1860,9 @@ LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
}
LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), Name));
+ LLVMTypeRef DestTy, int isSigned,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), isSigned, Name));
}
LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val,
@@ -1987,13 +1988,15 @@ int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
int LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
char **OutMessage) {
- if (MemoryBuffer *MB = MemoryBuffer::getSTDIN()) {
- *OutMemBuf = wrap(MB);
- return 0;
+ MemoryBuffer *MB = MemoryBuffer::getSTDIN();
+ if (!MB->getBufferSize()) {
+ delete MB;
+ *OutMessage = strdup("stdin is empty.");
+ return 1;
}
-
- *OutMessage = strdup("stdin is empty.");
- return 1;
+
+ *OutMemBuf = wrap(MB);
+ return 0;
}
void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
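
MemoryBuffer::getSTDIN() now always returns a buffer, with emptiness signalled by a zero size, so the C wrapper tests the size instead of a null pointer. A hedged usage sketch of the wrapper as declared above (the error string is strdup'd, so plain free() releases it):

#include "llvm-c/Core.h"
#include <stdio.h>
#include <stdlib.h>

static int readStdinBuffer(void) {
  LLVMMemoryBufferRef Buf;
  char *Err = 0;
  if (LLVMCreateMemoryBufferWithSTDIN(&Buf, &Err)) {
    fprintf(stderr, "error: %s\n", Err);  /* e.g. "stdin is empty." */
    free(Err);                            /* message was strdup'd by the wrapper */
    return 1;
  }
  /* ... consume the buffer ... */
  LLVMDisposeMemoryBuffer(Buf);
  return 0;
}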
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index 03ceecb..94bf3de 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -16,7 +16,6 @@
#include "llvm/GlobalVariable.h"
#include "llvm/GlobalAlias.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/ErrorHandling.h"
@@ -95,8 +94,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
// GlobalVariable Implementation
//===----------------------------------------------------------------------===//
-GlobalVariable::GlobalVariable(LLVMContext &Context, const Type *Ty,
- bool constant, LinkageTypes Link,
+GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
Constant *InitVal, const Twine &Name,
bool ThreadLocal, unsigned AddressSpace)
: GlobalValue(PointerType::get(Ty, AddressSpace),
@@ -173,6 +171,21 @@ void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
this->setOperand(0, cast<Constant>(To));
}
+void GlobalVariable::setInitializer(Constant *InitVal) {
+ if (InitVal == 0) {
+ if (hasInitializer()) {
+ Op<0>().set(0);
+ NumOperands = 0;
+ }
+ } else {
+ assert(InitVal->getType() == getType()->getElementType() &&
+ "Initializer type must match GlobalVariable type");
+ if (!hasInitializer())
+ NumOperands = 1;
+ Op<0>().set(InitVal);
+ }
+}
+
/// copyAttributesFrom - copy all additional attributes (those not needed to
/// create a GlobalVariable) from the GlobalVariable Src to this one.
void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
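
The new out-of-line setInitializer also accepts a null pointer, demoting the global to a declaration while keeping NumOperands consistent. A small sketch, assuming a global whose element type is i32:

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include <cassert>
using namespace llvm;

// Illustrative only: drop and restore the initializer of an i32 global.
static void toggleDefinition(GlobalVariable *GV, LLVMContext &Ctx) {
  GV->setInitializer(0);                       // now a declaration
  assert(!GV->hasInitializer() && "initializer should be gone");
  GV->setInitializer(ConstantInt::get(Type::getInt32Ty(Ctx), 42)); // defined again
}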
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index 3a36a1b..16de1af 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -26,16 +26,16 @@ InlineAsm::~InlineAsm() {
// NOTE: when memoizing the function type, we have to be careful to handle the
// case when the type gets refined.
-InlineAsm *InlineAsm::get(const FunctionType *Ty, const StringRef &AsmString,
- const StringRef &Constraints, bool hasSideEffects,
+InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
bool isAlignStack) {
// FIXME: memoize!
return new InlineAsm(Ty, AsmString, Constraints, hasSideEffects,
isAlignStack);
}
-InlineAsm::InlineAsm(const FunctionType *Ty, const StringRef &asmString,
- const StringRef &constraints, bool hasSideEffects,
+InlineAsm::InlineAsm(const FunctionType *Ty, StringRef asmString,
+ StringRef constraints, bool hasSideEffects,
bool isAlignStack)
: Value(PointerType::getUnqual(Ty),
Value::InlineAsmVal),
@@ -54,7 +54,7 @@ const FunctionType *InlineAsm::getFunctionType() const {
/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
/// fields in this structure. If the constraint string is not understood,
/// return true, otherwise return false.
-bool InlineAsm::ConstraintInfo::Parse(const StringRef &Str,
+bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
std::vector<InlineAsm::ConstraintInfo> &ConstraintsSoFar) {
StringRef::iterator I = Str.begin(), E = Str.end();
@@ -149,7 +149,7 @@ bool InlineAsm::ConstraintInfo::Parse(const StringRef &Str,
}
std::vector<InlineAsm::ConstraintInfo>
-InlineAsm::ParseConstraints(const StringRef &Constraints) {
+InlineAsm::ParseConstraints(StringRef Constraints) {
std::vector<ConstraintInfo> Result;
// Scan the constraints string.
@@ -183,7 +183,7 @@ InlineAsm::ParseConstraints(const StringRef &Constraints) {
/// Verify - Verify that the specified constraint string is reasonable for the
/// specified function type, and otherwise validate the constraint string.
-bool InlineAsm::Verify(const FunctionType *Ty, const StringRef &ConstStr) {
+bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
if (Ty->isVarArg()) return false;
std::vector<ConstraintInfo> Constraints = ParseConstraints(ConstStr);
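
With StringRef parameters, a literal constraint string now binds directly. A hypothetical check using the parser as declared above:

#include "llvm/InlineAsm.h"
#include <vector>
using namespace llvm;

// Hypothetical: split "=r,r,~{memory}" into per-operand constraint records.
static bool parsesToThreeOperands() {
  std::vector<InlineAsm::ConstraintInfo> CV =
      InlineAsm::ParseConstraints("=r,r,~{memory}");
  return CV.size() == 3;  // "=r", "r", and the memory clobber
}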
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 279bc73..b03ee93 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -24,8 +24,6 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetData.h"
-
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -465,9 +463,11 @@ static Instruction *createMalloc(Instruction *InsertBefore,
ArraySize = ConstantInt::get(IntPtrTy, 1);
else if (ArraySize->getType() != IntPtrTy) {
if (InsertBefore)
- ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertBefore);
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertBefore);
else
- ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertAtEnd);
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertAtEnd);
}
if (!IsConstantOne(ArraySize)) {
@@ -494,22 +494,21 @@ static Instruction *createMalloc(Instruction *InsertBefore,
BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
Module* M = BB->getParent()->getParent();
const Type *BPTy = Type::getInt8PtrTy(BB->getContext());
- if (!MallocF)
+ Value *MallocFunc = MallocF;
+ if (!MallocFunc)
// prototype malloc as "void *malloc(size_t)"
- MallocF = cast<Function>(M->getOrInsertFunction("malloc", BPTy,
- IntPtrTy, NULL));
- if (!MallocF->doesNotAlias(0)) MallocF->setDoesNotAlias(0);
+ MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
const PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
CallInst *MCall = NULL;
Instruction *Result = NULL;
if (InsertBefore) {
- MCall = CallInst::Create(MallocF, AllocSize, "malloccall", InsertBefore);
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall", InsertBefore);
Result = MCall;
if (Result->getType() != AllocPtrType)
// Create a cast instruction to convert to the right type...
Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
} else {
- MCall = CallInst::Create(MallocF, AllocSize, "malloccall");
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall");
Result = MCall;
if (Result->getType() != AllocPtrType) {
InsertAtEnd->getInstList().push_back(MCall);
@@ -518,6 +517,10 @@ static Instruction *createMalloc(Instruction *InsertBefore,
}
}
MCall->setTailCall();
+ if (Function *F = dyn_cast<Function>(MallocFunc)) {
+ MCall->setCallingConv(F->getCallingConv());
+ if (!F->doesNotAlias(0)) F->setDoesNotAlias(0);
+ }
assert(MCall->getType() != Type::getVoidTy(BB->getContext()) &&
"Malloc has void return type");
@@ -567,8 +570,7 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
const Type *VoidTy = Type::getVoidTy(M->getContext());
const Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
// prototype free as "void free(void*)"
- Constant *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
-
+ Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
CallInst* Result = NULL;
Value *PtrCast = Source;
if (InsertBefore) {
@@ -581,6 +583,8 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
Result = CallInst::Create(FreeFunc, PtrCast, "");
}
Result->setTailCall();
+ if (Function *F = dyn_cast<Function>(FreeFunc))
+ Result->setCallingConv(F->getCallingConv());
return Result;
}
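
Both helpers now hold the callee as a Value*, because getOrInsertFunction may return a constant bitcast of an existing symbol with a conflicting prototype rather than a Function. The pattern in isolation, as a sketch:

#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
using namespace llvm;

// Only apply Function-specific attributes after a dyn_cast proves the callee
// really is a Function and not a bitcast constant.
static CallInst *emitMallocCall(Module *M, const Type *IntPtrTy,
                                Value *Size, Instruction *InsertPt) {
  const Type *BPTy = Type::getInt8PtrTy(M->getContext());
  Value *Callee = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
  CallInst *CI = CallInst::Create(Callee, Size, "malloccall", InsertPt);
  CI->setTailCall();
  if (Function *F = dyn_cast<Function>(Callee)) {
    CI->setCallingConv(F->getCallingConv());
    if (!F->doesNotAlias(0)) F->setDoesNotAlias(0); // malloc result doesn't alias
  }
  return CI;
}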
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 4fadfed..24e715b 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -39,6 +39,17 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) {
new MDString(Context, Entry.getKey());
}
+MDString *MDString::get(LLVMContext &Context, const char *Str) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ StringMapEntry<MDString *> &Entry =
+ pImpl->MDStringCache.GetOrCreateValue(Str ? StringRef(Str) : StringRef());
+ MDString *&S = Entry.getValue();
+ if (S) return S;
+
+ return S =
+ new MDString(Context, Entry.getKey());
+}
+
//===----------------------------------------------------------------------===//
// MDNode implementation.
//
@@ -341,11 +352,11 @@ MDNode *MetadataContextImpl::getMD(unsigned MDKind, const Instruction *Inst) {
/// getMDs - Get the metadata attached to an Instruction.
void MetadataContextImpl::
getMDs(const Instruction *Inst, SmallVectorImpl<MDPairTy> &MDs) const {
- MDStoreTy::iterator I = MetadataStore.find(Inst);
+ MDStoreTy::const_iterator I = MetadataStore.find(Inst);
if (I == MetadataStore.end())
return;
MDs.resize(I->second.size());
- for (MDMapTy::iterator MI = I->second.begin(), ME = I->second.end();
+ for (MDMapTy::const_iterator MI = I->second.begin(), ME = I->second.end();
MI != ME; ++MI)
// MD kinds are numbered from 1.
MDs[MI->first - 1] = std::make_pair(MI->first, MI->second);
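
The new const char* overload of MDString::get maps a null pointer to the empty string before consulting the uniquing cache. An illustrative consequence:

#include "llvm/ADT/StringRef.h"
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
using namespace llvm;

// Illustrative check: a null C string and an empty StringRef land in the
// same MDStringCache slot, so the results compare pointer-equal.
static bool nullAndEmptyAgree(LLVMContext &Ctx) {
  MDString *A = MDString::get(Ctx, (const char *)0);
  MDString *B = MDString::get(Ctx, StringRef());
  return A == B;
}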
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index add2449..3efd3e3 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -31,8 +31,7 @@ using namespace llvm;
//
GlobalVariable *ilist_traits<GlobalVariable>::createSentinel() {
- GlobalVariable *Ret = new GlobalVariable(getGlobalContext(),
- Type::getInt32Ty(getGlobalContext()),
+ GlobalVariable *Ret = new GlobalVariable(Type::getInt32Ty(getGlobalContext()),
false, GlobalValue::ExternalLinkage);
// This should not be garbage monitored.
LeakDetector::removeGarbageObject(Ret);
@@ -56,7 +55,7 @@ template class SymbolTableListTraits<GlobalAlias, Module>;
// Primitive Module methods.
//
-Module::Module(const StringRef &MID, LLVMContext& C)
+Module::Module(StringRef MID, LLVMContext& C)
: Context(C), ModuleID(MID), DataLayout("") {
ValSymTab = new ValueSymbolTable();
TypeSymTab = new TypeSymbolTable();
@@ -115,7 +114,7 @@ Module::PointerSize Module::getPointerSize() const {
/// getNamedValue - Return the first global value in the module with
/// the specified name, of arbitrary type. This method returns null
/// if a global with the specified name is not found.
-GlobalValue *Module::getNamedValue(const StringRef &Name) const {
+GlobalValue *Module::getNamedValue(StringRef Name) const {
return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
}
@@ -128,7 +127,7 @@ GlobalValue *Module::getNamedValue(const StringRef &Name) const {
// it. This is nice because it allows most passes to get away with not handling
// the symbol table directly for this common task.
//
-Constant *Module::getOrInsertFunction(const StringRef &Name,
+Constant *Module::getOrInsertFunction(StringRef Name,
const FunctionType *Ty,
AttrListPtr AttributeList) {
// See if we have a definition for the specified function already.
@@ -161,7 +160,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name,
return F;
}
-Constant *Module::getOrInsertTargetIntrinsic(const StringRef &Name,
+Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
const FunctionType *Ty,
AttrListPtr AttributeList) {
// See if we have a definition for the specified function already.
@@ -178,7 +177,7 @@ Constant *Module::getOrInsertTargetIntrinsic(const StringRef &Name,
return F;
}
-Constant *Module::getOrInsertFunction(const StringRef &Name,
+Constant *Module::getOrInsertFunction(StringRef Name,
const FunctionType *Ty) {
AttrListPtr AttributeList = AttrListPtr::get((AttributeWithIndex *)0, 0);
return getOrInsertFunction(Name, Ty, AttributeList);
@@ -189,7 +188,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name,
// This version of the method takes a null terminated list of function
// arguments, which makes it easier for clients to use.
//
-Constant *Module::getOrInsertFunction(const StringRef &Name,
+Constant *Module::getOrInsertFunction(StringRef Name,
AttrListPtr AttributeList,
const Type *RetTy, ...) {
va_list Args;
@@ -208,7 +207,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name,
AttributeList);
}
-Constant *Module::getOrInsertFunction(const StringRef &Name,
+Constant *Module::getOrInsertFunction(StringRef Name,
const Type *RetTy, ...) {
va_list Args;
va_start(Args, RetTy);
@@ -229,7 +228,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name,
// getFunction - Look up the specified function in the module symbol table.
// If it does not exist, return null.
//
-Function *Module::getFunction(const StringRef &Name) const {
+Function *Module::getFunction(StringRef Name) const {
return dyn_cast_or_null<Function>(getNamedValue(Name));
}
@@ -244,7 +243,7 @@ Function *Module::getFunction(const StringRef &Name) const {
/// If AllowLocal is set to true, this function will return types that
/// have local linkage. By default, these types are not returned.
///
-GlobalVariable *Module::getGlobalVariable(const StringRef &Name,
+GlobalVariable *Module::getGlobalVariable(StringRef Name,
bool AllowLocal) const {
if (GlobalVariable *Result =
dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
@@ -259,7 +258,7 @@ GlobalVariable *Module::getGlobalVariable(const StringRef &Name,
/// with a constantexpr cast to the right type.
/// 3. Finally, if the existing global is the correct declaration, return the
/// existing global.
-Constant *Module::getOrInsertGlobal(const StringRef &Name, const Type *Ty) {
+Constant *Module::getOrInsertGlobal(StringRef Name, const Type *Ty) {
// See if we have a definition for the specified global already.
GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
if (GV == 0) {
@@ -286,21 +285,21 @@ Constant *Module::getOrInsertGlobal(const StringRef &Name, const Type *Ty) {
// getNamedAlias - Look up the specified global in the module symbol table.
// If it does not exist, return null.
//
-GlobalAlias *Module::getNamedAlias(const StringRef &Name) const {
+GlobalAlias *Module::getNamedAlias(StringRef Name) const {
return dyn_cast_or_null<GlobalAlias>(getNamedValue(Name));
}
/// getNamedMetadata - Return the first NamedMDNode in the module with the
/// specified name. This method returns null if a NamedMDNode with the
/// specified name is not found.
-NamedMDNode *Module::getNamedMetadata(const StringRef &Name) const {
+NamedMDNode *Module::getNamedMetadata(StringRef Name) const {
return dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name));
}
/// getOrInsertNamedMetadata - Return the first named MDNode in the module
/// with the specified name. This method returns a new NamedMDNode if a
/// NamedMDNode with the specified name is not found.
-NamedMDNode *Module::getOrInsertNamedMetadata(const StringRef &Name) {
+NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
NamedMDNode *NMD =
dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name));
if (!NMD)
@@ -317,7 +316,7 @@ NamedMDNode *Module::getOrInsertNamedMetadata(const StringRef &Name) {
// there is already an entry for this name, true is returned and the symbol
// table is not modified.
//
-bool Module::addTypeName(const StringRef &Name, const Type *Ty) {
+bool Module::addTypeName(StringRef Name, const Type *Ty) {
TypeSymbolTable &ST = getTypeSymbolTable();
if (ST.lookup(Name)) return true; // Already in symtab...
@@ -331,7 +330,7 @@ bool Module::addTypeName(const StringRef &Name, const Type *Ty) {
/// getTypeByName - Return the type with the specified name in this module, or
/// null if there is none by that name.
-const Type *Module::getTypeByName(const StringRef &Name) const {
+const Type *Module::getTypeByName(StringRef Name) const {
const TypeSymbolTable &ST = getTypeSymbolTable();
return cast_or_null<Type>(ST.lookup(Name));
}
@@ -377,14 +376,14 @@ void Module::dropAllReferences() {
I->dropAllReferences();
}
-void Module::addLibrary(const StringRef& Lib) {
+void Module::addLibrary(StringRef Lib) {
for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I)
if (*I == Lib)
return;
LibraryList.push_back(Lib);
}
-void Module::removeLibrary(const StringRef& Lib) {
+void Module::removeLibrary(StringRef Lib) {
LibraryListType::iterator I = LibraryList.begin();
LibraryListType::iterator E = LibraryList.end();
for (;I != E; ++I)
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index a17eed8..1232fe2 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -149,7 +149,7 @@ public:
return I != PassInfoMap.end() ? I->second : 0;
}
- const PassInfo *GetPassInfo(const StringRef &Arg) const {
+ const PassInfo *GetPassInfo(StringRef Arg) const {
StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
return I != PassInfoStringMap.end() ? I->second : 0;
}
@@ -238,7 +238,7 @@ const PassInfo *Pass::lookupPassInfo(intptr_t TI) {
return getPassRegistrar()->GetPassInfo(TI);
}
-const PassInfo *Pass::lookupPassInfo(const StringRef &Arg) {
+const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
return getPassRegistrar()->GetPassInfo(Arg);
}
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index eb097ed..d3d61f5 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -746,7 +746,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
}
/// Remove analysis passes that are not used any longer
-void PMDataManager::removeDeadPasses(Pass *P, const StringRef &Msg,
+void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
enum PassDebuggingString DBG_STR) {
SmallVector<Pass *, 12> DeadPasses;
@@ -768,7 +768,7 @@ void PMDataManager::removeDeadPasses(Pass *P, const StringRef &Msg,
freePass(*I, Msg, DBG_STR);
}
-void PMDataManager::freePass(Pass *P, const StringRef &Msg,
+void PMDataManager::freePass(Pass *P, StringRef Msg,
enum PassDebuggingString DBG_STR) {
dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg);
@@ -972,7 +972,7 @@ void PMDataManager::dumpPassArguments() const {
void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
enum PassDebuggingString S2,
- const StringRef &Msg) {
+ StringRef Msg) {
if (PassDebugging < Executions)
return;
errs() << (void*)this << std::string(getDepth()*2+1, ' ');
@@ -1028,7 +1028,7 @@ void PMDataManager::dumpPreservedSet(const Pass *P) const {
dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
}
-void PMDataManager::dumpAnalysisUsage(const StringRef &Msg, const Pass *P,
+void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
const AnalysisUsage::VectorType &Set) const {
assert(PassDebugging >= Details);
if (Set.empty())
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index 3440a77..0d0cdf5 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -31,7 +31,7 @@ TypeSymbolTable::~TypeSymbolTable() {
}
}
-std::string TypeSymbolTable::getUniqueName(const StringRef &BaseName) const {
+std::string TypeSymbolTable::getUniqueName(StringRef BaseName) const {
std::string TryName = BaseName;
const_iterator End = tmap.end();
@@ -43,7 +43,7 @@ std::string TypeSymbolTable::getUniqueName(const StringRef &BaseName) const {
}
// lookup a type by name - returns null on failure
-Type* TypeSymbolTable::lookup(const StringRef &Name) const {
+Type* TypeSymbolTable::lookup(StringRef Name) const {
const_iterator TI = tmap.find(Name);
Type* result = 0;
if (TI != tmap.end())
@@ -51,7 +51,6 @@ Type* TypeSymbolTable::lookup(const StringRef &Name) const {
return result;
}
-
// remove - Remove a type from the symbol table...
Type* TypeSymbolTable::remove(iterator Entry) {
assert(Entry != tmap.end() && "Invalid entry to remove!");
@@ -80,7 +79,7 @@ Type* TypeSymbolTable::remove(iterator Entry) {
// insert - Insert a type into the symbol table with the specified name...
-void TypeSymbolTable::insert(const StringRef &Name, const Type* T) {
+void TypeSymbolTable::insert(StringRef Name, const Type* T) {
assert(T && "Can't insert null type into symbol table!");
if (tmap.insert(std::make_pair(Name, T)).second) {
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index 7765a98..9d39a50 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -77,7 +77,7 @@ void ValueSymbolTable::removeValueName(ValueName *V) {
/// createValueName - This method attempts to create a value name and insert
/// it into the symbol table with the specified name. If it conflicts, it
/// auto-renames the name and returns that instead.
-ValueName *ValueSymbolTable::createValueName(const StringRef &Name, Value *V) {
+ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
// In the common case, the name is not already in the symbol table.
ValueName &Entry = vmap.GetOrCreateValue(Name);
if (Entry.getValue() == 0) {
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 5990e48..7ab7b15 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -780,9 +780,13 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
// Check to make sure that all of the constants in the switch instruction
// have the same type as the switched-on value.
const Type *SwitchTy = SI.getCondition()->getType();
- for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i)
+ SmallPtrSet<ConstantInt*, 32> Constants;
+ for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) {
Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
"Switch constants must all be same type as switch value!", &SI);
+ Assert2(Constants.insert(SI.getCaseValue(i)),
+ "Duplicate integer as switch case", &SI, SI.getCaseValue(i));
+ }
visitTerminatorInst(SI);
}
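
The verifier now also rejects a switch that lists the same constant twice. ConstantInt values are uniqued per context, so pointer identity suffices, and SmallPtrSet::insert returns false on a repeat. The check restated as a standalone sketch:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
using namespace llvm;

// True if any case constant of the switch repeats (case 0 is the default).
static bool hasDuplicateCases(SwitchInst &SI) {
  SmallPtrSet<ConstantInt*, 32> Seen;
  for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i)
    if (!Seen.insert(SI.getCaseValue(i)))
      return true;  // uniqued ConstantInt appeared twice
  return false;
}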
diff --git a/test/Analysis/PointerTracking/sizes.ll b/test/Analysis/PointerTracking/sizes.ll
index 267c3b8..c8ca648 100644
--- a/test/Analysis/PointerTracking/sizes.ll
+++ b/test/Analysis/PointerTracking/sizes.ll
@@ -31,7 +31,6 @@ entry:
}
declare i32 @bar(i8*)
-declare i32 @bar2(i64*)
define i32 @foo1(i32 %n) nounwind {
entry:
@@ -66,11 +65,6 @@ entry:
%call = tail call i8* @malloc(i64 %n) ; <i8*> [#uses=1]
; CHECK: %call =
; CHECK: ==> %n elements, %n bytes allocated
- %mallocsize = mul i64 %n, 8 ; <i64> [#uses=1]
- %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1]
- %call3 = bitcast i8* %malloccall to i64* ; <i64*> [#uses=1]
-; CHECK: %malloccall =
-; CHECK: ==> (8 * %n) elements, (8 * %n) bytes allocated
%call2 = tail call i8* @calloc(i64 2, i64 4) nounwind ; <i8*> [#uses=1]
; CHECK: %call2 =
; CHECK: ==> 8 elements, 8 bytes allocated
@@ -78,12 +72,10 @@ entry:
; CHECK: %call4 =
; CHECK: ==> 16 elements, 16 bytes allocated
%call6 = tail call i32 @bar(i8* %call) nounwind ; <i32> [#uses=1]
- %call7 = tail call i32 @bar2(i64* %call3) nounwind ; <i32> [#uses=1]
%call8 = tail call i32 @bar(i8* %call2) nounwind ; <i32> [#uses=1]
%call10 = tail call i32 @bar(i8* %call4) nounwind ; <i32> [#uses=1]
%add = add i32 %call8, %call6 ; <i32> [#uses=1]
- %add10 = add i32 %add, %call7 ; <i32> [#uses=1]
- %add11 = add i32 %add10, %call10 ; <i32> [#uses=1]
+ %add11 = add i32 %add, %call10 ; <i32> [#uses=1]
ret i32 %add11
}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 627b57d..d7037ab 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,31 +1,39 @@
-include(GetTargetTriple)
-get_target_triple(target)
-
foreach(c ${LLVM_TARGETS_TO_BUILD})
set(TARGETS_BUILT "${TARGETS_BUILT} ${c}")
endforeach(c)
set(TARGETS_TO_BUILD ${TARGETS_BUILT})
+# FIXME: This won't work for project files, we need to use a --param.
+set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib/${CMAKE_CFG_INTDIR}")
+set(SHLIBEXT "${LTDL_SHLIB_EXT}")
+
include(FindPythonInterp)
if(PYTHONINTERP_FOUND)
- get_target_property(LLVM_TOOLS_PATH llvm-config RUNTIME_OUTPUT_DIRECTORY)
-
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in
${CMAKE_CURRENT_BINARY_DIR}/site.exp)
- add_custom_target(llvm-test
+ MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/Unit)
+
+ add_custom_target(check
COMMAND sed -e "s#\@LLVM_SOURCE_DIR\@#${LLVM_MAIN_SRC_DIR}#"
-e "s#\@LLVM_BINARY_DIR\@#${LLVM_BINARY_DIR}#"
- -e "s#\@LLVM_TOOLS_DIR\@#${LLVM_TOOLS_PATH}/${CMAKE_CFG_INTDIR}#"
- -e "s#\@LLVMGCC_DIR\@##"
+ -e "s#\@LLVM_TOOLS_DIR\@#${LLVM_TOOLS_BINARY_DIR}/${CMAKE_CFG_INTDIR}#"
+ -e "s#\@LLVMGCCDIR\@##"
${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in >
${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
- COMMAND ${PYTHON_EXECUTABLE}
+ COMMAND sed -e "s#\@LLVM_SOURCE_DIR\@#${LLVM_MAIN_SRC_DIR}#"
+ -e "s#\@LLVM_BINARY_DIR\@#${LLVM_BINARY_DIR}#"
+ -e "s#\@LLVM_TOOLS_DIR\@#${LLVM_TOOLS_BINARY_DIR}/${CMAKE_CFG_INTDIR}#"
+ -e "s#\@LLVMGCCDIR\@##"
+ -e "s#\@LLVM_BUILD_MODE\@#${CMAKE_CFG_INTDIR}#"
+ ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in >
+ ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
+ COMMAND ${PYTHON_EXECUTABLE}
${LLVM_SOURCE_DIR}/utils/lit/lit.py
-sv
${CMAKE_CURRENT_BINARY_DIR}
DEPENDS
COMMENT "Running LLVM regression tests")
-endif()
+endif()
diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
index ee93fde..2b7ccd8 100644
--- a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
+++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep fcmpezd | count 13
+; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep vcmpe | count 13
%struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* }
%struct.VEC2 = type { double, double, double }
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 98cab9a..3909c6a 100644
--- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -9,7 +9,7 @@ define void @test(double* %x, double* %y) nounwind {
br i1 %4, label %bb1, label %bb2
bb1:
-;CHECK: fstdhi
+;CHECK: vstrhi.64
store double %1, double* %y, align 4
br label %bb2
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
index 6281775..5476d5f 100644
--- a/test/CodeGen/ARM/2009-09-24-spill-align.ll
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -6,7 +6,7 @@ entry:
%arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
%out_poly16_t = alloca i16 ; <i16*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
-; CHECK: fldd
+; CHECK: vldr.64
%0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1]
%1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
store i16 %1, i16* %out_poly16_t, align 2
diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
index c260b97..62f3786 100644
--- a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
+++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | grep vmov | count 1
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "armv7-eabi"
@@ -11,12 +11,15 @@ entry:
%0 = getelementptr inbounds %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
store <4 x float> %quat.0, <4 x float>* %0
%1 = call arm_aapcs_vfpcc <4 x float> @quux(%foo* %quat_addr) nounwind ; <<4 x float>> [#uses=3]
+;CHECK: vmov.f32
+;CHECK: vmov.f32
%2 = fmul <4 x float> %1, %1 ; <<4 x float>> [#uses=2]
%3 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
%4 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
%5 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %3, <2 x float> %4) nounwind ; <<2 x float>> [#uses=2]
%6 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %5, <2 x float> %5) nounwind ; <<2 x float>> [#uses=2]
%7 = shufflevector <2 x float> %6, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=2]
+;CHECK: vmov
%8 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %7) nounwind ; <<4 x float>> [#uses=3]
%9 = fmul <4 x float> %8, %8 ; <<4 x float>> [#uses=1]
%10 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %9, <4 x float> %7) nounwind ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
new file mode 100644
index 0000000..dd2845f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+; PR5423
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+define arm_aapcs_vfpcc void @foo() {
+entry:
+ %0 = load float* null, align 4 ; <float> [#uses=2]
+ %1 = fmul float %0, undef ; <float> [#uses=2]
+ %2 = fmul float 0.000000e+00, %1 ; <float> [#uses=2]
+ %3 = fmul float %0, %1 ; <float> [#uses=1]
+ %4 = fadd float 0.000000e+00, %3 ; <float> [#uses=1]
+ %5 = fsub float 1.000000e+00, %4 ; <float> [#uses=1]
+; CHECK: foo:
+; CHECK: fconsts s{{[0-9]+}}, #112
+ %6 = fsub float 1.000000e+00, undef ; <float> [#uses=2]
+ %7 = fsub float %2, undef ; <float> [#uses=1]
+ %8 = fsub float 0.000000e+00, undef ; <float> [#uses=3]
+ %9 = fadd float %2, undef ; <float> [#uses=3]
+ %10 = load float* undef, align 8 ; <float> [#uses=3]
+ %11 = fmul float %8, %10 ; <float> [#uses=1]
+ %12 = fadd float undef, %11 ; <float> [#uses=2]
+ %13 = fmul float undef, undef ; <float> [#uses=1]
+ %14 = fmul float %6, 0.000000e+00 ; <float> [#uses=1]
+ %15 = fadd float %13, %14 ; <float> [#uses=1]
+ %16 = fmul float %9, %10 ; <float> [#uses=1]
+ %17 = fadd float %15, %16 ; <float> [#uses=2]
+ %18 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
+ %19 = fadd float %18, 0.000000e+00 ; <float> [#uses=1]
+ %20 = fmul float undef, %10 ; <float> [#uses=1]
+ %21 = fadd float %19, %20 ; <float> [#uses=1]
+ %22 = load float* undef, align 8 ; <float> [#uses=1]
+ %23 = fmul float %5, %22 ; <float> [#uses=1]
+ %24 = fadd float %23, undef ; <float> [#uses=1]
+ %25 = load float* undef, align 8 ; <float> [#uses=2]
+ %26 = fmul float %8, %25 ; <float> [#uses=1]
+ %27 = fadd float %24, %26 ; <float> [#uses=1]
+ %28 = fmul float %9, %25 ; <float> [#uses=1]
+ %29 = fadd float undef, %28 ; <float> [#uses=1]
+ %30 = fmul float %8, undef ; <float> [#uses=1]
+ %31 = fadd float undef, %30 ; <float> [#uses=1]
+ %32 = fmul float %6, undef ; <float> [#uses=1]
+ %33 = fadd float undef, %32 ; <float> [#uses=1]
+ %34 = fmul float %9, undef ; <float> [#uses=1]
+ %35 = fadd float %33, %34 ; <float> [#uses=1]
+ %36 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
+ %37 = fmul float %7, undef ; <float> [#uses=1]
+ %38 = fadd float %36, %37 ; <float> [#uses=1]
+ %39 = fmul float undef, undef ; <float> [#uses=1]
+ %40 = fadd float %38, %39 ; <float> [#uses=1]
+ store float %12, float* undef, align 8
+ store float %17, float* undef, align 4
+ store float %21, float* undef, align 8
+ store float %27, float* undef, align 8
+ store float %29, float* undef, align 4
+ store float %31, float* undef, align 8
+ store float %40, float* undef, align 8
+ store float %12, float* null, align 8
+ %41 = fmul float %17, undef ; <float> [#uses=1]
+ %42 = fadd float %41, undef ; <float> [#uses=1]
+ %43 = fmul float %35, undef ; <float> [#uses=1]
+ %44 = fadd float %42, %43 ; <float> [#uses=1]
+ store float %44, float* null, align 4
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll b/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll
new file mode 100644
index 0000000..efc4be1
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5410
+
+%0 = type { float, float, float, float }
+%pln = type { %vec, float }
+%vec = type { [4 x float] }
+
+define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) {
+entry:
+ br i1 undef, label %bb81, label %bb48
+
+bb48: ; preds = %entry
+ %0 = call arm_aapcs_vfpcc %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0]
+ ret float 0.000000e+00
+
+bb81: ; preds = %entry
+ ret float 0.000000e+00
+}
+
+declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind
diff --git a/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll b/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
new file mode 100644
index 0000000..6cce02d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5411
+
+%bar = type { %quad, float, float, [3 x %quux*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quuz, %quad, float, [64 x %quux], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { %quad, %quad }
+%quuz = type { [4 x %quux*], [4 x float], i32 }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quux* %a, %quux* %b, %quux* %c, i8 zeroext %forced) {
+entry:
+ br i1 undef, label %bb85, label %bb
+
+bb: ; preds = %entry
+ %0 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 2 ; <float*> [#uses=2]
+ %1 = load float* undef, align 4 ; <float> [#uses=1]
+ %2 = fsub float 0.000000e+00, undef ; <float> [#uses=2]
+ %3 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
+ %4 = load float* %0, align 4 ; <float> [#uses=3]
+ %5 = fmul float %4, %2 ; <float> [#uses=1]
+ %6 = fsub float %3, %5 ; <float> [#uses=1]
+ %7 = fmul float %4, undef ; <float> [#uses=1]
+ %8 = fsub float %7, undef ; <float> [#uses=1]
+ %9 = fmul float undef, %2 ; <float> [#uses=1]
+ %10 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
+ %11 = fsub float %9, %10 ; <float> [#uses=1]
+ %12 = fmul float undef, %6 ; <float> [#uses=1]
+ %13 = fmul float 0.000000e+00, %8 ; <float> [#uses=1]
+ %14 = fadd float %12, %13 ; <float> [#uses=1]
+ %15 = fmul float %1, %11 ; <float> [#uses=1]
+ %16 = fadd float %14, %15 ; <float> [#uses=1]
+ %17 = select i1 undef, float undef, float %16 ; <float> [#uses=1]
+ %18 = fdiv float %17, 0.000000e+00 ; <float> [#uses=1]
+ store float %18, float* undef, align 4
+ %19 = fmul float %4, undef ; <float> [#uses=1]
+ store float %19, float* %0, align 4
+ ret %bar* null
+
+bb85: ; preds = %entry
+ ret %bar* null
+}
diff --git a/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll b/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
new file mode 100644
index 0000000..3ff6631
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
@@ -0,0 +1,123 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5412
+
+%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { [4 x %quuz*], [4 x float], i32 }
+%quuz = type { %quad, %quad }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
+entry:
+ br i1 undef, label %bb85, label %bb
+
+bb: ; preds = %entry
+ br i1 undef, label %bb3.i, label %bb2.i
+
+bb2.i: ; preds = %bb
+ br label %bb3.i
+
+bb3.i: ; preds = %bb2.i, %bb
+ %0 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=0]
+ %1 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
+ %2 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
+ %3 = load float* %2, align 4 ; <float> [#uses=1]
+ %4 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+ %5 = fsub float %3, undef ; <float> [#uses=2]
+ %6 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=2]
+ %7 = load float* %6, align 4 ; <float> [#uses=1]
+ %8 = fsub float %7, undef ; <float> [#uses=1]
+ %9 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=2]
+ %10 = load float* %9, align 4 ; <float> [#uses=1]
+ %11 = fsub float %10, undef ; <float> [#uses=2]
+ %12 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
+ %13 = load float* %12, align 4 ; <float> [#uses=1]
+ %14 = fsub float %13, undef ; <float> [#uses=1]
+ %15 = load float* undef, align 4 ; <float> [#uses=1]
+ %16 = fsub float %15, undef ; <float> [#uses=1]
+ %17 = fmul float %5, %16 ; <float> [#uses=1]
+ %18 = fsub float %17, 0.000000e+00 ; <float> [#uses=5]
+ %19 = fmul float %8, %11 ; <float> [#uses=1]
+ %20 = fsub float %19, undef ; <float> [#uses=3]
+ %21 = fmul float %1, %14 ; <float> [#uses=1]
+ %22 = fmul float %5, %11 ; <float> [#uses=1]
+ %23 = fsub float %21, %22 ; <float> [#uses=2]
+ store float %18, float* undef
+ %24 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 1 ; <float*> [#uses=2]
+ store float %20, float* %24
+ store float %23, float* undef
+ %25 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=0]
+ %26 = fmul float %18, %18 ; <float> [#uses=1]
+ %27 = fadd float %26, undef ; <float> [#uses=1]
+ %28 = fadd float %27, undef ; <float> [#uses=1]
+ %29 = call arm_aapcs_vfpcc float @sqrtf(float %28) readnone ; <float> [#uses=1]
+ %30 = load float* null, align 4 ; <float> [#uses=2]
+ %31 = load float* %4, align 4 ; <float> [#uses=2]
+ %32 = load float* %2, align 4 ; <float> [#uses=2]
+ %33 = load float* null, align 4 ; <float> [#uses=3]
+ %34 = load float* %6, align 4 ; <float> [#uses=2]
+ %35 = fsub float %33, %34 ; <float> [#uses=2]
+ %36 = fmul float %20, %35 ; <float> [#uses=1]
+ %37 = fsub float %36, undef ; <float> [#uses=1]
+ %38 = fmul float %23, 0.000000e+00 ; <float> [#uses=1]
+ %39 = fmul float %18, %35 ; <float> [#uses=1]
+ %40 = fsub float %38, %39 ; <float> [#uses=1]
+ %41 = fmul float %18, 0.000000e+00 ; <float> [#uses=1]
+ %42 = fmul float %20, 0.000000e+00 ; <float> [#uses=1]
+ %43 = fsub float %41, %42 ; <float> [#uses=1]
+ %44 = fmul float 0.000000e+00, %37 ; <float> [#uses=1]
+ %45 = fmul float %31, %40 ; <float> [#uses=1]
+ %46 = fadd float %44, %45 ; <float> [#uses=1]
+ %47 = fmul float %33, %43 ; <float> [#uses=1]
+ %48 = fadd float %46, %47 ; <float> [#uses=2]
+ %49 = load float* %9, align 4 ; <float> [#uses=2]
+ %50 = fsub float %30, %49 ; <float> [#uses=1]
+ %51 = load float* %12, align 4 ; <float> [#uses=3]
+ %52 = fsub float %32, %51 ; <float> [#uses=2]
+ %53 = load float* undef, align 4 ; <float> [#uses=2]
+ %54 = load float* %24, align 4 ; <float> [#uses=2]
+ %55 = fmul float %54, undef ; <float> [#uses=1]
+ %56 = fmul float undef, %52 ; <float> [#uses=1]
+ %57 = fsub float %55, %56 ; <float> [#uses=1]
+ %58 = fmul float undef, %52 ; <float> [#uses=1]
+ %59 = fmul float %54, %50 ; <float> [#uses=1]
+ %60 = fsub float %58, %59 ; <float> [#uses=1]
+ %61 = fmul float %30, %57 ; <float> [#uses=1]
+ %62 = fmul float %32, 0.000000e+00 ; <float> [#uses=1]
+ %63 = fadd float %61, %62 ; <float> [#uses=1]
+ %64 = fmul float %34, %60 ; <float> [#uses=1]
+ %65 = fadd float %63, %64 ; <float> [#uses=2]
+ %66 = fcmp olt float %48, %65 ; <i1> [#uses=1]
+ %67 = fsub float %49, 0.000000e+00 ; <float> [#uses=1]
+ %68 = fsub float %51, %31 ; <float> [#uses=1]
+ %69 = fsub float %53, %33 ; <float> [#uses=1]
+ %70 = fmul float undef, %67 ; <float> [#uses=1]
+ %71 = load float* undef, align 4 ; <float> [#uses=2]
+ %72 = fmul float %71, %69 ; <float> [#uses=1]
+ %73 = fsub float %70, %72 ; <float> [#uses=1]
+ %74 = fmul float %71, %68 ; <float> [#uses=1]
+ %75 = fsub float %74, 0.000000e+00 ; <float> [#uses=1]
+ %76 = fmul float %51, %73 ; <float> [#uses=1]
+ %77 = fadd float undef, %76 ; <float> [#uses=1]
+ %78 = fmul float %53, %75 ; <float> [#uses=1]
+ %79 = fadd float %77, %78 ; <float> [#uses=1]
+ %80 = select i1 %66, float %48, float %65 ; <float> [#uses=1]
+ %81 = select i1 undef, float %80, float %79 ; <float> [#uses=1]
+ %iftmp.164.0 = select i1 undef, float %29, float 1.000000e+00 ; <float> [#uses=1]
+ %82 = fdiv float %81, %iftmp.164.0 ; <float> [#uses=1]
+ %iftmp.165.0 = select i1 undef, float %82, float 0.000000e+00 ; <float> [#uses=1]
+ store float %iftmp.165.0, float* undef, align 4
+ br i1 false, label %bb4.i97, label %ccc.exit98
+
+bb4.i97: ; preds = %bb3.i
+ br label %ccc.exit98
+
+ccc.exit98: ; preds = %bb4.i97, %bb3.i
+ ret %bar* null
+
+bb85: ; preds = %entry
+ ret %bar* null
+}
+
+declare arm_aapcs_vfpcc float @sqrtf(float) readnone
diff --git a/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll b/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
new file mode 100644
index 0000000..832ff4f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
@@ -0,0 +1,113 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5412
+; rdar://7384107
+
+%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { [4 x %quuz*], [4 x float], i32 }
+%quuz = type { %quad, %quad }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
+entry:
+ %0 = load %bar** undef, align 4 ; <%bar*> [#uses=2]
+ br i1 false, label %bb85, label %bb
+
+bb: ; preds = %entry
+ br i1 undef, label %bb3.i, label %bb2.i
+
+bb2.i: ; preds = %bb
+ br label %bb3.i
+
+bb3.i: ; preds = %bb2.i, %bb
+ %1 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
+ %2 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
+ %3 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+ %4 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=1]
+ %5 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
+ %6 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
+ %7 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+ %8 = fsub float undef, undef ; <float> [#uses=1]
+ %9 = fmul float 0.000000e+00, %8 ; <float> [#uses=1]
+ %10 = fmul float %5, 0.000000e+00 ; <float> [#uses=1]
+ %11 = fsub float %9, %10 ; <float> [#uses=3]
+ %12 = fmul float %2, 0.000000e+00 ; <float> [#uses=1]
+ %13 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
+ %14 = fsub float %12, %13 ; <float> [#uses=2]
+ store float %14, float* undef
+ %15 = getelementptr inbounds %bar* %0, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=1]
+ store float 0.000000e+00, float* %15
+ %16 = fmul float %11, %11 ; <float> [#uses=1]
+ %17 = fadd float %16, 0.000000e+00 ; <float> [#uses=1]
+ %18 = fadd float %17, undef ; <float> [#uses=1]
+ %19 = call arm_aapcs_vfpcc float @sqrtf(float %18) readnone ; <float> [#uses=2]
+ %20 = fcmp ogt float %19, 0x3F1A36E2E0000000 ; <i1> [#uses=1]
+ %21 = load float* %1, align 4 ; <float> [#uses=2]
+ %22 = load float* %3, align 4 ; <float> [#uses=2]
+ %23 = load float* undef, align 4 ; <float> [#uses=2]
+ %24 = load float* %4, align 4 ; <float> [#uses=2]
+ %25 = fsub float %23, %24 ; <float> [#uses=2]
+ %26 = fmul float 0.000000e+00, %25 ; <float> [#uses=1]
+ %27 = fsub float %26, undef ; <float> [#uses=1]
+ %28 = fmul float %14, 0.000000e+00 ; <float> [#uses=1]
+ %29 = fmul float %11, %25 ; <float> [#uses=1]
+ %30 = fsub float %28, %29 ; <float> [#uses=1]
+ %31 = fsub float undef, 0.000000e+00 ; <float> [#uses=1]
+ %32 = fmul float %21, %27 ; <float> [#uses=1]
+ %33 = fmul float undef, %30 ; <float> [#uses=1]
+ %34 = fadd float %32, %33 ; <float> [#uses=1]
+ %35 = fmul float %23, %31 ; <float> [#uses=1]
+ %36 = fadd float %34, %35 ; <float> [#uses=1]
+ %37 = load float* %6, align 4 ; <float> [#uses=2]
+ %38 = load float* %7, align 4 ; <float> [#uses=2]
+ %39 = fsub float %22, %38 ; <float> [#uses=2]
+ %40 = load float* undef, align 4 ; <float> [#uses=1]
+ %41 = load float* null, align 4 ; <float> [#uses=2]
+ %42 = fmul float %41, undef ; <float> [#uses=1]
+ %43 = fmul float undef, %39 ; <float> [#uses=1]
+ %44 = fsub float %42, %43 ; <float> [#uses=1]
+ %45 = fmul float undef, %39 ; <float> [#uses=1]
+ %46 = fmul float %41, 0.000000e+00 ; <float> [#uses=1]
+ %47 = fsub float %45, %46 ; <float> [#uses=1]
+ %48 = fmul float 0.000000e+00, %44 ; <float> [#uses=1]
+ %49 = fmul float %22, undef ; <float> [#uses=1]
+ %50 = fadd float %48, %49 ; <float> [#uses=1]
+ %51 = fmul float %24, %47 ; <float> [#uses=1]
+ %52 = fadd float %50, %51 ; <float> [#uses=1]
+ %53 = fsub float %37, %21 ; <float> [#uses=2]
+ %54 = fmul float undef, undef ; <float> [#uses=1]
+ %55 = fmul float undef, undef ; <float> [#uses=1]
+ %56 = fsub float %54, %55 ; <float> [#uses=1]
+ %57 = fmul float undef, %53 ; <float> [#uses=1]
+ %58 = load float* undef, align 4 ; <float> [#uses=2]
+ %59 = fmul float %58, undef ; <float> [#uses=1]
+ %60 = fsub float %57, %59 ; <float> [#uses=1]
+ %61 = fmul float %58, undef ; <float> [#uses=1]
+ %62 = fmul float undef, %53 ; <float> [#uses=1]
+ %63 = fsub float %61, %62 ; <float> [#uses=1]
+ %64 = fmul float %37, %56 ; <float> [#uses=1]
+ %65 = fmul float %38, %60 ; <float> [#uses=1]
+ %66 = fadd float %64, %65 ; <float> [#uses=1]
+ %67 = fmul float %40, %63 ; <float> [#uses=1]
+ %68 = fadd float %66, %67 ; <float> [#uses=1]
+ %69 = select i1 undef, float %36, float %52 ; <float> [#uses=1]
+ %70 = select i1 undef, float %69, float %68 ; <float> [#uses=1]
+ %iftmp.164.0 = select i1 %20, float %19, float 1.000000e+00 ; <float> [#uses=1]
+ %71 = fdiv float %70, %iftmp.164.0 ; <float> [#uses=1]
+ store float %71, float* null, align 4
+ %72 = icmp eq %bar* null, %0 ; <i1> [#uses=1]
+ br i1 %72, label %bb4.i97, label %ccc.exit98
+
+bb4.i97: ; preds = %bb3.i
+ %73 = load %bar** undef, align 4 ; <%bar*> [#uses=0]
+ br label %ccc.exit98
+
+ccc.exit98: ; preds = %bb4.i97, %bb3.i
+ ret %bar* null
+
+bb85: ; preds = %entry
+ ret %bar* null
+}
+
+declare arm_aapcs_vfpcc float @sqrtf(float) readnone
diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll
index d8019a0..062133e 100644
--- a/test/CodeGen/ARM/arguments_f64_backfill.ll
+++ b/test/CodeGen/ARM/arguments_f64_backfill.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | FileCheck %s
define float @f(float %z, double %a, float %b) {
-; CHECK: fcpys s0, s1
+; CHECK: vmov.f32 s0, s1
%tmp = call float @g(float %b)
ret float %tmp
}
diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll
index 5f3ed1d..fac2bc5 100644
--- a/test/CodeGen/ARM/compare-call.ll
+++ b/test/CodeGen/ARM/compare-call.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
-; RUN: grep fcmpes
+; RUN: grep vcmpe.f32
define void @test3(float* %glob, i32 %X) {
entry:
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index 5690a01..46f136b 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
define float @test(float %a, float %b) {
entry:
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index a01f868..1426a2d 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
define float @test(float %a, float %b) {
entry:
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index bf7c305..a6d7410 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=arm | grep bic | count 2
; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
-; RUN: grep fneg | count 2
+; RUN: grep vneg | count 2
define float @test1(float %x, double %y) {
%tmp = fpext float %x to double
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 2af250d..45803f6 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
define float @test(float %a, float %b) {
entry:
diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll
index ebf1d84..6db2385 100644
--- a/test/CodeGen/ARM/fixunsdfdi.ll
+++ b/test/CodeGen/ARM/fixunsdfdi.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+vfp2
-; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fstd
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep vstr.64
define hidden i64 @__fixunsdfdi(double %x) nounwind readnone {
entry:
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index 5c31ea6..57efa82 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
define float @test(float %acc, float %a, float %b) {
entry:
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
index c6e6d40..31b5c52 100644
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
define float @test(float %acc, float %a, float %b) {
entry:
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index cb5dade..735263c 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
define float @test(float %a, float %b) {
entry:
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index 7da443d..bc3d42d 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
define float @test1(float* %a) {
entry:
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
index 8fc13e7..724947e 100644
--- a/test/CodeGen/ARM/fnmacs.ll
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -4,14 +4,14 @@
define float @test(float %acc, float %a, float %b) {
entry:
-; VFP2: fnmacs
-; NEON: fnmacs
+; VFP2: vmls.f32
+; NEON: vmls.f32
; NEONFP-NOT: vmls
-; NEONFP-NOT: fcpys
+; NEONFP-NOT: vmov.f32
; NEONFP: vmul.f32
; NEONFP: vsub.f32
-; NEONFP: fmrs
+; NEONFP: vmov
%0 = fmul float %a, %b
%1 = fsub float %acc, %0
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 3ae437d..ad21882 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -5,7 +5,7 @@
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
define float @test1(float %acc, float %a, float %b) nounwind {
-; CHECK: fnmscs s2, s1, s0
+; CHECK: vnmla.f32 s2, s1, s0
entry:
%0 = fmul float %a, %b
%1 = fsub float -0.0, %0
@@ -14,7 +14,7 @@ entry:
}
define float @test2(float %acc, float %a, float %b) nounwind {
-; CHECK: fnmscs s2, s1, s0
+; CHECK: vnmla.f32 s2, s1, s0
entry:
%0 = fmul float %a, %b
%1 = fmul float -1.0, %0
diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll
index 613b347..6d7bc05 100644
--- a/test/CodeGen/ARM/fnmul.ll
+++ b/test/CodeGen/ARM/fnmul.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fnmuld
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep vnmul.f64
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep vmul.f64
define double @t1(double %a, double %b) {
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index 4e4ef72..8fbd45b 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -2,9 +2,9 @@
define float @f(i32 %a) {
;CHECK: f:
-;CHECK: fmsr
-;CHECK-NEXT: fsitos
-;CHECK-NEXT: fmrs
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f32.s32
+;CHECK-NEXT: vmov
entry:
%tmp = sitofp i32 %a to float ; <float> [#uses=1]
ret float %tmp
@@ -12,9 +12,9 @@ entry:
define double @g(i32 %a) {
;CHECK: g:
-;CHECK: fmsr
-;CHECK-NEXT: fsitod
-;CHECK-NEXT: fmrrd
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f64.s32
+;CHECK-NEXT: vmov
entry:
%tmp = sitofp i32 %a to double ; <double> [#uses=1]
ret double %tmp
@@ -22,9 +22,9 @@ entry:
define double @uint_to_double(i32 %a) {
;CHECK: uint_to_double:
-;CHECK: fmsr
-;CHECK-NEXT: fuitod
-;CHECK-NEXT: fmrrd
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f64.u32
+;CHECK-NEXT: vmov
entry:
%tmp = uitofp i32 %a to double ; <double> [#uses=1]
ret double %tmp
@@ -32,9 +32,9 @@ entry:
define float @uint_to_float(i32 %a) {
;CHECK: uint_to_float:
-;CHECK: fmsr
-;CHECK-NEXT: fuitos
-;CHECK-NEXT: fmrs
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f32.u32
+;CHECK-NEXT: vmov
entry:
%tmp = uitofp i32 %a to float ; <float> [#uses=1]
ret float %tmp
@@ -42,8 +42,8 @@ entry:
define double @h(double* %v) {
;CHECK: h:
-;CHECK: fldd
-;CHECK-NEXT: fmrrd
+;CHECK: vldr.64
+;CHECK-NEXT: vmov
entry:
%tmp = load double* %v ; <double> [#uses=1]
ret double %tmp
@@ -58,13 +58,13 @@ entry:
define double @f2(double %a) {
;CHECK: f2:
-;CHECK-NOT: fmdrr
+;CHECK-NOT: vmov
ret double %a
}
define void @f3() {
;CHECK: f3:
-;CHECK-NOT: fmdrr
+;CHECK-NOT: vmov
;CHECK: f4
entry:
%tmp = call double @f5( ) ; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 9ce2ac5..2adac78 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -6,7 +6,7 @@
define i32 @test1(float %a, float %b) {
; VFP2: test1:
-; VFP2: ftosizs s0, s0
+; VFP2: vcvt.s32.f32 s0, s0
; NEON: test1:
; NEON: vcvt.s32.f32 d0, d0
entry:
@@ -17,7 +17,7 @@ entry:
define i32 @test2(float %a, float %b) {
; VFP2: test2:
-; VFP2: ftouizs s0, s0
+; VFP2: vcvt.u32.f32 s0, s0
; NEON: test2:
; NEON: vcvt.u32.f32 d0, d0
entry:
@@ -28,7 +28,7 @@ entry:
define float @test3(i32 %a, i32 %b) {
; VFP2: test3:
-; VFP2: fuitos s0, s0
+; VFP2: vcvt.f32.u32 s0, s0
; NEON: test3:
; NEON: vcvt.f32.u32 d0, d0
entry:
@@ -39,7 +39,7 @@ entry:
define float @test4(i32 %a, i32 %b) {
; VFP2: test4:
-; VFP2: fsitos s0, s0
+; VFP2: vcvt.f32.s32 s0, s0
; NEON: test4:
; NEON: vcvt.f32.s32 d0, d0
entry:
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index ebeeb18..ce6d6b2 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -2,7 +2,7 @@
define float @f1(float %a, float %b) {
;CHECK: f1:
-;CHECK: fadds
+;CHECK: vadd.f32
entry:
%tmp = fadd float %a, %b ; <float> [#uses=1]
ret float %tmp
@@ -10,7 +10,7 @@ entry:
define double @f2(double %a, double %b) {
;CHECK: f2:
-;CHECK: faddd
+;CHECK: vadd.f64
entry:
%tmp = fadd double %a, %b ; <double> [#uses=1]
ret double %tmp
@@ -18,7 +18,7 @@ entry:
define float @f3(float %a, float %b) {
;CHECK: f3:
-;CHECK: fmuls
+;CHECK: vmul.f32
entry:
%tmp = fmul float %a, %b ; <float> [#uses=1]
ret float %tmp
@@ -26,7 +26,7 @@ entry:
define double @f4(double %a, double %b) {
;CHECK: f4:
-;CHECK: fmuld
+;CHECK: vmul.f64
entry:
%tmp = fmul double %a, %b ; <double> [#uses=1]
ret double %tmp
@@ -34,7 +34,7 @@ entry:
define float @f5(float %a, float %b) {
;CHECK: f5:
-;CHECK: fsubs
+;CHECK: vsub.f32
entry:
%tmp = fsub float %a, %b ; <float> [#uses=1]
ret float %tmp
@@ -42,7 +42,7 @@ entry:
define double @f6(double %a, double %b) {
;CHECK: f6:
-;CHECK: fsubd
+;CHECK: vsub.f64
entry:
%tmp = fsub double %a, %b ; <double> [#uses=1]
ret double %tmp
@@ -58,7 +58,7 @@ entry:
define double @f8(double %a) {
;CHECK: f8:
-;CHECK: fnegd
+;CHECK: vneg.f64
entry:
%tmp1 = fsub double -0.000000e+00, %a ; <double> [#uses=1]
ret double %tmp1
@@ -66,7 +66,7 @@ entry:
define float @f9(float %a, float %b) {
;CHECK: f9:
-;CHECK: fdivs
+;CHECK: vdiv.f32
entry:
%tmp1 = fdiv float %a, %b ; <float> [#uses=1]
ret float %tmp1
@@ -74,7 +74,7 @@ entry:
define double @f10(double %a, double %b) {
;CHECK: f10:
-;CHECK: fdivd
+;CHECK: vdiv.f64
entry:
%tmp1 = fdiv double %a, %b ; <double> [#uses=1]
ret double %tmp1
@@ -92,7 +92,7 @@ declare float @fabsf(float)
define double @f12(double %a) {
;CHECK: f12:
-;CHECK: fabsd
+;CHECK: vabs.f64
entry:
%tmp1 = call double @fabs( double %a ) ; <double> [#uses=1]
ret double %tmp1
diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll
index 2c9591c..260ec49 100644
--- a/test/CodeGen/ARM/fpcmp.ll
+++ b/test/CodeGen/ARM/fpcmp.ll
@@ -2,7 +2,7 @@
define i32 @f1(float %a) {
;CHECK: f1:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
;CHECK: movmi
entry:
%tmp = fcmp olt float %a, 1.000000e+00 ; <i1> [#uses=1]
@@ -12,7 +12,7 @@ entry:
define i32 @f2(float %a) {
;CHECK: f2:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
;CHECK: moveq
entry:
%tmp = fcmp oeq float %a, 1.000000e+00 ; <i1> [#uses=1]
@@ -22,7 +22,7 @@ entry:
define i32 @f3(float %a) {
;CHECK: f3:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
;CHECK: movgt
entry:
%tmp = fcmp ogt float %a, 1.000000e+00 ; <i1> [#uses=1]
@@ -32,7 +32,7 @@ entry:
define i32 @f4(float %a) {
;CHECK: f4:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
;CHECK: movge
entry:
%tmp = fcmp oge float %a, 1.000000e+00 ; <i1> [#uses=1]
@@ -42,7 +42,7 @@ entry:
define i32 @f5(float %a) {
;CHECK: f5:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
;CHECK: movls
entry:
%tmp = fcmp ole float %a, 1.000000e+00 ; <i1> [#uses=1]
@@ -52,7 +52,7 @@ entry:
define i32 @f6(float %a) {
;CHECK: f6:
-;CHECK: fcmpes
+;CHECK: vcmpe.f32
;CHECK: movne
entry:
%tmp = fcmp une float %a, 1.000000e+00 ; <i1> [#uses=1]
@@ -62,7 +62,7 @@ entry:
define i32 @g1(double %a) {
;CHECK: g1:
-;CHECK: fcmped
+;CHECK: vcmpe.f64
;CHECK: movmi
entry:
%tmp = fcmp olt double %a, 1.000000e+00 ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index ee3c338..bf197a4 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -3,7 +3,7 @@
define float @f1(double %x) {
;CHECK-VFP: f1:
-;CHECK-VFP: fcvtsd
+;CHECK-VFP: vcvt.f32.f64
;CHECK: f1:
;CHECK: truncdfsf2
entry:
@@ -13,7 +13,7 @@ entry:
define double @f2(float %x) {
;CHECK-VFP: f2:
-;CHECK-VFP: fcvtds
+;CHECK-VFP: vcvt.f64.f32
;CHECK: f2:
;CHECK: extendsfdf2
entry:
@@ -23,7 +23,7 @@ entry:
define i32 @f3(float %x) {
;CHECK-VFP: f3:
-;CHECK-VFP: ftosizs
+;CHECK-VFP: vcvt.s32.f32
;CHECK: f3:
;CHECK: fixsfsi
entry:
@@ -33,7 +33,7 @@ entry:
define i32 @f4(float %x) {
;CHECK-VFP: f4:
-;CHECK-VFP: ftouizs
+;CHECK-VFP: vcvt.u32.f32
;CHECK: f4:
;CHECK: fixunssfsi
entry:
@@ -43,7 +43,7 @@ entry:
define i32 @f5(double %x) {
;CHECK-VFP: f5:
-;CHECK-VFP: ftosizd
+;CHECK-VFP: vcvt.s32.f64
;CHECK: f5:
;CHECK: fixdfsi
entry:
@@ -53,7 +53,7 @@ entry:
define i32 @f6(double %x) {
;CHECK-VFP: f6:
-;CHECK-VFP: ftouizd
+;CHECK-VFP: vcvt.u32.f64
;CHECK: f6:
;CHECK: fixunsdfsi
entry:
@@ -63,7 +63,7 @@ entry:
define float @f7(i32 %a) {
;CHECK-VFP: f7:
-;CHECK-VFP: fsitos
+;CHECK-VFP: vcvt.f32.s32
;CHECK: f7:
;CHECK: floatsisf
entry:
@@ -73,7 +73,7 @@ entry:
define double @f8(i32 %a) {
;CHECK-VFP: f8:
-;CHECK-VFP: fsitod
+;CHECK-VFP: vcvt.f64.s32
;CHECK: f8:
;CHECK: floatsidf
entry:
@@ -83,7 +83,7 @@ entry:
define float @f9(i32 %a) {
;CHECK-VFP: f9:
-;CHECK-VFP: fuitos
+;CHECK-VFP: vcvt.f32.u32
;CHECK: f9:
;CHECK: floatunsisf
entry:
@@ -93,7 +93,7 @@ entry:
define double @f10(i32 %a) {
;CHECK-VFP: f10:
-;CHECK-VFP: fuitod
+;CHECK-VFP: vcvt.f64.u32
;CHECK: f10:
;CHECK: floatunsidf
entry:
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 0822fbf..c3cff18 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -8,7 +8,7 @@ define float @f1(float %a) {
define float @f2(float* %v, float %u) {
; CHECK: f2:
-; CHECK: flds{{.*}}[
+; CHECK: vldr.32{{.*}}[
%tmp = load float* %v ; <float> [#uses=1]
%tmp1 = fadd float %tmp, %u ; <float> [#uses=1]
ret float %tmp1
@@ -16,7 +16,7 @@ define float @f2(float* %v, float %u) {
define void @f3(float %a, float %b, float* %v) {
; CHECK: f3:
-; CHECK: fsts{{.*}}[
+; CHECK: vstr.32{{.*}}[
%tmp = fadd float %a, %b ; <float> [#uses=1]
store float %tmp, float* %v
ret void
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
index 0d270b0..4cacc5d 100644
--- a/test/CodeGen/ARM/fptoint.ll
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep -E {vmov\\W*r\[0-9\]+,\\W*s\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | not grep fmrrd
@i = weak global i32 0 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
index 060dd46..f84ccdd 100644
--- a/test/CodeGen/ARM/fsubs.ll
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vsub.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vsub.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
define float @test(float %a, float %b) {
entry:
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index 8ed58bd..886c0d5 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -40,14 +40,14 @@ define i32 @test1() {
; DarwinPIC: _test1:
; DarwinPIC: ldr r0, LCPI1_0
-; DarwinPIC: LPC0:
+; DarwinPIC: LPC1_0:
; DarwinPIC: ldr r0, [pc, +r0]
; DarwinPIC: ldr r0, [r0]
; DarwinPIC: bx lr
; DarwinPIC: .align 2
; DarwinPIC: LCPI1_0:
-; DarwinPIC: .long L_G$non_lazy_ptr-(LPC0+8)
+; DarwinPIC: .long L_G$non_lazy_ptr-(LPC1_0+8)
; DarwinPIC: .section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
; DarwinPIC: .align 2
@@ -61,7 +61,7 @@ define i32 @test1() {
; LinuxPIC: ldr r0, .LCPI1_0
; LinuxPIC: ldr r1, .LCPI1_1
-; LinuxPIC: .LPC0:
+; LinuxPIC: .LPC1_0:
; LinuxPIC: add r0, pc, r0
; LinuxPIC: ldr r0, [r1, +r0]
; LinuxPIC: ldr r0, [r0]
@@ -69,7 +69,7 @@ define i32 @test1() {
; LinuxPIC: .align 2
; LinuxPIC: .LCPI1_0:
-; LinuxPIC: .long _GLOBAL_OFFSET_TABLE_-(.LPC0+8)
+; LinuxPIC: .long _GLOBAL_OFFSET_TABLE_-(.LPC1_0+8)
; LinuxPIC: .align 2
; LinuxPIC: .LCPI1_1:
; LinuxPIC: .long G(GOT)
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index e9145ac..623f2cb 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -11,7 +11,7 @@ entry:
define void @t1(i32 %a, i32 %b) {
; CHECK: t1:
-; CHECK: ldmltfd sp!, {r7, pc}
+; CHECK: ldmfdlt sp!, {r7, pc}
entry:
%tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 5824115..d7fcf7d 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep cmpne | count 1
; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep ldmhi | count 1
+; RUN: grep ldmfdhi | count 1
define void @foo(i32 %X, i32 %Y) {
entry:
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index f9cf88f..c60ad93 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep moveq | count 1
; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep ldmeq | count 1
+; RUN: grep ldmfdeq | count 1
; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index 6cb8e7b..a7da834 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep ldmne | count 1
+; RUN: grep ldmfdne | count 1
%struct.SString = type { i8*, i32, i32 }
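The four ifcvt updates above track a second spelling change in the same printer overhaul: predicated load-multiple instructions now print as ldm<mode><cond>, so ldmltfd becomes ldmfdlt, and the bare ldmhi/ldmeq/ldmne gain the explicit fd (full-descending, the stack-pop default) suffix ahead of the condition, becoming ldmfdhi/ldmfdeq/ldmfdne. A hypothetical reduced test in the style of ifcvt5.ll above, assuming if-conversion fires the same way:

; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | grep ldmfdlt | count 1
; The call sits on one side of the branch; after if-conversion the shared
; epilogue pop is predicated, and is expected as ldmfdlt sp!, {r7, pc}.
define void @pop_sketch(i32 %a, i32 %b) {
entry:
  %cmp = icmp sgt i32 %a, 10
  br i1 %cmp, label %cond_true, label %done
cond_true:
  tail call void @helper(i32 %b)
  ret void
done:
  ret void
}
declare void @helper(i32)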
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index cf4a1ab..8b56f13 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -55,6 +55,6 @@ L1: ; preds = %L2, %bb2
store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
ret i32 %res.3
}
-; ARM: .long L_foo_L5-(LPC{{.*}}+8)
-; THUMB: .long L_foo_L5-(LPC{{.*}}+4)
-; THUMB2: .long L_foo_L5
+; ARM: .long LBA4__foo__L5-(LPC{{.*}}+8)
+; THUMB: .long LBA4__foo__L5-(LPC{{.*}}+4)
+; THUMB2: .long LBA4__foo__L5
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index 2796dec..c78872a 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep fldd | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep fstd
-; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd
+; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64
+; RUN: llc < %s -march=arm -mattr=+neon | grep vmov
define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
entry:
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 547bab7..130277b 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
-; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | grep vmov | count 2
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 85c8b5b..29c55c6 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -60,7 +60,7 @@ define double @f7(double %a, double %b) {
;CHECK: movlt
;CHECK: movlt
;CHECK-VFP: f7:
-;CHECK-VFP: fcpydmi
+;CHECK-VFP: vmovmi
%tmp = fcmp olt double %a, 1.234e+00
%tmp1 = select i1 %tmp, double -1.000e+00, double %b
ret double %tmp1
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index f4b27a7..5ad7ecc 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
-; CHECK: vstmia sp
-; CHECK: vldmia sp
+; CHECK: bic sp, sp, #15
+; CHECK: vst1.64 {{.*}}sp, :128
+; CHECK: vld1.64 {{.*}}sp, :128
entry:
%0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
store float 6.300000e+01, float* undef, align 4
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 50000e31..44a44af 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -15,11 +15,11 @@ declare double @fabs(double)
define void @test_abs(float* %P, double* %D) {
;CHECK: test_abs:
%a = load float* %P ; <float> [#uses=1]
-;CHECK: fabss
+;CHECK: vabs.f32
%b = call float @fabsf( float %a ) ; <float> [#uses=1]
store float %b, float* %P
%A = load double* %D ; <double> [#uses=1]
-;CHECK: fabsd
+;CHECK: vabs.f64
%B = call double @fabs( double %A ) ; <double> [#uses=1]
store double %B, double* %D
ret void
@@ -39,10 +39,10 @@ define void @test_add(float* %P, double* %D) {
define void @test_ext_round(float* %P, double* %D) {
;CHECK: test_ext_round:
%a = load float* %P ; <float> [#uses=1]
-;CHECK: fcvtds
+;CHECK: vcvt.f64.f32
%b = fpext float %a to double ; <double> [#uses=1]
%A = load double* %D ; <double> [#uses=1]
-;CHECK: fcvtsd
+;CHECK: vcvt.f32.f64
%B = fptrunc double %A to float ; <float> [#uses=1]
store double %b, double* %D
store float %B, float* %P
@@ -54,7 +54,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
%a1 = load float* %P1 ; <float> [#uses=1]
%a2 = load float* %P2 ; <float> [#uses=1]
%a3 = load float* %P3 ; <float> [#uses=1]
-;CHECK: fmscs
+;CHECK: vnmls.f32
%X = fmul float %a1, %a2 ; <float> [#uses=1]
%Y = fsub float %X, %a3 ; <float> [#uses=1]
store float %Y, float* %P1
@@ -64,7 +64,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
define i32 @test_ftoi(float* %P1) {
;CHECK: test_ftoi:
%a1 = load float* %P1 ; <float> [#uses=1]
-;CHECK: ftosizs
+;CHECK: vcvt.s32.f32
%b1 = fptosi float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
@@ -72,7 +72,7 @@ define i32 @test_ftoi(float* %P1) {
define i32 @test_ftou(float* %P1) {
;CHECK: test_ftou:
%a1 = load float* %P1 ; <float> [#uses=1]
-;CHECK: ftouizs
+;CHECK: vcvt.u32.f32
%b1 = fptoui float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
@@ -80,7 +80,7 @@ define i32 @test_ftou(float* %P1) {
define i32 @test_dtoi(double* %P1) {
;CHECK: test_dtoi:
%a1 = load double* %P1 ; <double> [#uses=1]
-;CHECK: ftosizd
+;CHECK: vcvt.s32.f64
%b1 = fptosi double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
@@ -88,14 +88,14 @@ define i32 @test_dtoi(double* %P1) {
define i32 @test_dtou(double* %P1) {
;CHECK: test_dtou:
%a1 = load double* %P1 ; <double> [#uses=1]
-;CHECK: ftouizd
+;CHECK: vcvt.u32.f64
%b1 = fptoui double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define void @test_utod(double* %P1, i32 %X) {
;CHECK: test_utod:
-;CHECK: fuitod
+;CHECK: vcvt.f64.u32
%b1 = uitofp i32 %X to double ; <double> [#uses=1]
store double %b1, double* %P1
ret void
@@ -103,7 +103,7 @@ define void @test_utod(double* %P1, i32 %X) {
define void @test_utod2(double* %P1, i8 %X) {
;CHECK: test_utod2:
-;CHECK: fuitod
+;CHECK: vcvt.f64.u32
%b1 = uitofp i8 %X to double ; <double> [#uses=1]
store double %b1, double* %P1
ret void
@@ -141,7 +141,7 @@ define void @test_cmpfp0(float* %glob, i32 %X) {
;CHECK: test_cmpfp0:
entry:
%tmp = load float* %glob ; <float> [#uses=1]
-;CHECK: fcmpezs
+;CHECK: vcmpe.f32
%tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; <i1> [#uses=1]
br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index f0df798..5dd87d6 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -204,8 +204,8 @@ define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
;CHECK: test_vset_lanef32:
-;CHECK: fcpys
-;CHECK: fcpys
+;CHECK: vmov.f32
+;CHECK: vmov.f32
entry:
%0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
ret <2 x float> %0
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index ed69f97..e4368d6 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -134,6 +134,26 @@ define <2 x i64> @v_movQi64() nounwind {
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
+; Check for correct assembler printing for immediate values.
+%struct.int8x8_t = type { <8 x i8> }
+define arm_apcscc void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
+entry:
+;CHECK: vdupn128:
+;CHECK: vmov.i8 d0, #0x80
+ %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
+ store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
+ ret void
+}
+
+define arm_apcscc void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
+entry:
+;CHECK: vdupnneg75:
+;CHECK: vmov.i8 d0, #0xB5
+ %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
+ store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
+ ret void
+}
+
define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
;CHECK: vmovls8:
;CHECK: vmovl.s8
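The two functions added to vmov.ll pin down how splatted byte immediates print: the value appears as an unsigned hex byte, so the i8 splat of -128 is #0x80 and the splat of -75 is #0xB5 (256 - 75 = 181 = 0xB5). A further hypothetical case in exactly the same style, for the all-ones splat (the expected check line is an assumption by the same rule, not something this commit shows):

%struct.int8x8_t = type { <8 x i8> }

define arm_apcscc void @vdupnneg1(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
entry:
; hypothetical expectation: vmov.i8 d0, #0xFF  (-1 is 0xFF in two's complement)
  %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0
  store <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8>* %0, align 8
  ret void
}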
diff --git a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
new file mode 100644
index 0000000..a51c75d
--- /dev/null
+++ b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s
+; PR5495
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+%"struct.std::__ctype_abstract_base<wchar_t>" = type { %"struct.std::locale::facet" }
+%"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::__ctype_abstract_base<wchar_t>"*, %"struct.std::__ctype_abstract_base<wchar_t>"* }
+%"struct.std::basic_istream<char,std::char_traits<char> >" = type { i32 (...)**, i32, %"struct.std::basic_ios<char,std::char_traits<char> >" }
+%"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" }
+%"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" }
+%"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 }
+%"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" }
+%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 }
+%"struct.std::ios_base::_Words" = type { i8*, i32 }
+%"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
+%"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
+%"struct.std::locale::facet" = type { i32 (...)**, i32 }
+%union..0._15 = type { i32 }
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i8* @__cxa_begin_catch(i8*) nounwind
+
+declare %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"*)
+
+define %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_(%"struct.std::basic_istream<char,std::char_traits<char> >"* %__in, i8* nocapture %__s) {
+entry:
+ %0 = invoke %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"* undef)
+ to label %invcont8 unwind label %lpad74 ; <%"struct.std::ctype<char>"*> [#uses=0]
+
+invcont8: ; preds = %entry
+ %1 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef)
+ to label %bb26.preheader unwind label %lpad ; <i32> [#uses=0]
+
+bb26.preheader: ; preds = %invcont8
+ br label %invcont38
+
+bb1.i100: ; preds = %invcont38
+ %2 = add nsw i32 1, %__extracted.0 ; <i32> [#uses=3]
+ br i1 undef, label %bb.i97, label %bb1.i
+
+bb.i97: ; preds = %bb1.i100
+ br label %invcont38
+
+bb1.i: ; preds = %bb1.i100
+ %3 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef)
+ to label %invcont38 unwind label %lpad ; <i32> [#uses=0]
+
+invcont24: ; preds = %invcont38
+ %4 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef)
+ to label %_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i unwind label %lpad ; <i32> [#uses=0]
+
+_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i: ; preds = %invcont24
+ br i1 undef, label %invcont25, label %bb.i93
+
+bb.i93: ; preds = %_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i
+ %5 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef)
+ to label %invcont25 unwind label %lpad ; <i32> [#uses=0]
+
+invcont25: ; preds = %bb.i93, %_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i
+ br label %invcont38
+
+invcont38: ; preds = %invcont25, %bb1.i, %bb.i97, %bb26.preheader
+ %__extracted.0 = phi i32 [ 0, %bb26.preheader ], [ undef, %invcont25 ], [ %2, %bb.i97 ], [ %2, %bb1.i ] ; <i32> [#uses=1]
+ br i1 false, label %bb1.i100, label %invcont24
+
+lpad: ; preds = %bb.i93, %invcont24, %bb1.i, %invcont8
+ %__extracted.1 = phi i32 [ 0, %invcont8 ], [ %2, %bb1.i ], [ undef, %bb.i93 ], [ undef, %invcont24 ] ; <i32> [#uses=0]
+ %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
+ %6 = call i8* @__cxa_begin_catch(i8* %eh_ptr) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+lpad74: ; preds = %entry
+ unreachable
+}
diff --git a/test/CodeGen/Generic/switch-lower.ll b/test/CodeGen/Generic/switch-lower.ll
index eb240ed..1cefe82 100644
--- a/test/CodeGen/Generic/switch-lower.ll
+++ b/test/CodeGen/Generic/switch-lower.ll
@@ -1,8 +1,22 @@
; RUN: llc < %s
-; PR1197
-define void @exp_attr__expand_n_attribute_reference() {
+; PR5421
+define void @test1() {
+entry:
+ switch i128 undef, label %exit [
+ i128 55340232221128654848, label %exit
+ i128 92233720368547758080, label %exit
+ i128 73786976294838206464, label %exit
+ i128 147573952589676412928, label %exit
+ ]
+exit:
+ unreachable
+}
+
+
+; PR1197
+define void @test2() {
entry:
br i1 false, label %cond_next954, label %cond_true924
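The PR5421 case values in test1 above are carefully chosen: each is a multiple of 2^64 (3*2^64, 5*2^64, 2^66 and 2^67), so all four keys have identical, all-zero low 64 bits and differ only in the high word. Any switch-lowering path that compared or hashed only the low word would treat them as duplicate cases, which is the failure mode this test locks down. A hypothetical two-case variant of the same trap:

define void @hi_word_cases(i128 %x) {
entry:
  switch i128 %x, label %exit [
    i128 18446744073709551616, label %exit    ; 1 * 2^64 -- low 64 bits all zero
    i128 36893488147419103232, label %exit    ; 2 * 2^64 -- differs only in the high word
  ]
exit:
  ret void
}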
diff --git a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
index cc574c7..4d7d9b9 100644
--- a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
+++ b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -3,7 +3,7 @@
target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
target triple = "msp430-unknown-unknown"
-@"\010x0021" = common global i8 0, align 1 ; <i8*> [#uses=2]
+@"\010x0021" = external global i8, align 1 ; <i8*> [#uses=2]
define zeroext i8 @foo(i8 zeroext %x) nounwind {
entry:
diff --git a/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll b/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll
new file mode 100644
index 0000000..94fe5c7
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-elf"
+
+@g_29 = common global i8 0, align 1 ; <i8*> [#uses=0]
+
+define signext i8 @foo(i8 signext %_si1, i8 signext %_si2) nounwind readnone {
+entry:
+; CHECK: foo:
+; CHECK: call #__mulqi3
+ %mul = mul i8 %_si2, %_si1 ; <i8> [#uses=1]
+ ret i8 %mul
+}
+
+define void @uint81(i16* nocapture %p_32) nounwind {
+entry:
+ %call = tail call i16 @bar(i8* bitcast (i8 (i8, i8)* @foo to i8*)) nounwind ; <i16> [#uses=0]
+ ret void
+}
+
+declare i16 @bar(i8*)
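The base MSP430 ISA has no multiply instruction, so an 8-bit mul must become a library call; the test above checks that it now emits call #__mulqi3 (the libgcc 8-bit multiply helper). By the same scheme a 16-bit multiply would presumably go to the standard libgcc sibling __mulhi3 -- a hypothetical companion sketch, where the helper name is an assumption rather than something this commit shows:

define i16 @mul16(i16 %a, i16 %b) nounwind readnone {
entry:
; hypothetical expectation: call #__mulhi3
  %mul = mul i16 %b, %a
  ret i16 %mul
}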
diff --git a/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll b/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
new file mode 100644
index 0000000..d232aea
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-elf"
+
+%struct.httpd_fs_file = type { i8*, i16 }
+%struct.psock = type { %struct.pt, %struct.pt, i8*, i8*, i8*, i16, i16, %struct.httpd_fs_file, i16, i8, i8 }
+%struct.pt = type { i16 }
+
+@foo = external global i8*
+
+define signext i8 @psock_readto(%struct.psock* nocapture %psock, i8 zeroext %c) nounwind {
+entry:
+ switch i16 undef, label %sw.epilog [
+ i16 0, label %sw.bb
+ i16 283, label %if.else.i
+ ]
+
+sw.bb: ; preds = %entry
+ br label %do.body
+
+do.body: ; preds = %while.cond36.i, %while.end.i, %sw.bb
+ br label %while.cond.i
+
+if.else.i: ; preds = %entry
+ br i1 undef, label %psock_newdata.exit, label %if.else11.i
+
+if.else11.i: ; preds = %if.else.i
+ ret i8 0
+
+psock_newdata.exit: ; preds = %if.else.i
+ ret i8 0
+
+while.cond.i: ; preds = %while.body.i, %do.body
+ br i1 undef, label %while.end.i, label %while.body.i
+
+while.body.i: ; preds = %while.cond.i
+ br i1 undef, label %do.end41, label %while.cond.i
+
+while.end.i: ; preds = %while.cond.i
+ br i1 undef, label %do.body, label %while.cond36.i.preheader
+
+while.cond36.i.preheader: ; preds = %while.end.i
+ br label %while.cond36.i
+
+while.cond36.i: ; preds = %while.body41.i, %while.cond36.i.preheader
+ br i1 undef, label %do.body, label %while.body41.i
+
+while.body41.i: ; preds = %while.cond36.i
+ %tmp43.i = load i8** @foo ; <i8*> [#uses=2]
+ %tmp44.i = load i8* %tmp43.i ; <i8> [#uses=1]
+ %ptrincdec50.i = getelementptr inbounds i8* %tmp43.i, i16 1 ; <i8*> [#uses=1]
+ store i8* %ptrincdec50.i, i8** @foo
+ %cmp55.i = icmp eq i8 %tmp44.i, %c ; <i1> [#uses=1]
+ br i1 %cmp55.i, label %do.end41, label %while.cond36.i
+
+do.end41: ; preds = %while.body41.i, %while.body.i
+ br i1 undef, label %if.then46, label %sw.epilog
+
+if.then46: ; preds = %do.end41
+ ret i8 0
+
+sw.epilog: ; preds = %do.end41, %entry
+ ret i8 2
+}
diff --git a/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
new file mode 100644
index 0000000..3340494
--- /dev/null
+++ b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
@@ -0,0 +1,74 @@
+; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s
+target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16"
+target triple = "msp430-generic-generic"
+
+define i16 @am1(i16 %x, i16* %a) nounwind {
+ %1 = load i16* %a
+ %2 = or i16 %1,%x
+ ret i16 %2
+}
+; CHECK: am1:
+; CHECK: bis.w 0(r14), r15
+
+@foo = external global i16
+
+define i16 @am2(i16 %x) nounwind {
+ %1 = load i16* @foo
+ %2 = or i16 %1,%x
+ ret i16 %2
+}
+; CHECK: am2:
+; CHECK: bis.w &foo, r15
+
+@bar = internal constant [2 x i8] [ i8 32, i8 64 ]
+
+define i8 @am3(i8 %x, i16 %n) nounwind {
+ %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %n
+ %2 = load i8* %1
+ %3 = or i8 %2,%x
+ ret i8 %3
+}
+; CHECK: am3:
+; CHECK: bis.b &bar(r14), r15
+
+define i16 @am4(i16 %x) nounwind {
+ %1 = volatile load i16* inttoptr(i16 32 to i16*)
+ %2 = or i16 %1,%x
+ ret i16 %2
+}
+; CHECK: am4:
+; CHECK: bis.w &32, r15
+
+define i16 @am5(i16 %x, i16* %a) nounwind {
+ %1 = getelementptr i16* %a, i16 2
+ %2 = load i16* %1
+ %3 = or i16 %2,%x
+ ret i16 %3
+}
+; CHECK: am5:
+; CHECK: bis.w 4(r14), r15
+
+%S = type { i16, i16 }
+@baz = common global %S zeroinitializer, align 1
+
+define i16 @am6(i16 %x) nounwind {
+ %1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+ %2 = or i16 %1,%x
+ ret i16 %2
+}
+; CHECK: am6:
+; CHECK: bis.w &baz+2, r15
+
+%T = type { i16, [2 x i8] }
+@duh = internal constant %T { i16 16, [2 x i8][i8 32, i8 64 ] }
+
+define i8 @am7(i8 %x, i16 %n) nounwind {
+ %1 = getelementptr %T* @duh, i32 0, i32 1
+ %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
+ %3= load i8* %2
+ %4 = or i8 %3,%x
+ ret i8 %4
+}
+; CHECK: am7:
+; CHECK: bis.b &duh+2(r14), r15
+
diff --git a/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
new file mode 100644
index 0000000..ca79fb6
--- /dev/null
+++ b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
@@ -0,0 +1,81 @@
+; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:16"
+target triple = "msp430-generic-generic"
+
+define void @am1(i16* %a, i16 %x) nounwind {
+ %1 = load i16* %a
+ %2 = or i16 %x, %1
+ store i16 %2, i16* %a
+ ret void
+}
+; CHECK: am1:
+; CHECK: bis.w r14, 0(r15)
+
+@foo = external global i16
+
+define void @am2(i16 %x) nounwind {
+ %1 = load i16* @foo
+ %2 = or i16 %x, %1
+ store i16 %2, i16* @foo
+ ret void
+}
+; CHECK: am2:
+; CHECK: bis.w r15, &foo
+
+@bar = external global [2 x i8]
+
+define void @am3(i16 %i, i8 %x) nounwind {
+ %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %i
+ %2 = load i8* %1
+ %3 = or i8 %x, %2
+ store i8 %3, i8* %1
+ ret void
+}
+; CHECK: am3:
+; CHECK: bis.b r14, &bar(r15)
+
+define void @am4(i16 %x) nounwind {
+ %1 = volatile load i16* inttoptr(i16 32 to i16*)
+ %2 = or i16 %x, %1
+ volatile store i16 %2, i16* inttoptr(i16 32 to i16*)
+ ret void
+}
+; CHECK: am4:
+; CHECK: bis.w r15, &32
+
+define void @am5(i16* %a, i16 %x) readonly {
+ %1 = getelementptr inbounds i16* %a, i16 2
+ %2 = load i16* %1
+ %3 = or i16 %x, %2
+ store i16 %3, i16* %1
+ ret void
+}
+; CHECK: am5:
+; CHECK: bis.w r14, 4(r15)
+
+%S = type { i16, i16 }
+@baz = common global %S zeroinitializer
+
+define void @am6(i16 %x) nounwind {
+ %1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+ %2 = or i16 %x, %1
+ store i16 %2, i16* getelementptr (%S* @baz, i32 0, i32 1)
+ ret void
+}
+; CHECK: am6:
+; CHECK: bis.w r15, &baz+2
+
+%T = type { i16, [2 x i8] }
+@duh = external global %T
+
+define void @am7(i16 %n, i8 %x) nounwind {
+ %1 = getelementptr %T* @duh, i32 0, i32 1
+ %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
+ %3 = load i8* %2
+ %4 = or i8 %x, %3
+ store i8 %4, i8* %2
+ ret void
+}
+; CHECK: am7:
+; CHECK: bis.b r14, &duh+2(r15)
+
diff --git a/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
new file mode 100644
index 0000000..67cbb02
--- /dev/null
+++ b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
@@ -0,0 +1,67 @@
+; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s
+target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16"
+target triple = "msp430-generic-generic"
+
+define i16 @am1(i16* %a) nounwind {
+ %1 = load i16* %a
+ ret i16 %1
+}
+; CHECK: am1:
+; CHECK: mov.w 0(r15), r15
+
+@foo = external global i16
+
+define i16 @am2() nounwind {
+ %1 = load i16* @foo
+ ret i16 %1
+}
+; CHECK: am2:
+; CHECK: mov.w &foo, r15
+
+@bar = internal constant [2 x i8] [ i8 32, i8 64 ]
+
+define i8 @am3(i16 %n) nounwind {
+ %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %n
+ %2 = load i8* %1
+ ret i8 %2
+}
+; CHECK: am3:
+; CHECK: mov.b &bar(r15), r15
+
+define i16 @am4() nounwind {
+ %1 = volatile load i16* inttoptr(i16 32 to i16*)
+ ret i16 %1
+}
+; CHECK: am4:
+; CHECK: mov.w &32, r15
+
+define i16 @am5(i16* %a) nounwind {
+ %1 = getelementptr i16* %a, i16 2
+ %2 = load i16* %1
+ ret i16 %2
+}
+; CHECK: am5:
+; CHECK: mov.w 4(r15), r15
+
+%S = type { i16, i16 }
+@baz = common global %S zeroinitializer, align 1
+
+define i16 @am6() nounwind {
+ %1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+ ret i16 %1
+}
+; CHECK: am6:
+; CHECK: mov.w &baz+2, r15
+
+%T = type { i16, [2 x i8] }
+@duh = internal constant %T { i16 16, [2 x i8][i8 32, i8 64 ] }
+
+define i8 @am7(i16 %n) nounwind {
+ %1 = getelementptr %T* @duh, i32 0, i32 1
+ %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
+ %3= load i8* %2
+ ret i8 %3
+}
+; CHECK: am7:
+; CHECK: mov.b &duh+2(r15), r15
+
diff --git a/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
new file mode 100644
index 0000000..b8155d3
--- /dev/null
+++ b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
@@ -0,0 +1,67 @@
+; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s
+target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16"
+target triple = "msp430-generic-generic"
+
+define void @am1(i16* %a, i16 %b) nounwind {
+ store i16 %b, i16* %a
+ ret void
+}
+; CHECK: am1:
+; CHECK: mov.w r14, 0(r15)
+
+@foo = external global i16
+
+define void @am2(i16 %a) nounwind {
+ store i16 %a, i16* @foo
+ ret void
+}
+; CHECK: am2:
+; CHECK: mov.w r15, &foo
+
+@bar = external global [2 x i8]
+
+define void @am3(i16 %i, i8 %a) nounwind {
+ %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %i
+ store i8 %a, i8* %1
+ ret void
+}
+; CHECK: am3:
+; CHECK: mov.b r14, &bar(r15)
+
+define void @am4(i16 %a) nounwind {
+ volatile store i16 %a, i16* inttoptr(i16 32 to i16*)
+ ret void
+}
+; CHECK: am4:
+; CHECK: mov.w r15, &32
+
+define void @am5(i16* nocapture %p, i16 %a) nounwind readonly {
+ %1 = getelementptr inbounds i16* %p, i16 2
+ store i16 %a, i16* %1
+ ret void
+}
+; CHECK: am5:
+; CHECK: mov.w r14, 4(r15)
+
+%S = type { i16, i16 }
+@baz = common global %S zeroinitializer, align 1
+
+define void @am6(i16 %a) nounwind {
+ store i16 %a, i16* getelementptr (%S* @baz, i32 0, i32 1)
+ ret void
+}
+; CHECK: am6:
+; CHECK: mov.w r15, &baz+2
+
+%T = type { i16, [2 x i8] }
+@duh = external global %T
+
+define void @am7(i16 %n, i8 %a) nounwind {
+ %1 = getelementptr %T* @duh, i32 0, i32 1
+ %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
+ store i8 %a, i8* %2
+ ret void
+}
+; CHECK: am7:
+; CHECK: mov.b r14, &duh+2(r15)
+
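Taken together, the four new AddrMode files enumerate how IR memory operands map onto MSP430 addressing modes: a plain pointer becomes indexed with a zero displacement (0(r15)), a global becomes absolute (&foo), a register-indexed global array becomes symbolic-plus-register (&bar(r15)), an inttoptr constant becomes a literal absolute address (&32), and constant GEP offsets fold into the displacement (4(r15), &baz+2, &duh+2(r15)). An eighth shape in the same style, a negative displacement, would plausibly select the indexed form with a negative offset by analogy with am5 (this is a sketch, not a case covered by these tests):

define i16 @am8(i16* %a) nounwind {
  %1 = getelementptr i16* %a, i16 -2
  %2 = load i16* %1
  ret i16 %2
}
; plausible expectation, by analogy with am5: mov.w -4(r15), r15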
diff --git a/test/CodeGen/MSP430/Inst16mr.ll b/test/CodeGen/MSP430/Inst16mr.ll
index 53334aa..2613f01 100644
--- a/test/CodeGen/MSP430/Inst16mr.ll
+++ b/test/CodeGen/MSP430/Inst16mr.ll
@@ -37,6 +37,16 @@ define void @bis(i16 %a) nounwind {
ret void
}
+define void @bic(i16 zeroext %m) nounwind {
+; CHECK: bic:
+; CHECK: bic.w r15, &foo
+ %1 = xor i16 %m, -1
+ %2 = load i16* @foo
+ %3 = and i16 %2, %1
+ store i16 %3, i16* @foo
+ ret void
+}
+
define void @xor(i16 %a) nounwind {
; CHECK: xor:
; CHECK: xor.w r15, &foo
diff --git a/test/CodeGen/MSP430/Inst16rm.ll b/test/CodeGen/MSP430/Inst16rm.ll
index d0cb0d1..02e89c7 100644
--- a/test/CodeGen/MSP430/Inst16rm.ll
+++ b/test/CodeGen/MSP430/Inst16rm.ll
@@ -19,7 +19,6 @@ define i16 @and(i16 %a) nounwind {
ret i16 %2
}
-
define i16 @bis(i16 %a) nounwind {
; CHECK: bis:
; CHECK: bis.w &foo, r15
@@ -28,6 +27,15 @@ define i16 @bis(i16 %a) nounwind {
ret i16 %2
}
+define i16 @bic(i16 %a) nounwind {
+; CHECK: bic:
+; CHECK: bic.w &foo, r15
+ %1 = load i16* @foo
+ %2 = xor i16 %1, -1
+ %3 = and i16 %a, %2
+ ret i16 %3
+}
+
define i16 @xor(i16 %a) nounwind {
; CHECK: xor:
; CHECK: xor.w &foo, r15
diff --git a/test/CodeGen/MSP430/Inst16rr.ll b/test/CodeGen/MSP430/Inst16rr.ll
index 6619c51..2f1ba5b 100644
--- a/test/CodeGen/MSP430/Inst16rr.ll
+++ b/test/CodeGen/MSP430/Inst16rr.ll
@@ -29,6 +29,14 @@ define i16 @bis(i16 %a, i16 %b) nounwind {
ret i16 %1
}
+define i16 @bic(i16 %a, i16 %b) nounwind {
+; CHECK: bic:
+; CHECK: bic.w r14, r15
+ %1 = xor i16 %b, -1
+ %2 = and i16 %a, %1
+ ret i16 %2
+}
+
define i16 @xor(i16 %a, i16 %b) nounwind {
; CHECK: xor:
; CHECK: xor.w r14, r15
diff --git a/test/CodeGen/MSP430/Inst8mr.ll b/test/CodeGen/MSP430/Inst8mr.ll
index 04c681e..428d1fa 100644
--- a/test/CodeGen/MSP430/Inst8mr.ll
+++ b/test/CodeGen/MSP430/Inst8mr.ll
@@ -37,6 +37,16 @@ define void @bis(i8 %a) nounwind {
ret void
}
+define void @bic(i8 zeroext %m) nounwind {
+; CHECK: bic:
+; CHECK: bic.b r15, &foo
+ %1 = xor i8 %m, -1
+ %2 = load i8* @foo
+ %3 = and i8 %2, %1
+ store i8 %3, i8* @foo
+ ret void
+}
+
define void @xor(i8 %a) nounwind {
; CHECK: xor:
; CHECK: xor.b r15, &foo
diff --git a/test/CodeGen/MSP430/Inst8rm.ll b/test/CodeGen/MSP430/Inst8rm.ll
index 62a5d4b..c062f04 100644
--- a/test/CodeGen/MSP430/Inst8rm.ll
+++ b/test/CodeGen/MSP430/Inst8rm.ll
@@ -19,7 +19,6 @@ define i8 @and(i8 %a) nounwind {
ret i8 %2
}
-
define i8 @bis(i8 %a) nounwind {
; CHECK: bis:
; CHECK: bis.b &foo, r15
@@ -28,6 +27,15 @@ define i8 @bis(i8 %a) nounwind {
ret i8 %2
}
+define i8 @bic(i8 %a) nounwind {
+; CHECK: bic:
+; CHECK: bic.b &foo, r15
+ %1 = load i8* @foo
+ %2 = xor i8 %1, -1
+ %3 = and i8 %a, %2
+ ret i8 %3
+}
+
define i8 @xor(i8 %a) nounwind {
; CHECK: xor:
; CHECK: xor.b &foo, r15
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll
index 90ea945..74feaae 100644
--- a/test/CodeGen/MSP430/Inst8rr.ll
+++ b/test/CodeGen/MSP430/Inst8rr.ll
@@ -29,6 +29,14 @@ define i8 @bis(i8 %a, i8 %b) nounwind {
ret i8 %1
}
+define i8 @bic(i8 %a, i8 %b) nounwind {
+; CHECK: bic:
+; CHECK: bic.b r14, r15
+ %1 = xor i8 %b, -1
+ %2 = and i8 %a, %1
+ ret i8 %2
+}
+
define i8 @xor(i8 %a, i8 %b) nounwind {
; CHECK: xor:
; CHECK: xor.w r14, r15
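The bic functions added across these six Inst*.ll files all exercise one selection pattern: an and whose operand is xor x, -1, i.e. a & ~b, which MSP430 expresses in a single bic src, dst (dst &= ~src) in register, memory-to-register and register-to-memory forms. A hypothetical extra case, the commuted form of the same idiom, which the matcher should also catch since and is commutative (assumption, not in the commit):

define i16 @bic_commuted(i16 %a, i16 %b) nounwind {
  %1 = xor i16 %b, -1
  %2 = and i16 %1, %a            ; same a & ~b, operands of the and swapped
  ret i16 %2
}
; expected, assuming the pattern matches either operand order: bic.w r14, r15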
diff --git a/test/CodeGen/MSP430/inline-asm.ll b/test/CodeGen/MSP430/inline-asm.ll
index 2cc25a4..0e7886a 100644
--- a/test/CodeGen/MSP430/inline-asm.ll
+++ b/test/CodeGen/MSP430/inline-asm.ll
@@ -20,6 +20,7 @@ define void @immmem() nounwind {
}
define void @mem() nounwind {
- call void asm sideeffect "bic\09$0,r2", "m"(i16* @foo) nounwind
+ %fooval = load i16* @foo
+ call void asm sideeffect "bic\09$0,r2", "m"(i16 %fooval) nounwind
ret void
}
diff --git a/test/CodeGen/MSP430/postinc.ll b/test/CodeGen/MSP430/postinc.ll
new file mode 100644
index 0000000..8f01b83
--- /dev/null
+++ b/test/CodeGen/MSP430/postinc.ll
@@ -0,0 +1,114 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430"
+
+define zeroext i16 @add(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+ %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+ %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+; CHECK: add:
+; CHECK: add.w @r{{[0-9]+}}+, r{{[0-9]+}}
+ %tmp4 = load i16* %arrayidx ; <i16> [#uses=1]
+ %add = add i16 %tmp4, %sum.09 ; <i16> [#uses=2]
+ %inc = add i16 %i.010, 1 ; <i16> [#uses=2]
+ %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @sub(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+ %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+ %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+; CHECK: sub:
+; CHECK: sub.w @r{{[0-9]+}}+, r{{[0-9]+}}
+ %tmp4 = load i16* %arrayidx ; <i16> [#uses=1]
+ %add = sub i16 %tmp4, %sum.09 ; <i16> [#uses=2]
+ %inc = add i16 %i.010, 1 ; <i16> [#uses=2]
+ %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @or(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+ %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+ %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+; CHECK: or:
+; CHECK: bis.w @r{{[0-9]+}}+, r{{[0-9]+}}
+ %tmp4 = load i16* %arrayidx ; <i16> [#uses=1]
+ %add = or i16 %tmp4, %sum.09 ; <i16> [#uses=2]
+ %inc = add i16 %i.010, 1 ; <i16> [#uses=2]
+ %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @xor(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+ %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+ %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+; CHECK: xor:
+; CHECK: xor.w @r{{[0-9]+}}+, r{{[0-9]+}}
+ %tmp4 = load i16* %arrayidx ; <i16> [#uses=1]
+ %add = xor i16 %tmp4, %sum.09 ; <i16> [#uses=2]
+ %inc = add i16 %i.010, 1 ; <i16> [#uses=2]
+ %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @and(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+ %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+ %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+; CHECK: and:
+; CHECK: and.w @r{{[0-9]+}}+, r{{[0-9]+}}
+ %tmp4 = load i16* %arrayidx ; <i16> [#uses=1]
+ %add = and i16 %tmp4, %sum.09 ; <i16> [#uses=2]
+ %inc = add i16 %i.010, 1 ; <i16> [#uses=2]
+ %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ ret i16 %sum.0.lcssa
+}
+
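Each loop in postinc.ll indexes the array as a[i] in the IR, but after strength reduction the access becomes a pointer that advances by one i16 per iteration, and that load-then-bump-by-element-size shape is exactly what MSP430's autoincrement operand @rN+ encodes: the load and the pointer increment fold into a single operand. A sketch of the post-strength-reduction shape the selector is matching (hypothetical, equivalent to the add loop above):

define zeroext i16 @add_ptr(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
entry:
  %cmp = icmp eq i16 %n, 0
  br i1 %cmp, label %for.end, label %for.body

for.body:
  %p.0 = phi i16* [ %a, %entry ], [ %p.next, %for.body ]
  %i.0 = phi i16 [ 0, %entry ], [ %inc, %for.body ]
  %sum.0 = phi i16 [ 0, %entry ], [ %add, %for.body ]
  %v = load i16* %p.0                        ; load ...
  %p.next = getelementptr i16* %p.0, i16 1   ; ... plus bump by 2 bytes: add.w @rN+, rM
  %add = add i16 %v, %sum.0
  %inc = add i16 %i.0, 1
  %exitcond = icmp eq i16 %inc, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ]
  ret i16 %lcssa
}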
diff --git a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
index c41d521..1244a3e 100644
--- a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
+++ b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
@@ -1,6 +1,7 @@
; Double return in abicall (default)
; RUN: llc < %s -march=mips
; PR2615
+; XFAIL: *
define double @main(...) {
entry:
diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
new file mode 100644
index 0000000..f59639f
--- /dev/null
+++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-n32"
+target triple = "mips-unknown-linux"
+
+define float @h() nounwind readnone {
+entry:
+; CHECK: lui $2, %hi($CPI1_0)
+; CHECK: lwc1 $f0, %lo($CPI1_0)($2)
+ ret float 0x400B333340000000
+}
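The return value here is LLVM's hex notation for a float written as its double extension: 0x400B333340000000 is 3.4f (a float literal must be exactly representable, and these are the bits of 3.4f widened to double), and the CHECK lines expect it to be materialized from the constant pool with the classic MIPS %hi/%lo pair -- lui builds the upper half of the pool-entry address and lwc1 completes the low half as the load's offset. A second hypothetical constant in the same notation (the $CPI2_0 label number is an assumption about a second function in the file):

define float @h2() nounwind readnone {
entry:
; hypothetical expectation:
; lui $2, %hi($CPI2_0)
; lwc1 $f0, %lo($CPI2_0)($2)
  ret float 0x3FF3AE1480000000   ; 1.23f widened to double
}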
diff --git a/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll b/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll
new file mode 100644
index 0000000..2d9d16a
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8
+
+define void @gcov_exit() nounwind {
+entry:
+ br i1 undef, label %return, label %bb.nph341
+
+bb.nph341: ; preds = %entry
+ br label %bb25
+
+bb25: ; preds = %read_fatal, %bb.nph341
+ br i1 undef, label %bb49.1, label %bb48
+
+bb48: ; preds = %bb25
+ br label %bb49.1
+
+bb51: ; preds = %bb48.4, %bb49.3
+ switch i32 undef, label %bb58 [
+ i32 0, label %rewrite
+ i32 1734567009, label %bb59
+ ]
+
+bb58: ; preds = %bb51
+ br label %read_fatal
+
+bb59: ; preds = %bb51
+ br i1 undef, label %bb60, label %bb3.i156
+
+bb3.i156: ; preds = %bb59
+ br label %read_fatal
+
+bb60: ; preds = %bb59
+ br i1 undef, label %bb78.preheader, label %rewrite
+
+bb78.preheader: ; preds = %bb60
+ br i1 undef, label %bb62, label %bb80
+
+bb62: ; preds = %bb78.preheader
+ br i1 undef, label %bb64, label %read_mismatch
+
+bb64: ; preds = %bb62
+ br i1 undef, label %bb65, label %read_mismatch
+
+bb65: ; preds = %bb64
+ br i1 undef, label %bb75, label %read_mismatch
+
+read_mismatch: ; preds = %bb98, %bb119.preheader, %bb72, %bb71, %bb65, %bb64, %bb62
+ br label %read_fatal
+
+bb71: ; preds = %bb75
+ br i1 undef, label %bb72, label %read_mismatch
+
+bb72: ; preds = %bb71
+ br i1 undef, label %bb73, label %read_mismatch
+
+bb73: ; preds = %bb72
+ unreachable
+
+bb74: ; preds = %bb75
+ br label %bb75
+
+bb75: ; preds = %bb74, %bb65
+ br i1 undef, label %bb74, label %bb71
+
+bb80: ; preds = %bb78.preheader
+ unreachable
+
+read_fatal: ; preds = %read_mismatch, %bb3.i156, %bb58
+ br i1 undef, label %return, label %bb25
+
+rewrite: ; preds = %bb60, %bb51
+ br i1 undef, label %bb94, label %bb119.preheader
+
+bb94: ; preds = %rewrite
+ unreachable
+
+bb119.preheader: ; preds = %rewrite
+ br i1 undef, label %read_mismatch, label %bb98
+
+bb98: ; preds = %bb119.preheader
+ br label %read_mismatch
+
+return: ; preds = %read_fatal, %entry
+ ret void
+
+bb49.1: ; preds = %bb48, %bb25
+ br i1 undef, label %bb49.2, label %bb48.2
+
+bb49.2: ; preds = %bb48.2, %bb49.1
+ br i1 undef, label %bb49.3, label %bb48.3
+
+bb48.2: ; preds = %bb49.1
+ br label %bb49.2
+
+bb49.3: ; preds = %bb48.3, %bb49.2
+ %c_ix.0.3 = phi i32 [ undef, %bb48.3 ], [ undef, %bb49.2 ] ; <i32> [#uses=1]
+ br i1 undef, label %bb51, label %bb48.4
+
+bb48.3: ; preds = %bb49.2
+ store i64* undef, i64** undef, align 4
+ br label %bb49.3
+
+bb48.4: ; preds = %bb49.3
+ %0 = getelementptr inbounds [5 x i64*]* undef, i32 0, i32 %c_ix.0.3 ; <i64**> [#uses=0]
+ br label %bb51
+}
diff --git a/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll b/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll
new file mode 100644
index 0000000..54f4b2e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8
+
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] }
+%struct.__sFILEX = type opaque
+%struct.__sbuf = type { i8*, i32 }
+%struct.gcov_ctr_info = type { i32, i64*, void (i64*, i32)* }
+%struct.gcov_ctr_summary = type { i32, i32, i64, i64, i64 }
+%struct.gcov_fn_info = type { i32, i32, [0 x i32] }
+%struct.gcov_info = type { i32, %struct.gcov_info*, i32, i8*, i32, %struct.gcov_fn_info*, i32, [0 x %struct.gcov_ctr_info] }
+%struct.gcov_summary = type { i32, [1 x %struct.gcov_ctr_summary] }
+
+@__gcov_var = external global %struct.__gcov_var ; <%struct.__gcov_var*> [#uses=1]
+@__sF = external global [0 x %struct.FILE] ; <[0 x %struct.FILE]*> [#uses=1]
+@.str = external constant [56 x i8], align 4 ; <[56 x i8]*> [#uses=1]
+@gcov_list = external global %struct.gcov_info* ; <%struct.gcov_info**> [#uses=1]
+@.str7 = external constant [35 x i8], align 4 ; <[35 x i8]*> [#uses=1]
+@.str8 = external constant [9 x i8], align 4 ; <[9 x i8]*> [#uses=1]
+@.str9 = external constant [10 x i8], align 4 ; <[10 x i8]*> [#uses=1]
+@.str10 = external constant [36 x i8], align 4 ; <[36 x i8]*> [#uses=1]
+
+declare i32 @"\01_fprintf$LDBL128"(%struct.FILE*, i8*, ...) nounwind
+
+define void @gcov_exit() nounwind {
+entry:
+ %gi_ptr.0357 = load %struct.gcov_info** @gcov_list, align 4 ; <%struct.gcov_info*> [#uses=1]
+ %0 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=3]
+ br i1 undef, label %return, label %bb.nph341
+
+bb.nph341: ; preds = %entry
+ %object27 = bitcast %struct.gcov_summary* undef to i8* ; <i8*> [#uses=1]
+ br label %bb25
+
+bb25: ; preds = %read_fatal, %bb.nph341
+ %gi_ptr.1329 = phi %struct.gcov_info* [ %gi_ptr.0357, %bb.nph341 ], [ undef, %read_fatal ] ; <%struct.gcov_info*> [#uses=1]
+ call void @llvm.memset.i32(i8* %object27, i8 0, i32 36, i32 8)
+ br i1 undef, label %bb49.1, label %bb48
+
+bb48: ; preds = %bb25
+ br label %bb49.1
+
+bb51: ; preds = %bb48.4, %bb49.3
+ switch i32 undef, label %bb58 [
+ i32 0, label %rewrite
+ i32 1734567009, label %bb59
+ ]
+
+bb58: ; preds = %bb51
+ %1 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([35 x i8]* @.str7, i32 0, i32 0), i8* %0) nounwind ; <i32> [#uses=0]
+ br label %read_fatal
+
+bb59: ; preds = %bb51
+ br i1 undef, label %bb60, label %bb3.i156
+
+bb3.i156: ; preds = %bb59
+ store i8 52, i8* undef, align 1
+ store i8 42, i8* undef, align 1
+ %2 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([56 x i8]* @.str, i32 0, i32 0), i8* %0, i8* undef, i8* undef) nounwind ; <i32> [#uses=0]
+ br label %read_fatal
+
+bb60: ; preds = %bb59
+ br i1 undef, label %bb78.preheader, label %rewrite
+
+bb78.preheader: ; preds = %bb60
+ br i1 undef, label %bb62, label %bb80
+
+bb62: ; preds = %bb78.preheader
+ br i1 undef, label %bb64, label %read_mismatch
+
+bb64: ; preds = %bb62
+ br i1 undef, label %bb65, label %read_mismatch
+
+bb65: ; preds = %bb64
+ br i1 undef, label %bb75, label %read_mismatch
+
+read_mismatch: ; preds = %bb98, %bb119.preheader, %bb72, %bb71, %bb65, %bb64, %bb62
+ %3 = icmp eq i32 undef, -1 ; <i1> [#uses=1]
+ %iftmp.11.0 = select i1 %3, i8* getelementptr inbounds ([10 x i8]* @.str9, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* @.str8, i32 0, i32 0) ; <i8*> [#uses=1]
+ %4 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([36 x i8]* @.str10, i32 0, i32 0), i8* %0, i8* %iftmp.11.0) nounwind ; <i32> [#uses=0]
+ br label %read_fatal
+
+bb71: ; preds = %bb75
+ %5 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %6 = getelementptr inbounds %struct.gcov_info* %gi_ptr.1329, i32 0, i32 7, i32 undef, i32 2 ; <void (i64*, i32)**> [#uses=1]
+ %7 = load void (i64*, i32)** %6, align 4 ; <void (i64*, i32)*> [#uses=1]
+ %8 = call i32 @__gcov_read_unsigned() nounwind ; <i32> [#uses=1]
+ %9 = call i32 @__gcov_read_unsigned() nounwind ; <i32> [#uses=1]
+ %10 = icmp eq i32 %tmp386, %8 ; <i1> [#uses=1]
+ br i1 %10, label %bb72, label %read_mismatch
+
+bb72: ; preds = %bb71
+ %11 = icmp eq i32 undef, %9 ; <i1> [#uses=1]
+ br i1 %11, label %bb73, label %read_mismatch
+
+bb73: ; preds = %bb72
+ call void %7(i64* null, i32 %5) nounwind
+ unreachable
+
+bb74: ; preds = %bb75
+ %12 = add i32 %13, 1 ; <i32> [#uses=1]
+ br label %bb75
+
+bb75: ; preds = %bb74, %bb65
+ %13 = phi i32 [ %12, %bb74 ], [ 0, %bb65 ] ; <i32> [#uses=2]
+ %tmp386 = add i32 0, 27328512 ; <i32> [#uses=1]
+ %14 = shl i32 1, %13 ; <i32> [#uses=1]
+ %15 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %16 = and i32 %15, %14 ; <i32> [#uses=1]
+ %17 = icmp eq i32 %16, 0 ; <i1> [#uses=1]
+ br i1 %17, label %bb74, label %bb71
+
+bb80: ; preds = %bb78.preheader
+ unreachable
+
+read_fatal: ; preds = %read_mismatch, %bb3.i156, %bb58
+ br i1 undef, label %return, label %bb25
+
+rewrite: ; preds = %bb60, %bb51
+ store i32 -1, i32* getelementptr inbounds (%struct.__gcov_var* @__gcov_var, i32 0, i32 6), align 4
+ br i1 undef, label %bb94, label %bb119.preheader
+
+bb94: ; preds = %rewrite
+ unreachable
+
+bb119.preheader: ; preds = %rewrite
+ br i1 undef, label %read_mismatch, label %bb98
+
+bb98: ; preds = %bb119.preheader
+ br label %read_mismatch
+
+return: ; preds = %read_fatal, %entry
+ ret void
+
+bb49.1: ; preds = %bb48, %bb25
+ br i1 undef, label %bb49.2, label %bb48.2
+
+bb49.2: ; preds = %bb48.2, %bb49.1
+ br i1 undef, label %bb49.3, label %bb48.3
+
+bb48.2: ; preds = %bb49.1
+ br label %bb49.2
+
+bb49.3: ; preds = %bb48.3, %bb49.2
+ br i1 undef, label %bb51, label %bb48.4
+
+bb48.3: ; preds = %bb49.2
+ br label %bb49.3
+
+bb48.4: ; preds = %bb49.3
+ br label %bb51
+}
+
+declare i32 @__gcov_read_unsigned() nounwind
+
+declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index ee0a02b..1b302e41 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -43,12 +43,12 @@ L2: ; preds = %L3, %bb2
L1: ; preds = %L2, %bb2
%res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1]
-; PIC: addis r4, r2, ha16(L_foo_L5-"L1$pb")
-; PIC: li r5, lo16(L_foo_L5-"L1$pb")
+; PIC: addis r4, r2, ha16(LBA4__foo__L5-"L1$pb")
+; PIC: li r5, lo16(LBA4__foo__L5-"L1$pb")
; PIC: add r4, r4, r5
; PIC: stw r4
-; STATIC: li r2, lo16(L_foo_L5)
-; STATIC: addis r2, r2, ha16(L_foo_L5)
+; STATIC: li r2, lo16(LBA4__foo__L5)
+; STATIC: addis r2, r2, ha16(LBA4__foo__L5)
; STATIC: stw r2
store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
ret i32 %res.3
diff --git a/test/CodeGen/PowerPC/ppc-prologue.ll b/test/CodeGen/PowerPC/ppc-prologue.ll
new file mode 100644
index 0000000..581d010
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc-prologue.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s
+
+define i32 @_Z4funci(i32 %a) ssp {
+; CHECK: mflr r0
+; CHECK-NEXT: stw r31, 20(r1)
+; CHECK-NEXT: stw r0, 8(r1)
+; CHECK-NEXT: stwu r1, -80(r1)
+; CHECK-NEXT: Llabel1:
+; CHECK-NEXT: mr r31, r1
+; CHECK-NEXT: Llabel2:
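+; The prologue under -disable-fp-elim, as checked above: grab lr (mflr), save
+; the old r31 and lr into the caller's frame, allocate 80 bytes with a single
+; stwu (which also links the back chain), and establish r31 as the frame
+; pointer (mr r31, r1).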
+entry:
+ %a_addr = alloca i32 ; <i32*> [#uses=2]
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %a, i32* %a_addr
+ %1 = call i32 @_Z3barPi(i32* %a_addr) ; <i32> [#uses=1]
+ store i32 %1, i32* %0, align 4
+ %2 = load i32* %0, align 4 ; <i32> [#uses=1]
+ store i32 %2, i32* %retval, align 4
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval1
+}
+
+declare i32 @_Z3barPi(i32*)
diff --git a/test/CodeGen/PowerPC/vec_auto_constant.ll b/test/CodeGen/PowerPC/vec_auto_constant.ll
new file mode 100644
index 0000000..973f089
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_auto_constant.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s
+; Formerly produced .long, 7320806 (partial)
+; CHECK: .byte 22
+; CHECK: .byte 21
+; CHECK: .byte 20
+; CHECK: .byte 3
+; CHECK: .byte 25
+; CHECK: .byte 24
+; CHECK: .byte 23
+; CHECK: .byte 3
+; CHECK: .byte 28
+; CHECK: .byte 27
+; CHECK: .byte 26
+; CHECK: .byte 3
+; CHECK: .byte 31
+; CHECK: .byte 30
+; CHECK: .byte 29
+; CHECK: .byte 3
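+; The sixteen .byte values above are exactly the <16 x i8> constant stored to
+; %temp below, emitted byte-by-byte instead of as .long words.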
+@baz = common global <16 x i8> zeroinitializer ; <<16 x i8>*> [#uses=1]
+
+define void @foo(<16 x i8> %x) nounwind ssp {
+entry:
+ %x_addr = alloca <16 x i8> ; <<16 x i8>*> [#uses=2]
+ %temp = alloca <16 x i8> ; <<16 x i8>*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store <16 x i8> %x, <16 x i8>* %x_addr
+ store <16 x i8> <i8 22, i8 21, i8 20, i8 3, i8 25, i8 24, i8 23, i8 3, i8 28, i8 27, i8 26, i8 3, i8 31, i8 30, i8 29, i8 3>, <16 x i8>* %temp, align 16
+ %0 = load <16 x i8>* %x_addr, align 16 ; <<16 x i8>> [#uses=1]
+ %1 = load <16 x i8>* %temp, align 16 ; <<16 x i8>> [#uses=1]
+ %tmp = add <16 x i8> %0, %1 ; <<16 x i8>> [#uses=1]
+ store <16 x i8> %tmp, <16 x i8>* @baz, align 16
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/vec_splat_constant.ll b/test/CodeGen/PowerPC/vec_splat_constant.ll
new file mode 100644
index 0000000..b227794
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_splat_constant.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s
+; Formerly incorrectly inserted vsldoi (endian confusion)
+
+@baz = common global <16 x i8> zeroinitializer ; <<16 x i8>*> [#uses=1]
+
+define void @foo(<16 x i8> %x) nounwind ssp {
+entry:
+; CHECK: _foo:
+; CHECK-NOT: vsldoi
+ %x_addr = alloca <16 x i8> ; <<16 x i8>*> [#uses=2]
+ %temp = alloca <16 x i8> ; <<16 x i8>*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store <16 x i8> %x, <16 x i8>* %x_addr
+ store <16 x i8> <i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14>, <16 x i8>* %temp, align 16
+ %0 = load <16 x i8>* %x_addr, align 16 ; <<16 x i8>> [#uses=1]
+ %1 = load <16 x i8>* %temp, align 16 ; <<16 x i8>> [#uses=1]
+ %tmp = add <16 x i8> %0, %1 ; <<16 x i8>> [#uses=1]
+ store <16 x i8> %tmp, <16 x i8>* @baz, align 16
+ br label %return
+
+return: ; preds = %entry
+ ret void
+; CHECK: blr
+}
diff --git a/test/CodeGen/Thumb/machine-licm.ll b/test/CodeGen/Thumb/machine-licm.ll
new file mode 100644
index 0000000..dae1412b
--- /dev/null
+++ b/test/CodeGen/Thumb/machine-licm.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s
+; rdar://7353541
+; rdar://7354376
+
+; The generated code is nowhere near ideal. It's not recognizing that the two
+; constantpool entries being loaded can be merged into one.
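+; The CHECK lines below require a single constant-pool entry (LCPI1_0, with
+; LCPI1_1 explicitly forbidden) whose pc-relative load is hoisted out of the
+; loop.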
+
+@GV = external global i32 ; <i32*> [#uses=2]
+
+define arm_apcscc void @t(i32* nocapture %vals, i32 %c) nounwind {
+entry:
+; CHECK: t:
+ %0 = icmp eq i32 %c, 0 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+; CHECK: BB#1
+; CHECK: ldr.n r2, LCPI1_0
+; CHECK: add r2, pc
+; CHECK: ldr r{{[0-9]+}}, [r2]
+; CHECK: LBB1_2
+; CHECK: LCPI1_0:
+; CHECK-NOT: LCPI1_1:
+; CHECK: .section
+ %.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+ %1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=1]
+ %i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ] ; <i32> [#uses=2]
+ %scevgep = getelementptr i32* %vals, i32 %i.03 ; <i32*> [#uses=1]
+ %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %3 = add nsw i32 %1, %2 ; <i32> [#uses=2]
+ store i32 %3, i32* @GV, align 4
+ %4 = add i32 %i.03, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %4, %c ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
index e84e867..8d03b52 100644
--- a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
+++ b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -6,10 +6,8 @@
define arm_apcscc void @t() nounwind {
; CHECK: t:
-; CHECK: ittt eq
-; CHECK-NEXT: addeq
-; CHECK-NEXT: movweq
-; CHECK-NEXT: movteq
+; CHECK: it eq
+; CHECK-NEXT: cmpeq
entry:
%pix_a.i294 = alloca [4 x %struct.pix_pos], align 4 ; <[4 x %struct.pix_pos]*> [#uses=2]
br i1 undef, label %land.rhs, label %lor.end
diff --git a/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
new file mode 100644
index 0000000..9f2e399
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
+
+%struct.OP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i32, i16, i16, i8, i8 }
+%struct.SV = type { i8*, i32, i32 }
+
+declare arm_apcscc void @Perl_mg_set(%struct.SV*) nounwind
+
+define arm_apcscc %struct.OP* @Perl_pp_complement() nounwind {
+entry:
+ %0 = load %struct.SV** null, align 4 ; <%struct.SV*> [#uses=2]
+ br i1 undef, label %bb21, label %bb5
+
+bb5: ; preds = %entry
+ br i1 undef, label %bb13, label %bb6
+
+bb6: ; preds = %bb5
+ br i1 undef, label %bb8, label %bb7
+
+bb7: ; preds = %bb6
+ %1 = getelementptr inbounds %struct.SV* %0, i32 0, i32 0 ; <i8**> [#uses=1]
+ %2 = load i8** %1, align 4 ; <i8*> [#uses=1]
+ %3 = getelementptr inbounds i8* %2, i32 12 ; <i8*> [#uses=1]
+ %4 = bitcast i8* %3 to i32* ; <i32*> [#uses=1]
+ %5 = load i32* %4, align 4 ; <i32> [#uses=1]
+ %storemerge5 = xor i32 %5, -1 ; <i32> [#uses=1]
+ call arm_apcscc void @Perl_sv_setiv(%struct.SV* undef, i32 %storemerge5) nounwind
+ %6 = getelementptr inbounds %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1]
+ %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %8 = and i32 %7, 16384 ; <i32> [#uses=1]
+ %9 = icmp eq i32 %8, 0 ; <i1> [#uses=1]
+ br i1 %9, label %bb12, label %bb11
+
+bb8: ; preds = %bb6
+ unreachable
+
+bb11: ; preds = %bb7
+ call arm_apcscc void @Perl_mg_set(%struct.SV* undef) nounwind
+ br label %bb12
+
+bb12: ; preds = %bb11, %bb7
+ store %struct.SV* undef, %struct.SV** null, align 4
+ br label %bb44
+
+bb13: ; preds = %bb5
+ %10 = call arm_apcscc i32 @Perl_sv_2uv(%struct.SV* %0) nounwind ; <i32> [#uses=0]
+ br i1 undef, label %bb.i, label %bb1.i
+
+bb.i: ; preds = %bb13
+ call arm_apcscc void @Perl_sv_setiv(%struct.SV* undef, i32 undef) nounwind
+ br label %Perl_sv_setuv.exit
+
+bb1.i: ; preds = %bb13
+ br label %Perl_sv_setuv.exit
+
+Perl_sv_setuv.exit: ; preds = %bb1.i, %bb.i
+ %11 = getelementptr inbounds %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1]
+ %12 = load i32* %11, align 4 ; <i32> [#uses=1]
+ %13 = and i32 %12, 16384 ; <i32> [#uses=1]
+ %14 = icmp eq i32 %13, 0 ; <i1> [#uses=1]
+ br i1 %14, label %bb20, label %bb19
+
+bb19: ; preds = %Perl_sv_setuv.exit
+ call arm_apcscc void @Perl_mg_set(%struct.SV* undef) nounwind
+ br label %bb20
+
+bb20: ; preds = %bb19, %Perl_sv_setuv.exit
+ store %struct.SV* undef, %struct.SV** null, align 4
+ br label %bb44
+
+bb21: ; preds = %entry
+ br i1 undef, label %bb23, label %bb22
+
+bb22: ; preds = %bb21
+ unreachable
+
+bb23: ; preds = %bb21
+ unreachable
+
+bb44: ; preds = %bb20, %bb12
+ ret %struct.OP* undef
+}
+
+declare arm_apcscc void @Perl_sv_setiv(%struct.SV*, i32) nounwind
+
+declare arm_apcscc i32 @Perl_sv_2uv(%struct.SV*) nounwind
diff --git a/test/CodeGen/Thumb2/2009-11-13-STRDBug.ll b/test/CodeGen/Thumb2/2009-11-13-STRDBug.ll
new file mode 100644
index 0000000..8a67bb1
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-11-13-STRDBug.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
+; rdar://7394794
+
+define arm_apcscc void @lshift_double(i64 %l1, i64 %h1, i64 %count, i32 %prec, i64* nocapture %lv, i64* nocapture %hv, i32 %arith) nounwind {
+entry:
+ %..i = select i1 false, i64 0, i64 0 ; <i64> [#uses=1]
+ br i1 undef, label %bb11.i, label %bb6.i
+
+bb6.i: ; preds = %entry
+ %0 = lshr i64 %h1, 0 ; <i64> [#uses=1]
+ store i64 %0, i64* %hv, align 4
+ %1 = lshr i64 %l1, 0 ; <i64> [#uses=1]
+ %2 = or i64 0, %1 ; <i64> [#uses=1]
+ store i64 %2, i64* %lv, align 4
+ br label %bb11.i
+
+bb11.i: ; preds = %bb6.i, %entry
+ store i64 %..i, i64* %lv, align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index 4320328..eefbae5 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 4
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 4
define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
entry:
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index 865b17b..18d507c 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -18,7 +18,7 @@ define void @test2() {
define i32 @test3() {
; CHECK: test3:
; CHECK: sub.w sp, sp, #805306368
-; CHECK: sub sp, #4 * 4
+; CHECK: sub sp, #6 * 4
%retval = alloca i32, align 4
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16
diff --git a/test/CodeGen/Thumb2/load-global.ll b/test/CodeGen/Thumb2/load-global.ll
index 4fd4525..9286670 100644
--- a/test/CodeGen/Thumb2/load-global.ll
+++ b/test/CodeGen/Thumb2/load-global.ll
@@ -14,7 +14,7 @@ define i32 @test1() {
; PIC: _test1
; PIC: add r0, pc
-; PIC: .long L_G$non_lazy_ptr-(LPC0+4)
+; PIC: .long L_G$non_lazy_ptr-(LPC1_0+4)
; LINUX: test1
; LINUX: .long G(GOT)
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
new file mode 100644
index 0000000..7b1b57a
--- /dev/null
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s
+; rdar://7387640
+
+; FIXME: We still need to rewrite the array-reference iv of stride -4 in terms
+; of the loop-count iv of stride -1.
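+; (A sketch of the desired form, not what is generated today: index the array
+; with the count iv scaled by 4, e.g. ldr rX, [rBase, r3, lsl #2], so the
+; separate address decrement of r2 disappears.)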
+
+@G = external global i32 ; <i32*> [#uses=2]
+@array = external global i32* ; <i32**> [#uses=1]
+
+define arm_apcscc void @t() nounwind optsize {
+; CHECK: t:
+; CHECK: mov.w r2, #4000
+; CHECK: movw r3, #1001
+entry:
+ %.pre = load i32* @G, align 4 ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %entry
+; CHECK: LBB1_1:
+; CHECK: subs r3, #1
+; CHECK: cmp r3, #0
+; CHECK: sub.w r2, r2, #4
+ %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+ %tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1]
+ %1 = load i32** @array, align 4 ; <i32*> [#uses=1]
+ %scevgep = getelementptr i32* %1, i32 %tmp5 ; <i32*> [#uses=1]
+ %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %3 = add nsw i32 %2, %0 ; <i32> [#uses=2]
+ store i32 %3, i32* @G, align 4
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, 1001 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 64309c4..912939b 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s
; rdar://7353541
+; rdar://7354376
; The generated code is nowhere near ideal. It's not recognizing that the two
; constantpool entries being loaded can be merged into one.
@@ -15,8 +16,13 @@ entry:
bb.nph: ; preds = %entry
; CHECK: BB#1
-; CHECK: ldr{{.*}} r{{[0-9]+}}, LCPI1_0
-; CHECK: ldr{{.*}} r{{[0-9]+}}, LCPI1_1
+; CHECK: ldr.n r2, LCPI1_0
+; CHECK: add r2, pc
+; CHECK: ldr r{{[0-9]+}}, [r2]
+; CHECK: LBB1_2
+; CHECK: LCPI1_0:
+; CHECK-NOT: LCPI1_1:
+; CHECK: .section
%.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
br label %bb
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 64587c1..0fc6899 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -20,7 +20,8 @@ bb7: ; preds = %bb3
br i1 %a, label %bb11, label %bb9
bb9: ; preds = %bb7
-; CHECK: @ BB#2:
+; CHECK: cmp r0, #0
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cbnz
%0 = tail call arm_apcscc double @floor(double %b) nounwind readnone ; <double> [#uses=0]
br label %bb11
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
index 1d45d3c..496158c 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -23,7 +23,7 @@ bb52: ; preds = %newFuncRoot
; CHECK: movne
; CHECK: moveq
; CHECK: pop
-; CHECK-NEXT: LBB1_2:
+; CHECK-NEXT: LBB1_1:
%0 = load i64* @posed, align 4 ; <i64> [#uses=3]
%1 = sub i64 %0, %.reload78 ; <i64> [#uses=1]
%2 = ashr i64 %1, 1 ; <i64> [#uses=3]
diff --git a/test/CodeGen/Thumb2/thumb2-jtb.ll b/test/CodeGen/Thumb2/thumb2-jtb.ll
index 7d093ec..f5a56e5 100644
--- a/test/CodeGen/Thumb2/thumb2-jtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep tbb
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-adjust-jump-tables=0 | not grep tbb
; Do not use tbb / tbh if any destination is before the jumptable.
; rdar://7102917
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index b4274ad..44fa245 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -1,8 +1,12 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mov | count 3
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mvn | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep it | count 3
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK: t1
+; CHECK: mvn r0, #-2147483648
+; CHECK: cmp r2, #10
+; CHECK: add.w r0, r1, r0
+; CHECK: it gt
+; CHECK: movgt r0, r1
%tmp1 = icmp sgt i32 %c, 10
%tmp2 = select i1 %tmp1, i32 0, i32 2147483647
%tmp3 = add i32 %tmp2, %b
@@ -10,6 +14,12 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
}
define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK: t2
+; CHECK: add.w r0, r1, #-2147483648
+; CHECK: cmp r2, #10
+; CHECK: it gt
+; CHECK: movgt r0, r1
+
%tmp1 = icmp sgt i32 %c, 10
%tmp2 = select i1 %tmp1, i32 0, i32 2147483648
%tmp3 = add i32 %tmp2, %b
@@ -17,6 +27,11 @@ define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
}
define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; CHECK: t3
+; CHECK: sub.w r0, r1, #10
+; CHECK: cmp r2, #10
+; CHECK: it gt
+; CHECK: movgt r0, r1
%tmp1 = icmp sgt i32 %c, 10
%tmp2 = select i1 %tmp1, i32 0, i32 10
%tmp3 = sub i32 %b, %tmp2
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 7746cd3..b106ced 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,22 +1,24 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep lsl
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep lsr
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep asr
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ror
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep mov
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
+; CHECK: t2ADDrs_lsl
+; CHECK: add.w r0, r0, r1, lsl #16
%A = shl i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
}
define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) {
+; CHECK: t2ADDrs_lsr
+; CHECK: add.w r0, r0, r1, lsr #16
%A = lshr i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
}
define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
+; CHECK: t2ADDrs_asr
+; CHECK: add.w r0, r0, r1, asr #16
%A = ashr i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
@@ -24,6 +26,8 @@ define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
; i32 ror(n) = (x >> n) | (x << (32 - n))
define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
+; CHECK: t2ADDrs_ror
+; CHECK: add.w r0, r0, r1, ror #16
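+; The lshr/shl/or sequence below spells out the ror formula above; it is
+; recognized as a rotate and folded into the add's shifted operand.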
%A = lshr i32 %Y, 16
%B = shl i32 %Y, 16
%C = or i32 %B, %A
@@ -32,6 +36,10 @@ define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
}
define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) {
+; CHECK: t2ADDrs_noRegShift
+; CHECK: uxtb r2, r2
+; CHECK: lsls r1, r2
+; CHECK: add r0, r1
%shift.upgrd.1 = zext i8 %sh to i32
%A = shl i32 %Y, %shift.upgrd.1
%B = add i32 %X, %A
diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll
index 66cc884..092ec27 100644
--- a/test/CodeGen/Thumb2/thumb2-smla.ll
+++ b/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep smlabt | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f3(i32 %a, i16 %x, i32 %y) {
+; CHECK: f3
+; CHECK: smlabt r0, r1, r2, r0
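+; Bottom half of r1 (the sext'ed i16) times top half of r2 (the ashr by 16),
+; accumulated into r0, which is exactly what smlabt encodes.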
%tmp = sext i16 %x to i32 ; <i32> [#uses=1]
%tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1]
%tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll
index cdbf4ca..16ea85d 100644
--- a/test/CodeGen/Thumb2/thumb2-smul.ll
+++ b/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -1,12 +1,11 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep smulbt | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep smultt | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
@x = weak global i16 0 ; <i16*> [#uses=1]
@y = weak global i16 0 ; <i16*> [#uses=0]
define i32 @f1(i32 %y) {
+; CHECK: f1
+; CHECK: smulbt r0, r1, r0
%tmp = load i16* @x ; <i16> [#uses=1]
%tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1]
%tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
@@ -16,6 +15,8 @@ define i32 @f1(i32 %y) {
}
define i32 @f2(i32 %x, i32 %y) {
+; CHECK: f2
+; CHECK: smultt r0, r1, r0
%tmp1 = ashr i32 %x, 16 ; <i32> [#uses=1]
%tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
%tmp4 = mul i32 %tmp3, %tmp1 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 0a7221c..aef167b 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
-; CHECK: vstmia sp
-; CHECK: vldmia sp
+; CHECK: bic sp, sp, #15
+; CHECK: vst1.64 {{.*}}sp, :128
+; CHECK: vld1.64 {{.*}}sp, :128
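+; Spilling a Q register wants 128-bit alignment, so the frame is realigned
+; (bic sp, sp, #15) and the spill/reload use vst1.64/vld1.64 with an explicit
+; :128 alignment instead of vstmia/vldmia.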
entry:
%0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
store float 6.300000e+01, float* undef, align 4
diff --git a/test/CodeGen/Thumb2/thumb2-str_pre.ll b/test/CodeGen/Thumb2/thumb2-str_pre.ll
index 6c804ee..9af960b 100644
--- a/test/CodeGen/Thumb2/thumb2-str_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep {str.*\\!} | count 2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define void @test1(i32* %X, i32* %A, i32** %dest) {
+; CHECK: test1
+; CHECK: str r1, [r0, #+16]!
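+; The gep advances %X by four i32s (16 bytes); the store and the pointer update
+; for %dest are folded into one pre-indexed str with writeback.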
%B = load i32* %A ; <i32> [#uses=1]
%Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
store i32 %B, i32* %Y
@@ -10,6 +11,8 @@ define void @test1(i32* %X, i32* %A, i32** %dest) {
}
define i16* @test2(i16* %X, i32* %A) {
+; CHECK: test2
+; CHECK: strh r1, [r0, #+8]!
%B = load i32* %A ; <i32> [#uses=1]
%Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2]
%tmp = trunc i32 %B to i16 ; <i16> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index 33ed543..054d5df 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -1,16 +1,15 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep sxtb | count 2
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep sxtb | grep ror | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep sxtab | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @test0(i8 %A) {
+; CHECK: test0
+; CHECK: sxtb r0, r0
%B = sext i8 %A to i32
ret i32 %B
}
define i8 @test1(i32 %A) signext {
+; CHECK: test1
+; CHECK: sxtb.w r0, r0, ror #8
%B = lshr i32 %A, 8
%C = shl i32 %A, 24
%D = or i32 %B, %C
@@ -19,6 +18,9 @@ define i8 @test1(i32 %A) signext {
}
define i32 @test2(i32 %A, i32 %X) signext {
+; CHECK: test2
+; CHECK: lsrs r0, r0, #8
+; CHECK: sxtab r0, r1, r0
%B = lshr i32 %A, 8
%C = shl i32 %A, 24
%D = or i32 %B, %C
diff --git a/test/CodeGen/Thumb2/thumb2-tbh.ll b/test/CodeGen/Thumb2/thumb2-tbh.ll
index c5cb6f3..2cf1d6a 100644
--- a/test/CodeGen/Thumb2/thumb2-tbh.ll
+++ b/test/CodeGen/Thumb2/thumb2-tbh.ll
@@ -2,8 +2,6 @@
; Thumb2 target should reorder the bb's in order to use tbb / tbh.
-; XFAIL: *
-
%struct.R_flstr = type { i32, i32, i8* }
%struct._T_tstr = type { i32, %struct.R_flstr*, %struct._T_tstr* }
@_C_nextcmd = external global i32 ; <i32*> [#uses=3]
@@ -18,7 +16,7 @@ declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
define arm_apcscc i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
; CHECK: main:
-; CHECK: tbh
+; CHECK: tbb
entry:
br label %bb42.i
@@ -26,7 +24,7 @@ bb1.i2: ; preds = %bb42.i
br label %bb40.i
bb5.i: ; preds = %bb42.i
- %0 = or i32 %_Y_flags.1, 32 ; <i32> [#uses=1]
+ %0 = or i32 %argc, 32 ; <i32> [#uses=1]
br label %bb40.i
bb7.i: ; preds = %bb42.i
@@ -66,14 +64,10 @@ bb39.i: ; preds = %bb42.i
unreachable
bb40.i: ; preds = %bb42.i, %bb5.i, %bb1.i2
- %_Y_flags.0 = phi i32 [ 0, %bb1.i2 ], [ %0, %bb5.i ], [ %_Y_flags.1, %bb42.i ] ; <i32> [#uses=1]
- %_Y_eflag.b.0 = phi i1 [ %_Y_eflag.b.1, %bb1.i2 ], [ %_Y_eflag.b.1, %bb5.i ], [ true, %bb42.i ] ; <i1> [#uses=1]
br label %bb42.i
bb42.i: ; preds = %bb40.i, %entry
- %_Y_eflag.b.1 = phi i1 [ false, %entry ], [ %_Y_eflag.b.0, %bb40.i ] ; <i1> [#uses=2]
- %_Y_flags.1 = phi i32 [ 0, %entry ], [ %_Y_flags.0, %bb40.i ] ; <i32> [#uses=2]
- switch i32 undef, label %bb39.i [
+ switch i32 %argc, label %bb39.i [
i32 67, label %bb33.i
i32 70, label %bb35.i
i32 77, label %bb37.i
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index c6867d9..0f122f2 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,34 +1,40 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i1 @f1(i32 %a, i32 %b) {
+; CHECK: f1
+; CHECK: teq.w r0, r1
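+; teq sets flags from the xor without writing a result register, so comparing
+; an xor against zero needs no scratch register.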
%tmp = xor i32 %a, %b
%tmp1 = icmp ne i32 %tmp, 0
ret i1 %tmp1
}
define i1 @f2(i32 %a, i32 %b) {
+; CHECK: f2
+; CHECK: teq.w r0, r1
%tmp = xor i32 %a, %b
%tmp1 = icmp eq i32 %tmp, 0
ret i1 %tmp1
}
define i1 @f3(i32 %a, i32 %b) {
+; CHECK: f3
+; CHECK: teq.w r0, r1
%tmp = xor i32 %a, %b
%tmp1 = icmp ne i32 0, %tmp
ret i1 %tmp1
}
define i1 @f4(i32 %a, i32 %b) {
+; CHECK: f4
+; CHECK: teq.w r0, r1
%tmp = xor i32 %a, %b
%tmp1 = icmp eq i32 0, %tmp
ret i1 %tmp1
}
define i1 @f6(i32 %a, i32 %b) {
+; CHECK: f6
+; CHECK: teq.w r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = xor i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -36,6 +42,8 @@ define i1 @f6(i32 %a, i32 %b) {
}
define i1 @f7(i32 %a, i32 %b) {
+; CHECK: f7
+; CHECK: teq.w r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = xor i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -43,6 +51,8 @@ define i1 @f7(i32 %a, i32 %b) {
}
define i1 @f8(i32 %a, i32 %b) {
+; CHECK: f8
+; CHECK: teq.w r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = xor i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -50,6 +60,8 @@ define i1 @f8(i32 %a, i32 %b) {
}
define i1 @f9(i32 %a, i32 %b) {
+; CHECK: f9
+; CHECK: teq.w r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 37919dd..75e1d70 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -1,13 +1,15 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxtb | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxtab | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxth | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i8 @test1(i32 %A.u) zeroext {
+; CHECK: test1
+; CHECK: uxtb r0, r0
%B.u = trunc i32 %A.u to i8
ret i8 %B.u
}
define i32 @test2(i32 %A.u, i32 %B.u) zeroext {
+; CHECK: test2
+; CHECK: uxtab r0, r0, r1
%C.u = trunc i32 %B.u to i8
%D.u = zext i8 %C.u to i32
%E.u = add i32 %A.u, %D.u
@@ -15,6 +17,8 @@ define i32 @test2(i32 %A.u, i32 %B.u) zeroext {
}
define i32 @test3(i32 %A.u) zeroext {
+; CHECK: test3
+; CHECK: uxth.w r0, r0, ror #8
%B.u = lshr i32 %A.u, 8
%C.u = shl i32 %A.u, 24
%D.u = or i32 %B.u, %C.u
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 4022d95..4e23f53 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -1,36 +1,47 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep uxt | count 10
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @test1(i32 %x) {
+; CHECK: test1
+; CHECK: uxtb16.w r0, r0
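+; 16711935 is 0x00FF00FF: keep bytes 0 and 2, zero-extending each within its
+; halfword, which is precisely uxtb16.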
%tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1]
ret i32 %tmp1
}
define i32 @test2(i32 %x) {
+; CHECK: test2
+; CHECK: uxtb16.w r0, r0, ror #8
%tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
ret i32 %tmp2
}
define i32 @test3(i32 %x) {
+; CHECK: test3
+; CHECK: uxtb16.w r0, r0, ror #8
%tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
ret i32 %tmp2
}
define i32 @test4(i32 %x) {
+; CHECK: test4
+; CHECK: uxtb16.w r0, r0, ror #8
%tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
%tmp6 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
ret i32 %tmp6
}
define i32 @test5(i32 %x) {
+; CHECK: test5
+; CHECK: uxtb16.w r0, r0, ror #8
%tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
ret i32 %tmp2
}
define i32 @test6(i32 %x) {
+; CHECK: test6
+; CHECK: uxtb16.w r0, r0, ror #16
%tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1]
%tmp4 = shl i32 %x, 16 ; <i32> [#uses=1]
@@ -40,6 +51,8 @@ define i32 @test6(i32 %x) {
}
define i32 @test7(i32 %x) {
+; CHECK: test7
+; CHECK: uxtb16.w r0, r0, ror #16
%tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1]
%tmp4 = shl i32 %x, 16 ; <i32> [#uses=1]
@@ -49,6 +62,8 @@ define i32 @test7(i32 %x) {
}
define i32 @test8(i32 %x) {
+; CHECK: test8
+; CHECK: uxtb16.w r0, r0, ror #24
%tmp1 = shl i32 %x, 8 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16711680 ; <i32> [#uses=1]
%tmp5 = lshr i32 %x, 24 ; <i32> [#uses=1]
@@ -57,6 +72,8 @@ define i32 @test8(i32 %x) {
}
define i32 @test9(i32 %x) {
+; CHECK: test9
+; CHECK: uxtb16.w r0, r0, ror #24
%tmp1 = lshr i32 %x, 24 ; <i32> [#uses=1]
%tmp4 = shl i32 %x, 8 ; <i32> [#uses=1]
%tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1]
@@ -65,6 +82,13 @@ define i32 @test9(i32 %x) {
}
define i32 @test10(i32 %p0) {
+; CHECK: test10
+; CHECK: mov.w r1, #16253176
+; CHECK: and.w r0, r1, r0, lsr #7
+; CHECK: lsrs r1, r0, #5
+; CHECK: uxtb16.w r1, r1
+; CHECK: orr.w r0, r1, r0
+
%tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16253176 ; <i32> [#uses=2]
%tmp4 = lshr i32 %tmp2, 5 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
index c106f57..3f67097 100644
--- a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
+++ b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
@@ -11,7 +11,7 @@ target triple = "i686-apple-darwin8.6.1"
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
-declare <4 x i32> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
@@ -33,8 +33,8 @@ cond_false183: ; preds = %cond_false, %entry
%tmp337 = bitcast <4 x i32> %tmp336 to <4 x float> ; <<4 x float>> [#uses=1]
%tmp378 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp337, <4 x float> zeroinitializer, i8 1 ) ; <<4 x float>> [#uses=1]
%tmp379 = bitcast <4 x float> %tmp378 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp388 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> zeroinitializer, <4 x i32> %tmp379 ) ; <<4 x i32>> [#uses=1]
- %tmp392 = bitcast <4 x i32> %tmp388 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp388 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> zeroinitializer, <4 x i32> %tmp379 ) ; <<8 x i16>> [#uses=1]
+ %tmp392 = bitcast <8 x i16> %tmp388 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp399 = extractelement <8 x i16> %tmp392, i32 7 ; <i16> [#uses=1]
%tmp423 = insertelement <8 x i16> zeroinitializer, i16 %tmp399, i32 7 ; <<8 x i16>> [#uses=1]
%tmp427 = bitcast <8 x i16> %tmp423 to <16 x i8> ; <<16 x i8>> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
index 49f3a95..b045329 100644
--- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
+++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -17,8 +17,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>*
%tmp75 = bitcast <4 x float> %tmp74 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp88 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp61, i8 1 ) ; <<4 x float>> [#uses=1]
%tmp89 = bitcast <4 x float> %tmp88 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp98 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 ) ; <<4 x i32>> [#uses=1]
- %tmp102 = bitcast <4 x i32> %tmp98 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp98 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 ) ; <<8 x i16>> [#uses=1]
+ %tmp102 = bitcast <8 x i16> %tmp98 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp.upgrd.1 = shufflevector <8 x i16> %tmp102, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1]
%tmp105 = shufflevector <8 x i16> %tmp.upgrd.1, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1]
%tmp105.upgrd.2 = bitcast <8 x i16> %tmp105 to <4 x float> ; <<4 x float>> [#uses=1]
@@ -32,8 +32,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>*
%tmp134 = bitcast <4 x float> %tmp133 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp147 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp120, i8 1 ) ; <<4 x float>> [#uses=1]
%tmp148 = bitcast <4 x float> %tmp147 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp159 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 ) ; <<4 x i32>> [#uses=1]
- %tmp163 = bitcast <4 x i32> %tmp159 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp159 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 ) ; <<8 x i16>> [#uses=1]
+ %tmp163 = bitcast <8 x i16> %tmp159 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp164 = shufflevector <8 x i16> %tmp163, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1]
%tmp166 = shufflevector <8 x i16> %tmp164, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1]
%tmp166.upgrd.4 = bitcast <8 x i16> %tmp166 to <4 x float> ; <<4 x float>> [#uses=1]
@@ -47,8 +47,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>*
%tmp195 = bitcast <4 x float> %tmp194 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp208 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp181, i8 1 ) ; <<4 x float>> [#uses=1]
%tmp209 = bitcast <4 x float> %tmp208 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp220 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 ) ; <<4 x i32>> [#uses=1]
- %tmp224 = bitcast <4 x i32> %tmp220 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp220 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 ) ; <<8 x i16>> [#uses=1]
+ %tmp224 = bitcast <8 x i16> %tmp220 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp225 = shufflevector <8 x i16> %tmp224, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1]
%tmp227 = shufflevector <8 x i16> %tmp225, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1]
%tmp227.upgrd.6 = bitcast <8 x i16> %tmp227 to <4 x float> ; <<4 x float>> [#uses=1]
@@ -62,8 +62,8 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>*
%tmp256 = bitcast <4 x float> %tmp255 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp269 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp242, i8 1 ) ; <<4 x float>> [#uses=1]
%tmp270 = bitcast <4 x float> %tmp269 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp281 = tail call <4 x i32> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 ) ; <<4 x i32>> [#uses=1]
- %tmp285 = bitcast <4 x i32> %tmp281 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp281 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 ) ; <<8 x i16>> [#uses=1]
+ %tmp285 = bitcast <8 x i16> %tmp281 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp286 = shufflevector <8 x i16> %tmp285, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1]
%tmp288 = shufflevector <8 x i16> %tmp286, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1]
%tmp288.upgrd.8 = bitcast <8 x i16> %tmp288 to <4 x float> ; <<4 x float>> [#uses=1]
@@ -73,4 +73,4 @@ define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>*
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
-declare <4 x i32> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
index 989dfc5..b27ef83 100644
--- a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
+++ b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+sse2
; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpckhwd
-declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>)
@@ -13,8 +13,8 @@ define fastcc void @test(i32* %src, i32 %sbpr, i32* %dst, i32 %dbpr, i32 %w, i32
%tmp805 = add <4 x i32> %tmp777, zeroinitializer
%tmp832 = bitcast <4 x i32> %tmp805 to <8 x i16>
%tmp838 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp832, <8 x i16> < i16 8, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef > )
- %tmp1020 = tail call <8 x i16> @llvm.x86.sse2.packuswb.128( <8 x i16> zeroinitializer, <8 x i16> %tmp838 )
- %tmp1030 = bitcast <8 x i16> %tmp1020 to <4 x i32>
+ %tmp1020 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> zeroinitializer, <8 x i16> %tmp838 )
+ %tmp1030 = bitcast <16 x i8> %tmp1020 to <4 x i32>
%tmp1033 = add <4 x i32> zeroinitializer, %tmp1030
%tmp1048 = bitcast <4 x i32> %tmp1033 to <2 x i64>
%tmp1049 = or <2 x i64> %tmp1048, zeroinitializer
diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
index 83eb61a..2aea9c5 100644
--- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
+++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin
; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local
+; PR5534
%struct.CGPoint = type { double, double }
%struct.NSArray = type { %struct.NSObject }
diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll
new file mode 100644
index 0000000..8c62f4d
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -0,0 +1,104 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Spill"
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Folded Spill"
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Reload"
+
+ %struct..0anon = type { i32 }
+ %struct.rtvec_def = type { i32, [1 x %struct..0anon] }
+ %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
+@rtx_format = external global [116 x i8*] ; <[116 x i8*]*> [#uses=1]
+@rtx_length = external global [117 x i32] ; <[117 x i32]*> [#uses=1]
+
+declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32)
+
+define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) {
+entry:
+ %tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1]
+ br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
+
+cond_next: ; preds = %entry
+ %tmp6 = getelementptr %struct.rtx_def* %x, i32 0, i32 0 ; <i16*> [#uses=1]
+ %tmp7 = load i16* %tmp6 ; <i16> [#uses=2]
+ %tmp78 = zext i16 %tmp7 to i32 ; <i32> [#uses=2]
+ %tmp10 = icmp eq i16 %tmp7, 54 ; <i1> [#uses=1]
+ br i1 %tmp10, label %cond_true13, label %cond_next32
+
+cond_true13: ; preds = %cond_next
+ %tmp15 = getelementptr %struct.rtx_def* %x, i32 0, i32 3 ; <[1 x %struct..0anon]*> [#uses=1]
+ %tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
+ %tmp19 = load %struct.rtx_def** %tmp1718 ; <%struct.rtx_def*> [#uses=1]
+ %tmp20 = getelementptr %struct.rtx_def* %tmp19, i32 0, i32 0 ; <i16*> [#uses=1]
+ %tmp21 = load i16* %tmp20 ; <i16> [#uses=1]
+ %tmp22 = icmp eq i16 %tmp21, 57 ; <i1> [#uses=1]
+ br i1 %tmp22, label %cond_true25, label %cond_next32
+
+cond_true25: ; preds = %cond_true13
+ %tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) ; <%struct.rtx_def*> [#uses=1]
+ ret %struct.rtx_def* %tmp29
+
+cond_next32: ; preds = %cond_true13, %cond_next
+ %tmp34 = getelementptr [116 x i8*]* @rtx_format, i32 0, i32 %tmp78 ; <i8**> [#uses=1]
+ %tmp35 = load i8** %tmp34, align 4 ; <i8*> [#uses=1]
+ %tmp37 = getelementptr [117 x i32]* @rtx_length, i32 0, i32 %tmp78 ; <i32*> [#uses=1]
+ %tmp38 = load i32* %tmp37, align 4 ; <i32> [#uses=1]
+ %i.011 = add i32 %tmp38, -1 ; <i32> [#uses=2]
+ %tmp12513 = icmp sgt i32 %i.011, -1 ; <i1> [#uses=1]
+ br i1 %tmp12513, label %bb, label %UnifiedReturnBlock
+
+bb: ; preds = %bb123, %cond_next32
+ %indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ] ; <i32> [#uses=2]
+ %i.01.0 = sub i32 %i.011, %indvar ; <i32> [#uses=5]
+ %tmp42 = getelementptr i8* %tmp35, i32 %i.01.0 ; <i8*> [#uses=2]
+ %tmp43 = load i8* %tmp42 ; <i8> [#uses=1]
+ switch i8 %tmp43, label %bb123 [
+ i8 101, label %cond_true47
+ i8 69, label %bb105.preheader
+ ]
+
+cond_true47: ; preds = %bb
+ %tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
+ %tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
+ %tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1]
+ %tmp58 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1]
+ %tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1]
+ %tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1]
+ store i32 %tmp58.c, i32* %tmp62
+ %tmp6816 = load i8* %tmp42 ; <i8> [#uses=1]
+ %tmp6917 = icmp eq i8 %tmp6816, 69 ; <i1> [#uses=1]
+ br i1 %tmp6917, label %bb105.preheader, label %bb123
+
+bb105.preheader: ; preds = %cond_true47, %bb
+ %tmp11020 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
+ %tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def** ; <%struct.rtvec_def**> [#uses=3]
+ %tmp11322 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1]
+ %tmp11423 = getelementptr %struct.rtvec_def* %tmp11322, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp11524 = load i32* %tmp11423 ; <i32> [#uses=1]
+ %tmp11625 = icmp eq i32 %tmp11524, 0 ; <i1> [#uses=1]
+ br i1 %tmp11625, label %bb123, label %bb73
+
+bb73: ; preds = %bb73, %bb105.preheader
+ %j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ] ; <i32> [#uses=3]
+ %tmp81 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=2]
+ %tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1]
+ %tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
+ %tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1]
+ %tmp98 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1]
+ %tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1]
+ %tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1]
+ store i32 %tmp98.c, i32* %tmp101
+ %tmp104 = add i32 %j.019, 1 ; <i32> [#uses=2]
+ %tmp113 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1]
+ %tmp114 = getelementptr %struct.rtvec_def* %tmp113, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp115 = load i32* %tmp114 ; <i32> [#uses=1]
+ %tmp116 = icmp ult i32 %tmp104, %tmp115 ; <i1> [#uses=1]
+ br i1 %tmp116, label %bb73, label %bb123
+
+bb123: ; preds = %bb73, %bb105.preheader, %cond_true47, %bb
+ %i.0 = add i32 %i.01.0, -1 ; <i32> [#uses=1]
+ %tmp125 = icmp sgt i32 %i.0, -1 ; <i1> [#uses=1]
+ %indvar.next26 = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 %tmp125, label %bb, label %UnifiedReturnBlock
+
+UnifiedReturnBlock: ; preds = %bb123, %cond_next32, %entry
+ %UnifiedRetVal = phi %struct.rtx_def* [ null, %entry ], [ %x, %cond_next32 ], [ %x, %bb123 ] ; <%struct.rtx_def*> [#uses=1]
+ ret %struct.rtx_def* %UnifiedRetVal
+}
diff --git a/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll b/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll
new file mode 100644
index 0000000..5398eef
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim
+; rdar://7394770
+
+%struct.JVTLib_100487 = type <{ i8 }>
+
+define i32 @_Z13JVTLib_10335613JVTLib_10266513JVTLib_100579S_S_S_jPhj(i16* nocapture %ResidualX_Array.0, %struct.JVTLib_100487* nocapture byval align 4 %xqp, i16* nocapture %ResidualL_Array.0, i16* %ResidualDCZ_Array.0, i16* nocapture %ResidualACZ_FOArray.0, i32 %useFRextDequant, i8* nocapture %JVTLib_103357, i32 %use_field_scan) ssp {
+bb.nph:
+ %0 = shl i32 undef, 1 ; <i32> [#uses=2]
+ %mask133.masked.masked.masked.masked.masked.masked = or i640 undef, undef ; <i640> [#uses=1]
+ br label %bb
+
+bb: ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit, %bb.nph
+ br i1 undef, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ br i1 undef, label %bb.i, label %bb1.i
+
+bb2: ; preds = %bb
+ unreachable
+
+bb.i: ; preds = %bb1
+ br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit
+
+bb1.i: ; preds = %bb1
+ br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit
+
+_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit: ; preds = %bb1.i, %bb.i
+ br i1 undef, label %bb5, label %bb
+
+bb5: ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit
+ %mask271.masked.masked.masked.masked.masked.masked.masked = or i256 0, undef ; <i256> [#uses=2]
+ %mask266.masked.masked.masked.masked.masked.masked = or i256 %mask271.masked.masked.masked.masked.masked.masked.masked, undef ; <i256> [#uses=1]
+ %mask241.masked = or i256 undef, undef ; <i256> [#uses=1]
+ %ins237 = or i256 undef, 0 ; <i256> [#uses=1]
+ br i1 undef, label %bb9, label %bb10
+
+bb9: ; preds = %bb5
+ br i1 undef, label %bb12.i, label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit
+
+bb12.i: ; preds = %bb9
+ br label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit
+
+_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit: ; preds = %bb12.i, %bb9
+ ret i32 undef
+
+bb10: ; preds = %bb5
+ %1 = sext i16 undef to i32 ; <i32> [#uses=1]
+ %2 = sext i16 undef to i32 ; <i32> [#uses=1]
+ %3 = sext i16 undef to i32 ; <i32> [#uses=1]
+ %4 = sext i16 undef to i32 ; <i32> [#uses=1]
+ %5 = sext i16 undef to i32 ; <i32> [#uses=1]
+ %6 = sext i16 undef to i32 ; <i32> [#uses=1]
+ %tmp211 = lshr i256 %mask271.masked.masked.masked.masked.masked.masked.masked, 112 ; <i256> [#uses=0]
+ %7 = sext i16 undef to i32 ; <i32> [#uses=1]
+ %tmp208 = lshr i256 %mask266.masked.masked.masked.masked.masked.masked, 128 ; <i256> [#uses=1]
+ %tmp209 = trunc i256 %tmp208 to i16 ; <i16> [#uses=1]
+ %8 = sext i16 %tmp209 to i32 ; <i32> [#uses=1]
+ %9 = sext i16 undef to i32 ; <i32> [#uses=1]
+ %10 = sext i16 undef to i32 ; <i32> [#uses=1]
+ %tmp193 = lshr i256 %mask241.masked, 208 ; <i256> [#uses=1]
+ %tmp194 = trunc i256 %tmp193 to i16 ; <i16> [#uses=1]
+ %11 = sext i16 %tmp194 to i32 ; <i32> [#uses=1]
+ %tmp187 = lshr i256 %ins237, 240 ; <i256> [#uses=1]
+ %tmp188 = trunc i256 %tmp187 to i16 ; <i16> [#uses=1]
+ %12 = sext i16 %tmp188 to i32 ; <i32> [#uses=1]
+ %13 = add nsw i32 %4, %1 ; <i32> [#uses=1]
+ %14 = add nsw i32 %5, 0 ; <i32> [#uses=1]
+ %15 = add nsw i32 %6, %2 ; <i32> [#uses=1]
+ %16 = add nsw i32 %7, %3 ; <i32> [#uses=1]
+ %17 = add nsw i32 0, %8 ; <i32> [#uses=1]
+ %18 = add nsw i32 %11, %9 ; <i32> [#uses=1]
+ %19 = add nsw i32 0, %10 ; <i32> [#uses=1]
+ %20 = add nsw i32 %12, 0 ; <i32> [#uses=1]
+ %21 = add nsw i32 %17, %13 ; <i32> [#uses=2]
+ %22 = add nsw i32 %18, %14 ; <i32> [#uses=2]
+ %23 = add nsw i32 %19, %15 ; <i32> [#uses=2]
+ %24 = add nsw i32 %20, %16 ; <i32> [#uses=2]
+ %25 = add nsw i32 %22, %21 ; <i32> [#uses=2]
+ %26 = add nsw i32 %24, %23 ; <i32> [#uses=2]
+ %27 = sub i32 %21, %22 ; <i32> [#uses=1]
+ %28 = sub i32 %23, %24 ; <i32> [#uses=1]
+ %29 = add nsw i32 %26, %25 ; <i32> [#uses=1]
+ %30 = sub i32 %25, %26 ; <i32> [#uses=1]
+ %31 = sub i32 %27, %28 ; <i32> [#uses=1]
+ %32 = ashr i32 %29, 1 ; <i32> [#uses=2]
+ %33 = ashr i32 %30, 1 ; <i32> [#uses=2]
+ %34 = ashr i32 %31, 1 ; <i32> [#uses=2]
+ %35 = icmp sgt i32 %32, 32767 ; <i1> [#uses=1]
+ %o0_0.0.i = select i1 %35, i32 32767, i32 %32 ; <i32> [#uses=2]
+ %36 = icmp slt i32 %o0_0.0.i, -32768 ; <i1> [#uses=1]
+ %37 = icmp sgt i32 %33, 32767 ; <i1> [#uses=1]
+ %o1_0.0.i = select i1 %37, i32 32767, i32 %33 ; <i32> [#uses=2]
+ %38 = icmp slt i32 %o1_0.0.i, -32768 ; <i1> [#uses=1]
+ %39 = icmp sgt i32 %34, 32767 ; <i1> [#uses=1]
+ %o2_0.0.i = select i1 %39, i32 32767, i32 %34 ; <i32> [#uses=2]
+ %40 = icmp slt i32 %o2_0.0.i, -32768 ; <i1> [#uses=1]
+ %tmp101 = lshr i640 %mask133.masked.masked.masked.masked.masked.masked, 256 ; <i640> [#uses=1]
+ %41 = trunc i32 %o0_0.0.i to i16 ; <i16> [#uses=1]
+ %tmp358 = select i1 %36, i16 -32768, i16 %41 ; <i16> [#uses=2]
+ %42 = trunc i32 %o1_0.0.i to i16 ; <i16> [#uses=1]
+ %tmp347 = select i1 %38, i16 -32768, i16 %42 ; <i16> [#uses=1]
+ %43 = trunc i32 %o2_0.0.i to i16 ; <i16> [#uses=1]
+ %tmp335 = select i1 %40, i16 -32768, i16 %43 ; <i16> [#uses=1]
+ %44 = icmp sgt i16 %tmp358, -1 ; <i1> [#uses=2]
+ %..i24 = select i1 %44, i16 %tmp358, i16 undef ; <i16> [#uses=1]
+ %45 = icmp sgt i16 %tmp347, -1 ; <i1> [#uses=1]
+ %46 = icmp sgt i16 %tmp335, -1 ; <i1> [#uses=1]
+ %47 = zext i16 %..i24 to i32 ; <i32> [#uses=1]
+ %tmp = trunc i640 %tmp101 to i32 ; <i32> [#uses=1]
+ %48 = and i32 %tmp, 65535 ; <i32> [#uses=2]
+ %49 = mul i32 %47, %48 ; <i32> [#uses=1]
+ %50 = zext i16 undef to i32 ; <i32> [#uses=1]
+ %51 = mul i32 %50, %48 ; <i32> [#uses=1]
+ %52 = add i32 %49, %0 ; <i32> [#uses=1]
+ %53 = add i32 %51, %0 ; <i32> [#uses=1]
+ %54 = lshr i32 %52, undef ; <i32> [#uses=1]
+ %55 = lshr i32 %53, undef ; <i32> [#uses=1]
+ %56 = trunc i32 %54 to i16 ; <i16> [#uses=1]
+ %57 = trunc i32 %55 to i16 ; <i16> [#uses=1]
+ %vs16Out0_0.0.i = select i1 %44, i16 %56, i16 undef ; <i16> [#uses=1]
+ %vs16Out0_4.0.i = select i1 %45, i16 0, i16 undef ; <i16> [#uses=1]
+ %vs16Out1_0.0.i = select i1 %46, i16 %57, i16 undef ; <i16> [#uses=1]
+ br i1 undef, label %bb129.i, label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit
+
+bb129.i: ; preds = %bb10
+ br label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit
+
+_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit: ; preds = %bb129.i, %bb10
+ %58 = phi i16 [ %vs16Out0_4.0.i, %bb129.i ], [ undef, %bb10 ] ; <i16> [#uses=0]
+ %59 = phi i16 [ undef, %bb129.i ], [ %vs16Out1_0.0.i, %bb10 ] ; <i16> [#uses=0]
+ store i16 %vs16Out0_0.0.i, i16* %ResidualDCZ_Array.0, align 2
+ unreachable
+}
diff --git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
new file mode 100644
index 0000000..a7c2020
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7395200
+
+@g = common global [4 x float] zeroinitializer, align 16 ; <[4 x float]*> [#uses=4]
+
+define void @foo(i32 %n, float* nocapture %x) nounwind ssp {
+entry:
+; CHECK: foo:
+ %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb.nph, label %return
+
+bb.nph: ; preds = %entry
+; CHECK: movq _g@GOTPCREL(%rip), %rcx
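+; The GOT load of @g's address is loop-invariant and is hoisted into the
+; preheader instead of being repeated for each of the four element loads.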
+ %tmp = zext i32 %n to i64 ; <i64> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+; CHECK: LBB1_2:
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=2]
+ %tmp9 = shl i64 %indvar, 2 ; <i64> [#uses=4]
+ %tmp1016 = or i64 %tmp9, 1 ; <i64> [#uses=1]
+ %scevgep = getelementptr float* %x, i64 %tmp1016 ; <float*> [#uses=1]
+ %tmp1117 = or i64 %tmp9, 2 ; <i64> [#uses=1]
+ %scevgep12 = getelementptr float* %x, i64 %tmp1117 ; <float*> [#uses=1]
+ %tmp1318 = or i64 %tmp9, 3 ; <i64> [#uses=1]
+ %scevgep14 = getelementptr float* %x, i64 %tmp1318 ; <float*> [#uses=1]
+ %x_addr.03 = getelementptr float* %x, i64 %tmp9 ; <float*> [#uses=1]
+ %1 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 0), align 16 ; <float> [#uses=1]
+ store float %1, float* %x_addr.03, align 4
+ %2 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 1), align 4 ; <float> [#uses=1]
+ store float %2, float* %scevgep, align 4
+ %3 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 2), align 8 ; <float> [#uses=1]
+ store float %3, float* %scevgep12, align 4
+ %4 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 3), align 4 ; <float> [#uses=1]
+ store float %4, float* %scevgep14, align 4
+ %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %indvar.next, %tmp ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
new file mode 100644
index 0000000..3ce9edb
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7396984
+
+@str = private constant [28 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1
+
+define void @t(i32 %count) ssp nounwind {
+entry:
+; CHECK: t:
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
+; CHECK: movups L_str(%rip), %xmm0
+ %tmp0 = alloca [60 x i8], align 1
+ %tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0
+ br label %bb1
+
+bb1:
+; CHECK: LBB1_1:
+; CHECK: movaps %xmm0, (%rsp)
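+; The constant 28-byte memcpy is lowered to wide SSE moves; the loads from
+; L_str are loop-invariant and hoisted above LBB1_1 while the stores stay in
+; the loop.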
+ %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]
+ call void @llvm.memcpy.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1)
+ %tmp3 = add i32 %tmp2, 1
+ %tmp4 = icmp eq i32 %tmp3, %count
+ br i1 %tmp4, label %bb2, label %bb1
+
+bb2:
+ ret void
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
diff --git a/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
new file mode 100644
index 0000000..5c1a2bc
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
@@ -0,0 +1,52 @@
+; RUN: llc -O3 < %s
+; This test formerly failed with:
+; Assertion failed: (!B && "UpdateTerminators requires analyzable predecessors!"), function updateTerminator, MachineBasicBlock.cpp, line 255.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.2"
+
+%"struct.llvm::InlineAsm::ConstraintInfo" = type { i32, i8, i8, i8, i8, %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" }
+%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" = type { %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"* }
+%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::string"*, %"struct.std::string"*, %"struct.std::string"* }
+%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
+%"struct.std::string" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" }
+%"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" }
+%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" }
+
+define zeroext i8 @_ZN4llvm9InlineAsm14ConstraintInfo5ParseENS_9StringRefERSt6vectorIS1_SaIS1_EE(%"struct.llvm::InlineAsm::ConstraintInfo"* nocapture %this, i64 %Str.0, i64 %Str.1, %"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >"* nocapture %ConstraintsSoFar) nounwind ssp align 2 {
+entry:
+ br i1 undef, label %bb56, label %bb27.outer
+
+bb8: ; preds = %bb27.outer108, %bb13
+ switch i8 undef, label %bb27.outer [
+ i8 35, label %bb56
+ i8 37, label %bb14
+ i8 38, label %bb10
+ i8 42, label %bb56
+ ]
+
+bb27.outer: ; preds = %bb8, %entry
+ %I.2.ph = phi i8* [ undef, %entry ], [ %I.2.ph109, %bb8 ] ; <i8*> [#uses=2]
+ br label %bb27.outer108
+
+bb10: ; preds = %bb8
+ %toBool = icmp eq i8 0, 0 ; <i1> [#uses=1]
+ %or.cond = and i1 undef, %toBool ; <i1> [#uses=1]
+ br i1 %or.cond, label %bb13, label %bb56
+
+bb13: ; preds = %bb10
+ br i1 undef, label %bb27.outer108, label %bb8
+
+bb14: ; preds = %bb8
+ ret i8 1
+
+bb27.outer108: ; preds = %bb13, %bb27.outer
+ %I.2.ph109 = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=1]
+ %scevgep = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=0]
+ br label %bb8
+
+bb56: ; preds = %bb10, %bb8, %bb8, %entry
+ ret i8 1
+}
diff --git a/test/CodeGen/X86/bigstructret.ll b/test/CodeGen/X86/bigstructret.ll
new file mode 100644
index 0000000..633995d
--- /dev/null
+++ b/test/CodeGen/X86/bigstructret.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -o %t
+; RUN: grep "movl .24601, 12(%ecx)" %t
+; RUN: grep "movl .48, 8(%ecx)" %t
+; RUN: grep "movl .24, 4(%ecx)" %t
+; RUN: grep "movl .12, (%ecx)" %t
+
+%0 = type { i32, i32, i32, i32 }
+
+define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
+entry:
+ %0 = insertvalue %0 zeroinitializer, i32 12, 0
+ %1 = insertvalue %0 %0, i32 24, 1
+ %2 = insertvalue %0 %1, i32 48, 2
+ %3 = insertvalue %0 %2, i32 24601, 3
+ ret %0 %3
+}
+
diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll
index de89374..4878448 100644
--- a/test/CodeGen/X86/cmp0.ll
+++ b/test/CodeGen/X86/cmp0.ll
@@ -1,7 +1,24 @@
-; RUN: llc < %s -march=x86-64 | grep -v cmp
+; RUN: llc < %s -march=x86-64 | FileCheck %s
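+; Comparisons of a value against zero (or slt 1) should be selected as
+; testq rather than cmpq, as the CHECK lines verify.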
-define i64 @foo(i64 %x) {
+define i64 @test0(i64 %x) nounwind {
%t = icmp eq i64 %x, 0
%r = zext i1 %t to i64
ret i64 %r
+; CHECK: test0:
+; CHECK: testq %rdi, %rdi
+; CHECK: sete %al
+; CHECK: movzbl %al, %eax
+; CHECK: ret
}
+
+define i64 @test1(i64 %x) nounwind {
+ %t = icmp slt i64 %x, 1
+ %r = zext i1 %t to i64
+ ret i64 %r
+; CHECK: test1:
+; CHECK: testq %rdi, %rdi
+; CHECK: setle %al
+; CHECK: movzbl %al, %eax
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/hidden-vis-5.ll b/test/CodeGen/X86/hidden-vis-5.ll
new file mode 100644
index 0000000..88fae37
--- /dev/null
+++ b/test/CodeGen/X86/hidden-vis-5.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim -unwind-tables | FileCheck %s
+; <rdar://problem/7383328>
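+; Even though @func and @main are hidden, their .eh symbols should still be
+; marked .private_extern (see the CHECK lines at the end of the file).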
+
+@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1]
+
+define hidden void @func() nounwind ssp {
+entry:
+ %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+declare i32 @puts(i8*)
+
+define hidden i32 @main() nounwind ssp {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @func() nounwind
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval1
+}
+
+; CHECK: .private_extern _func.eh
+; CHECK: .private_extern _main.eh
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index c0379d1..ec5236b 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -74,16 +74,16 @@ exit:
; CHECK: yet_more_involved:
; CHECK: jmp .LBB3_1
; CHECK-NEXT: align
-; CHECK-NEXT: .LBB3_3:
+; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: call bar99
; CHECK-NEXT: call get
; CHECK-NEXT: cmpl $2999, %eax
-; CHECK-NEXT: jg .LBB3_5
+; CHECK-NEXT: jg .LBB3_6
; CHECK-NEXT: call block_a_true_func
-; CHECK-NEXT: jmp .LBB3_6
-; CHECK-NEXT: .LBB3_5:
-; CHECK-NEXT: call block_a_false_func
+; CHECK-NEXT: jmp .LBB3_7
; CHECK-NEXT: .LBB3_6:
+; CHECK-NEXT: call block_a_false_func
+; CHECK-NEXT: .LBB3_7:
; CHECK-NEXT: call block_a_merge_func
; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: call body
diff --git a/test/CodeGen/X86/loop-strength-reduce2.ll b/test/CodeGen/X86/loop-strength-reduce2.ll
index a1f38a7..9b53adb 100644
--- a/test/CodeGen/X86/loop-strength-reduce2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce2.ll
@@ -4,7 +4,7 @@
@flags2 = internal global [8193 x i8] zeroinitializer, align 32 ; <[8193 x i8]*> [#uses=1]
-define void @test(i32 %k, i32 %i) {
+define void @test(i32 %k, i32 %i) nounwind {
entry:
%k_addr.012 = shl i32 %i, 1 ; <i32> [#uses=1]
%tmp14 = icmp sgt i32 %k_addr.012, 8192 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll
index e340edd..c45a374 100644
--- a/test/CodeGen/X86/loop-strength-reduce3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce3.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 | grep cmp | grep 240
; RUN: llc < %s -march=x86 | grep inc | count 1
-define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) {
+define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind {
entry:
%tmp2955 = icmp sgt i32 %C, 0 ; <i1> [#uses=1]
br i1 %tmp2955, label %bb26.outer.us, label %bb40.split
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
index 4ec2a02..b07eeb6 100644
--- a/test/CodeGen/X86/loop-strength-reduce5.ll
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -3,7 +3,7 @@
@X = weak global i16 0 ; <i16*> [#uses=1]
@Y = weak global i16 0 ; <i16*> [#uses=1]
-define void @foo(i32 %N) {
+define void @foo(i32 %N) nounwind {
entry:
%tmp1019 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
br i1 %tmp1019, label %bb, label %return
diff --git a/test/CodeGen/X86/loop-strength-reduce6.ll b/test/CodeGen/X86/loop-strength-reduce6.ll
index 81da82e..bbafcf7c 100644
--- a/test/CodeGen/X86/loop-strength-reduce6.ll
+++ b/test/CodeGen/X86/loop-strength-reduce6.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86-64 | not grep inc
-define fastcc i32 @decodeMP3(i32 %isize, i32* %done) {
+define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind {
entry:
br i1 false, label %cond_next191, label %cond_true189
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
new file mode 100644
index 0000000..3f90245
--- /dev/null
+++ b/test/CodeGen/X86/object-size.ll
@@ -0,0 +1,55 @@
+; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
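+; llvm.objectsize on a pointer of unknown size folds to -1 even at -O0;
+; the X64 checks below verify the constant comparison that results.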
+; ModuleID = 'ts.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0"
+
+@p = common global i8* null, align 8 ; <i8**> [#uses=4]
+@.str = private constant [3 x i8] c"Hi\00" ; <[3 x i8]*> [#uses=1]
+
+define void @bar() nounwind ssp {
+entry:
+ %tmp = load i8** @p ; <i8*> [#uses=1]
+ %0 = call i64 @llvm.objectsize.i64(i8* %tmp, i32 0) ; <i64> [#uses=1]
+ %cmp = icmp ne i64 %0, -1 ; <i1> [#uses=1]
+; X64: movq $-1, %rax
+; X64: cmpq $-1, %rax
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %tmp1 = load i8** @p ; <i8*> [#uses=1]
+ %tmp2 = load i8** @p ; <i8*> [#uses=1]
+ %1 = call i64 @llvm.objectsize.i64(i8* %tmp2, i32 1) ; <i64> [#uses=1]
+ %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1]
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %tmp3 = load i8** @p ; <i8*> [#uses=1]
+ %call4 = call i8* @__inline_strcpy_chk(i8* %tmp3, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) ssp ; <i8*> [#uses=1]
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i8* [ %call, %cond.true ], [ %call4, %cond.false ] ; <i8*> [#uses=0]
+ ret void
+}
+
+declare i64 @llvm.objectsize.i64(i8*, i32) nounwind readonly
+
+declare i8* @__strcpy_chk(i8*, i8*, i64) ssp
+
+define internal i8* @__inline_strcpy_chk(i8* %__dest, i8* %__src) nounwind ssp {
+entry:
+ %retval = alloca i8* ; <i8**> [#uses=2]
+ %__dest.addr = alloca i8* ; <i8**> [#uses=3]
+ %__src.addr = alloca i8* ; <i8**> [#uses=2]
+ store i8* %__dest, i8** %__dest.addr
+ store i8* %__src, i8** %__src.addr
+ %tmp = load i8** %__dest.addr ; <i8*> [#uses=1]
+ %tmp1 = load i8** %__src.addr ; <i8*> [#uses=1]
+ %tmp2 = load i8** %__dest.addr ; <i8*> [#uses=1]
+ %0 = call i64 @llvm.objectsize.i64(i8* %tmp2, i32 1) ; <i64> [#uses=1]
+ %call = call i8* @__strcpy_chk(i8* %tmp, i8* %tmp1, i64 %0) ssp ; <i8*> [#uses=1]
+ store i8* %call, i8** %retval
+ %1 = load i8** %retval ; <i8*> [#uses=1]
+ ret i8* %1
+}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 6319cb8..21c1a3c 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -145,7 +145,9 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
ret void
; X64: t9:
; X64: movsd (%rsi), %xmm0
-; X64: movhps %xmm0, (%rdi)
+; X64: movaps (%rdi), %xmm1
+; X64: movlhps %xmm0, %xmm1
+; X64: movaps %xmm1, (%rdi)
; X64: ret
}
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
index 0f32a50..d762392 100644
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -1,6 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6
-; RUN: grep asm-printer %t | grep 177
type { [62 x %struct.Bitvec*] } ; type %0
type { i8* } ; type %1
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
new file mode 100644
index 0000000..0d86e56
--- /dev/null
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -0,0 +1,408 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s
+
+declare void @bar(i32)
+declare void @car(i32)
+declare void @dar(i32)
+declare void @ear(i32)
+declare void @far(i32)
+declare i1 @qux()
+
+@GHJK = global i32 0
+@HABC = global i32 0
+
+; BranchFolding should tail-merge the stores since they all precede
+; direct branches to the same place.
+
+; CHECK: tail_merge_me:
+; CHECK-NOT: GHJK
+; CHECK: movl $0, GHJK(%rip)
+; CHECK-NEXT: movl $1, HABC(%rip)
+; CHECK-NOT: GHJK
+
+define void @tail_merge_me() nounwind {
+entry:
+ %a = call i1 @qux()
+ br i1 %a, label %A, label %next
+next:
+ %b = call i1 @qux()
+ br i1 %b, label %B, label %C
+
+A:
+ call void @bar(i32 0)
+ store i32 0, i32* @GHJK
+ br label %M
+
+B:
+ call void @car(i32 1)
+ store i32 0, i32* @GHJK
+ br label %M
+
+C:
+ call void @dar(i32 2)
+ store i32 0, i32* @GHJK
+ br label %M
+
+M:
+ store i32 1, i32* @HABC
+ %c = call i1 @qux()
+ br i1 %c, label %return, label %altret
+
+return:
+ call void @ear(i32 1000)
+ ret void
+altret:
+ call void @far(i32 1001)
+ ret void
+}
+
+declare i8* @choose(i8*, i8*);
+
+; BranchFolding should tail-duplicate the indirect jump to avoid
+; redundant branching.
+
+; CHECK: tail_duplicate_me:
+; CHECK: movl $0, GHJK(%rip)
+; CHECK-NEXT: jmpq *%rbx
+; CHECK: movl $0, GHJK(%rip)
+; CHECK-NEXT: jmpq *%rbx
+; CHECK: movl $0, GHJK(%rip)
+; CHECK-NEXT: jmpq *%rbx
+
+define void @tail_duplicate_me() nounwind {
+entry:
+ %a = call i1 @qux()
+ %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return),
+ i8* blockaddress(@tail_duplicate_me, %altret))
+ br i1 %a, label %A, label %next
+next:
+ %b = call i1 @qux()
+ br i1 %b, label %B, label %C
+
+A:
+ call void @bar(i32 0)
+ store i32 0, i32* @GHJK
+ br label %M
+
+B:
+ call void @car(i32 1)
+ store i32 0, i32* @GHJK
+ br label %M
+
+C:
+ call void @dar(i32 2)
+ store i32 0, i32* @GHJK
+ br label %M
+
+M:
+ indirectbr i8* %c, [label %return, label %altret]
+
+return:
+ call void @ear(i32 1000)
+ ret void
+altret:
+ call void @far(i32 1001)
+ ret void
+}
+
+; BranchFolding shouldn't try to merge the tails of two blocks
+; with only a branch in common, regardless of the fallthrough situation.
+
+; CHECK: dont_merge_oddly:
+; CHECK-NOT: ret
+; CHECK: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: jbe .LBB3_3
+; CHECK-NEXT: ucomiss %xmm2, %xmm0
+; CHECK-NEXT: ja .LBB3_4
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB3_3:
+; CHECK-NEXT: ucomiss %xmm2, %xmm1
+; CHECK-NEXT: jbe .LBB3_2
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT: xorb %al, %al
+; CHECK-NEXT: ret
+
+define i1 @dont_merge_oddly(float* %result) nounwind {
+entry:
+ %tmp4 = getelementptr float* %result, i32 2
+ %tmp5 = load float* %tmp4, align 4
+ %tmp7 = getelementptr float* %result, i32 4
+ %tmp8 = load float* %tmp7, align 4
+ %tmp10 = getelementptr float* %result, i32 6
+ %tmp11 = load float* %tmp10, align 4
+ %tmp12 = fcmp olt float %tmp8, %tmp11
+ br i1 %tmp12, label %bb, label %bb21
+
+bb:
+ %tmp23469 = fcmp olt float %tmp5, %tmp8
+ br i1 %tmp23469, label %bb26, label %bb30
+
+bb21:
+ %tmp23 = fcmp olt float %tmp5, %tmp11
+ br i1 %tmp23, label %bb26, label %bb30
+
+bb26:
+ ret i1 0
+
+bb30:
+ ret i1 1
+}
+
+; Do any-size tail-merging when two candidate blocks will both require
+; an unconditional jump to complete a two-way conditional branch.
+
+; CHECK: c_expand_expr_stmt:
+; CHECK: jmp .LBB4_7
+; CHECK-NEXT: .LBB4_12:
+; CHECK-NEXT: movq 8(%rax), %rax
+; CHECK-NEXT: movb 16(%rax), %al
+; CHECK-NEXT: cmpb $16, %al
+; CHECK-NEXT: je .LBB4_6
+; CHECK-NEXT: cmpb $23, %al
+; CHECK-NEXT: je .LBB4_6
+; CHECK-NEXT: jmp .LBB4_15
+; CHECK-NEXT: .LBB4_14:
+; CHECK-NEXT: cmpb $23, %bl
+; CHECK-NEXT: jne .LBB4_15
+; CHECK-NEXT: .LBB4_15:
+
+%0 = type { %struct.rtx_def* }
+%struct.lang_decl = type opaque
+%struct.rtx_def = type { i16, i8, i8, [1 x %union.rtunion] }
+%struct.tree_decl = type { [24 x i8], i8*, i32, %union.tree_node*, i32, i8, i8, i8, i8, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %union..2anon, %0, %union.tree_node*, %struct.lang_decl* }
+%union..2anon = type { i32 }
+%union.rtunion = type { i8* }
+%union.tree_node = type { %struct.tree_decl }
+
+define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
+entry:
+ %tmp4 = load i8* null, align 8 ; <i8> [#uses=3]
+ switch i8 %tmp4, label %bb3 [
+ i8 18, label %bb
+ ]
+
+bb: ; preds = %entry
+ switch i32 undef, label %bb1 [
+ i32 0, label %bb2.i
+ i32 37, label %bb.i
+ ]
+
+bb.i: ; preds = %bb
+ switch i32 undef, label %bb1 [
+ i32 0, label %lvalue_p.exit
+ ]
+
+bb2.i: ; preds = %bb
+ br label %bb3
+
+lvalue_p.exit: ; preds = %bb.i
+ %tmp21 = load %union.tree_node** null, align 8 ; <%union.tree_node*> [#uses=3]
+ %tmp22 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1]
+ %tmp23 = load i8* %tmp22, align 8 ; <i8> [#uses=1]
+ %tmp24 = zext i8 %tmp23 to i32 ; <i32> [#uses=1]
+ switch i32 %tmp24, label %lvalue_p.exit4 [
+ i32 0, label %bb2.i3
+ i32 2, label %bb.i1
+ ]
+
+bb.i1: ; preds = %lvalue_p.exit
+ %tmp25 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1]
+ %tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
+ %tmp27 = load %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2]
+ %tmp28 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+ %tmp29 = load i8* %tmp28, align 8 ; <i8> [#uses=1]
+ %tmp30 = zext i8 %tmp29 to i32 ; <i32> [#uses=1]
+ switch i32 %tmp30, label %lvalue_p.exit4 [
+ i32 0, label %bb2.i.i2
+ i32 2, label %bb.i.i
+ ]
+
+bb.i.i: ; preds = %bb.i1
+ %tmp34 = tail call fastcc i32 @lvalue_p(%union.tree_node* null) nounwind ; <i32> [#uses=1]
+ %phitmp = icmp ne i32 %tmp34, 0 ; <i1> [#uses=1]
+ br label %lvalue_p.exit4
+
+bb2.i.i2: ; preds = %bb.i1
+ %tmp35 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
+ %tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
+ %tmp37 = load %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1]
+ %tmp38 = getelementptr inbounds %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+ %tmp39 = load i8* %tmp38, align 8 ; <i8> [#uses=1]
+ switch i8 %tmp39, label %bb2 [
+ i8 16, label %lvalue_p.exit4
+ i8 23, label %lvalue_p.exit4
+ ]
+
+bb2.i3: ; preds = %lvalue_p.exit
+ %tmp40 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
+ %tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
+ %tmp42 = load %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1]
+ %tmp43 = getelementptr inbounds %union.tree_node* %tmp42, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+ %tmp44 = load i8* %tmp43, align 8 ; <i8> [#uses=1]
+ switch i8 %tmp44, label %bb2 [
+ i8 16, label %lvalue_p.exit4
+ i8 23, label %lvalue_p.exit4
+ ]
+
+lvalue_p.exit4: ; preds = %bb2.i3, %bb2.i3, %bb2.i.i2, %bb2.i.i2, %bb.i.i, %bb.i1, %lvalue_p.exit
+ %tmp45 = phi i1 [ %phitmp, %bb.i.i ], [ false, %bb2.i.i2 ], [ false, %bb2.i.i2 ], [ false, %bb.i1 ], [ false, %bb2.i3 ], [ false, %bb2.i3 ], [ false, %lvalue_p.exit ] ; <i1> [#uses=1]
+ %tmp46 = icmp eq i8 %tmp4, 0 ; <i1> [#uses=1]
+ %or.cond = or i1 %tmp45, %tmp46 ; <i1> [#uses=1]
+ br i1 %or.cond, label %bb2, label %bb3
+
+bb1: ; preds = %bb2.i.i, %bb.i, %bb
+ %.old = icmp eq i8 %tmp4, 23 ; <i1> [#uses=1]
+ br i1 %.old, label %bb2, label %bb3
+
+bb2: ; preds = %bb1, %lvalue_p.exit4, %bb2.i3, %bb2.i.i2
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb1, %lvalue_p.exit4, %bb2.i, %entry
+ %expr_addr.0 = phi %union.tree_node* [ null, %bb2 ], [ %expr, %bb2.i ], [ %expr, %entry ], [ %expr, %bb1 ], [ %expr, %lvalue_p.exit4 ] ; <%union.tree_node*> [#uses=0]
+ unreachable
+}
+
+declare fastcc i32 @lvalue_p(%union.tree_node* nocapture) nounwind readonly
+
+declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
+
+
+; If one tail merging candidate falls through into the other,
+; tail merging is likely profitable regardless of how few
+; instructions are involved. This function should have only
+; one ret instruction.
+
+; CHECK: foo:
+; CHECK: call func
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: ret
+
+define void @foo(i1* %V) nounwind {
+entry:
+ %t0 = icmp eq i1* %V, null
+ br i1 %t0, label %return, label %bb
+
+bb:
+ call void @func()
+ ret void
+
+return:
+ ret void
+}
+
+declare void @func()
+
+; one - One instruction may be tail-duplicated even with optsize.
+
+; CHECK: one:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+
+@XYZ = external global i32
+
+define void @one() nounwind optsize {
+entry:
+ %0 = icmp eq i32 undef, 0
+ br i1 %0, label %bbx, label %bby
+
+bby:
+ switch i32 undef, label %bb7 [
+ i32 16, label %return
+ ]
+
+bb7:
+ volatile store i32 0, i32* @XYZ
+ unreachable
+
+bbx:
+ switch i32 undef, label %bb12 [
+ i32 128, label %return
+ ]
+
+bb12:
+ volatile store i32 0, i32* @XYZ
+ unreachable
+
+return:
+ ret void
+}
+
+; two - Same as one, but with two instructions in the common
+; tail instead of one. This is too much to be tail-duplicated, given
+; the optsize attribute.
+
+; CHECK: two:
+; CHECK-NOT: XYZ
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK-NOT: XYZ
+; CHECK: ret
+
+define void @two() nounwind optsize {
+entry:
+ %0 = icmp eq i32 undef, 0
+ br i1 %0, label %bbx, label %bby
+
+bby:
+ switch i32 undef, label %bb7 [
+ i32 16, label %return
+ ]
+
+bb7:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+bbx:
+ switch i32 undef, label %bb12 [
+ i32 128, label %return
+ ]
+
+bb12:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+return:
+ ret void
+}
+
+; two_nosize - Same as two, but without the optsize attribute.
+; Now two instructions are few enough to be tail-duplicated.
+
+; CHECK: two_nosize:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+
+define void @two_nosize() nounwind {
+entry:
+ %0 = icmp eq i32 undef, 0
+ br i1 %0, label %bbx, label %bby
+
+bby:
+ switch i32 undef, label %bb7 [
+ i32 16, label %return
+ ]
+
+bb7:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+bbx:
+ switch i32 undef, label %bb12 [
+ i32 128, label %return
+ ]
+
+bb12:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall1.ll
index a4f87c0..4923df2 100644
--- a/test/CodeGen/X86/tailcall1.ll
+++ b/test/CodeGen/X86/tailcall1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 4
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
ret i32 %a3
@@ -9,3 +9,24 @@ entry:
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
ret i32 %tmp11
}
+
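+; A noalias mismatch between caller and callee, or an ignored (undef)
+; result, should not prevent these calls from being marked TAILCALL.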
+declare fastcc i8* @alias_callee()
+
+define fastcc noalias i8* @noalias_caller() nounwind {
+ %p = tail call fastcc i8* @alias_callee()
+ ret i8* %p
+}
+
+declare fastcc noalias i8* @noalias_callee()
+
+define fastcc i8* @alias_caller() nounwind {
+ %p = tail call fastcc noalias i8* @noalias_callee()
+ ret i8* %p
+}
+
+declare fastcc i32 @i32_callee()
+
+define fastcc i32 @ret_undef() nounwind {
+ %p = tail call fastcc i32 @i32_callee()
+ ret i32 undef
+}
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
index 556f103..f4930b0 100644
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ b/test/CodeGen/X86/vec_shuffle-3.ll
@@ -18,4 +18,3 @@ entry:
%tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp4
}
-
diff --git a/test/CodeGen/X86/vec_zero-2.ll b/test/CodeGen/X86/vec_zero-2.ll
index e42b538..cdb030e 100644
--- a/test/CodeGen/X86/vec_zero-2.ll
+++ b/test/CodeGen/X86/vec_zero-2.ll
@@ -12,8 +12,8 @@ bb4743: ; preds = %bb1664
%tmp5257 = sub <8 x i16> %tmp5256, zeroinitializer ; <<8 x i16>> [#uses=1]
%tmp5258 = bitcast <8 x i16> %tmp5257 to <2 x i64> ; <<2 x i64>> [#uses=1]
%tmp5265 = bitcast <2 x i64> %tmp5258 to <8 x i16> ; <<8 x i16>> [#uses=1]
- %tmp5266 = call <8 x i16> @llvm.x86.sse2.packuswb.128( <8 x i16> %tmp5265, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1]
- %tmp5267 = bitcast <8 x i16> %tmp5266 to <2 x i64> ; <<2 x i64>> [#uses=1]
+	%tmp5266 = call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> %tmp5265, <8 x i16> zeroinitializer ) nounwind readnone		; <<16 x i8>> [#uses=1]
+ %tmp5267 = bitcast <16 x i8> %tmp5266 to <2 x i64> ; <<2 x i64>> [#uses=1]
%tmp5294 = and <2 x i64> zeroinitializer, %tmp5267 ; <<2 x i64>> [#uses=1]
br label %bb5310
bb5310: ; preds = %bb4743, %bb1664
@@ -21,4 +21,4 @@ bb5310: ; preds = %bb4743, %bb1664
ret i32 0
}
-declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
diff --git a/test/CodeGen/XCore/bigstructret.ll b/test/CodeGen/XCore/bigstructret.ll
new file mode 100644
index 0000000..56af930
--- /dev/null
+++ b/test/CodeGen/XCore/bigstructret.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+%0 = type { i32, i32, i32, i32 }
+%1 = type { i32, i32, i32, i32, i32 }
+
+; Structs of 4 words can be returned in registers
+define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
+entry:
+ %0 = insertvalue %0 zeroinitializer, i32 12, 0
+ %1 = insertvalue %0 %0, i32 24, 1
+ %2 = insertvalue %0 %1, i32 48, 2
+ %3 = insertvalue %0 %2, i32 24601, 3
+ ret %0 %3
+}
+; CHECK: ReturnBigStruct:
+; CHECK: ldc r0, 12
+; CHECK: ldc r1, 24
+; CHECK: ldc r2, 48
+; CHECK: ldc r3, 24601
+; CHECK: retsp 0
+
+; Structs bigger than 4 words are returned via a hidden sret parameter
+define internal fastcc %1 @ReturnBigStruct2() nounwind readnone {
+entry:
+ %0 = insertvalue %1 zeroinitializer, i32 12, 0
+ %1 = insertvalue %1 %0, i32 24, 1
+ %2 = insertvalue %1 %1, i32 48, 2
+ %3 = insertvalue %1 %2, i32 24601, 3
+ %4 = insertvalue %1 %3, i32 4321, 4
+ ret %1 %4
+}
+; CHECK: ReturnBigStruct2:
+; CHECK: ldc r1, 4321
+; CHECK: stw r1, r0[4]
+; CHECK: ldc r1, 24601
+; CHECK: stw r1, r0[3]
+; CHECK: ldc r1, 48
+; CHECK: stw r1, r0[2]
+; CHECK: ldc r1, 24
+; CHECK: stw r1, r0[1]
+; CHECK: ldc r1, 12
+; CHECK: stw r1, r0[0]
+; CHECK: retsp 0
diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
new file mode 100644
index 0000000..c7838c5
--- /dev/null
+++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
@@ -0,0 +1,17 @@
+; RUN: llc %s -o /dev/null
+; Here the variable bar is optimized away. Do not trip over it while trying to generate debug info.
+
+define i32 @foo() nounwind readnone optsize ssp {
+entry:
+ ret i32 42, !dbg !6
+}
+
+!llvm.dbg.gv = !{!0}
+
+!0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"foo.bar", metadata !"foo.bar", metadata !"foo.bar", metadata !2, i32 3, metadata !5, i1 true, i1 true, null}; [DW_TAG_variable ]
+!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !3, i1 false, i1 true}; [DW_TAG_subprogram ]
+!2 = metadata !{i32 458769, i32 0, i32 12, metadata !"st.c", metadata !"/private/tmp", metadata !"clang 1.1", i1 true, i1 true, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!3 = metadata !{i32 458773, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0}; [DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 458788, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
+!6 = metadata !{i32 5, i32 1, metadata !1, null}
diff --git a/test/DebugInfo/2009-11-06-InvalidDerivedType.ll b/test/DebugInfo/2009-11-06-InvalidDerivedType.ll
new file mode 100644
index 0000000..73211bb
--- /dev/null
+++ b/test/DebugInfo/2009-11-06-InvalidDerivedType.ll
@@ -0,0 +1,13 @@
+; RUN: llc %s -o /dev/null
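+; The DW_TAG_const_type entry (!5) is derived from a null base type; llc
+; should emit debug info for @s1 without crashing on it.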
+%struct._t = type { i32 }
+
+@s1 = common global %struct._t zeroinitializer, align 4 ; <%struct._t*> [#uses=0]
+
+!llvm.dbg.gv = !{!0}
+
+!0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"s1", metadata !"s1", metadata !"s1", metadata !1, i32 3, metadata !2, i1 false, i1 true, %struct._t* @s1}; [DW_TAG_variable ]
+!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"t.c", metadata !"/tmp", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!2 = metadata !{i32 458771, metadata !1, metadata !"_t", metadata !1, i32 1, i64 32, i64 32, i64 0, i32 0, null, metadata !3, i32 0}; [DW_TAG_structure_type ]
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 458765, metadata !1, metadata !"j", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5}; [DW_TAG_member ]
+!5 = metadata !{i32 458790, metadata !1, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null}; [DW_TAG_const_type ]
diff --git a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
new file mode 100644
index 0000000..739def8
--- /dev/null
+++ b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
@@ -0,0 +1,8 @@
+; RUN: llc %s -o /dev/null
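+; The global @0 is nameless; debug info emission should tolerate the empty
+; name strings in !0 rather than crashing.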
+@0 = internal constant i32 1 ; <i32*> [#uses=1]
+
+!llvm.dbg.gv = !{!0}
+
+!0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"", metadata !"", metadata !"", metadata !1, i32 378, metadata !2, i1 true, i1 true, i32* @0}; [DW_TAG_variable ]
+!1 = metadata !{i32 458769, i32 0, i32 1, metadata !"cbdsqr.f", metadata !"/home/duncan/LLVM/dragonegg/unsolved/", metadata !"4.5.0 20091030 (experimental)", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!2 = metadata !{i32 458788, metadata !1, metadata !"integer(kind=4)", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
diff --git a/test/DebugInfo/2009-11-10-CurrentFn.ll b/test/DebugInfo/2009-11-10-CurrentFn.ll
new file mode 100644
index 0000000..250395c
--- /dev/null
+++ b/test/DebugInfo/2009-11-10-CurrentFn.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -o /dev/null
+
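+; The call and the ret below carry locations scoped to two different
+; subprograms (baz and bar); llc should switch scopes without crashing.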
+declare void @foo()
+
+define void @bar(i32 %i) nounwind ssp {
+entry:
+ tail call void @foo() nounwind, !dbg !0
+ ret void, !dbg !6
+}
+
+!0 = metadata !{i32 9, i32 0, metadata !1, null}
+!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !3, i1 true, i1 true}; [DW_TAG_subprogram ]
+!2 = metadata !{i32 458769, i32 0, i32 1, metadata !"2007-12-VarArrayDebug.c", metadata !"/Volumes/Data/ddunbar/llvm/test/FrontendC", metadata !"4.2.1 (Based on Apple Inc. build 5653) (LLVM build)", i1 true, i1 true, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!3 = metadata !{i32 458773, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0}; [DW_TAG_subroutine_type ]
+!4 = metadata !{null, metadata !5}
+!5 = metadata !{i32 458788, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
+!6 = metadata !{i32 18, i32 0, metadata !7, null}
+!7 = metadata !{i32 458798, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 16, metadata !8, i1 false, i1 true}; [DW_TAG_subprogram ]
+!8 = metadata !{i32 458773, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0}; [DW_TAG_subroutine_type ]
+!9 = metadata !{null}
diff --git a/test/DebugInfo/2009-11-10-ParentScope.ll b/test/DebugInfo/2009-11-10-ParentScope.ll
new file mode 100644
index 0000000..df5155f
--- /dev/null
+++ b/test/DebugInfo/2009-11-10-ParentScope.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -o /dev/null
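+; The location !7 is scoped inside several nested lexical blocks; llc
+; should walk the parent chain up to the subprogram without crashing.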
+%struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i64, i64, i64, i32, i32, i8* (i64, i64)*, void (i8*)*, i8*, i8* (i8*, i64, i64)*, void (i8*, i8*)*, i32, [4 x i8] }
+
+define i8* @htab_find_with_hash(%struct.htab* %htab, i8* %element, i32 %hash) nounwind {
+entry:
+ br i1 undef, label %land.lhs.true, label %if.end, !dbg !0
+
+land.lhs.true: ; preds = %entry
+ unreachable
+
+if.end: ; preds = %entry
+ store i8* undef, i8** undef, !dbg !7
+ ret i8* undef, !dbg !10
+}
+
+!0 = metadata !{i32 571, i32 3, metadata !1, null}
+!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"htab_find_with_hash", metadata !"htab_find_with_hash", metadata !"htab_find_with_hash", metadata !3, i32 561, metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
+!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"hashtab.c", metadata !"/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 458767, metadata !3, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null}; [DW_TAG_pointer_type ]
+!7 = metadata !{i32 583, i32 7, metadata !8, null}
+!8 = metadata !{i32 458763, metadata !9}; [DW_TAG_lexical_block ]
+!9 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
+!10 = metadata !{i32 588, i32 1, metadata !2, null}
diff --git a/test/FrontendC++/2006-11-06-StackTrace.cpp b/test/FrontendC++/2006-11-06-StackTrace.cpp
index bbb9af1..24eeda8 100644
--- a/test/FrontendC++/2006-11-06-StackTrace.cpp
+++ b/test/FrontendC++/2006-11-06-StackTrace.cpp
@@ -12,7 +12,7 @@
// Only works on ppc (but not apple-darwin9), x86 and x86_64. Should
// generalize?
-// XFAIL: alpha|arm|powerpc-apple-darwin9
+// XFAIL: alpha,arm,powerpc-apple-darwin9
#include <stdlib.h>
diff --git a/test/FrontendC++/2006-11-30-NoCompileUnit.cpp b/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
index 3522c67..cd8b988 100644
--- a/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
+++ b/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
@@ -7,7 +7,7 @@
// RUN: echo {break main\nrun\np NoCompileUnit::pubname} > %t2
// RUN: gdb -q -batch -n -x %t2 NoCompileUnit.exe | \
// RUN: tee NoCompileUnit.out | not grep {"low == high"}
-// XFAIL: alpha|arm
+// XFAIL: alpha,arm
// XFAIL: *
// See PR2454
diff --git a/test/FrontendC++/2006-11-30-Pubnames.cpp b/test/FrontendC++/2006-11-30-Pubnames.cpp
index b44566a..48ad827 100644
--- a/test/FrontendC++/2006-11-30-Pubnames.cpp
+++ b/test/FrontendC++/2006-11-30-Pubnames.cpp
@@ -7,7 +7,7 @@
// RUN: %llvmdsymutil %t.exe
// RUN: echo {break main\nrun\np Pubnames::pubname} > %t.in
// RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | grep {\$1 = 10}
-// XFAIL: alpha|arm
+// XFAIL: alpha,arm
struct Pubnames {
static int pubname;
};
diff --git a/test/FrontendC++/2009-07-15-LineNumbers.cpp b/test/FrontendC++/2009-07-15-LineNumbers.cpp
index 54624a3..21e096c 100644
--- a/test/FrontendC++/2009-07-15-LineNumbers.cpp
+++ b/test/FrontendC++/2009-07-15-LineNumbers.cpp
@@ -2,7 +2,6 @@
// print line numbers in asm.
// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
// RUN: llc --disable-fp-elim -O0 -relocation-model=pic | grep {# SrcLine 25}
-// XFAIL: *
#include <stdlib.h>
diff --git a/test/FrontendC/Atomics-no64bit.c b/test/FrontendC/Atomics-no64bit.c
index 12fb75a..6fb6109 100644
--- a/test/FrontendC/Atomics-no64bit.c
+++ b/test/FrontendC/Atomics-no64bit.c
@@ -9,7 +9,7 @@
// Currently this is implemented only for Alpha, X86, PowerPC.
// Add your target here if it doesn't work.
// This version of the test does not include long long.
-// XFAIL: sparc|arm
+// XFAIL: sparc,arm
signed char sc;
unsigned char uc;
diff --git a/test/FrontendC/Atomics.c b/test/FrontendC/Atomics.c
index 2aed55c..2b96ae0 100644
--- a/test/FrontendC/Atomics.c
+++ b/test/FrontendC/Atomics.c
@@ -9,7 +9,7 @@
// Currently this is implemented only for Alpha, X86, PowerPC.
// Add your target here if it doesn't work.
// PPC32 does not translate the long long variants, so fails this test.
-// XFAIL: sparc|arm|powerpc
+// XFAIL: sparc,arm,powerpc
signed char sc;
unsigned char uc;
diff --git a/test/LLVMC/test_data/false.cpp b/test/LLVMC/test_data/false.cpp
index d3a7102..593fcd5 100644
--- a/test/LLVMC/test_data/false.cpp
+++ b/test/LLVMC/test_data/false.cpp
@@ -1,4 +1,3 @@
-/* RUN: ignore */
#include <stdio.h>
/* Make this invalid C++ */
diff --git a/test/LLVMC/test_data/false2.cpp b/test/LLVMC/test_data/false2.cpp
index a020514..bba064c 100644
--- a/test/LLVMC/test_data/false2.cpp
+++ b/test/LLVMC/test_data/false2.cpp
@@ -1,4 +1,3 @@
-// RUN: ignore
#include <string>
std::string test2() {
diff --git a/test/LLVMC/test_data/together.c b/test/LLVMC/test_data/together.c
index 1b9b5f6..a828c47 100644
--- a/test/LLVMC/test_data/together.c
+++ b/test/LLVMC/test_data/together.c
@@ -1,7 +1,3 @@
-/*
- * RUN: ignore
- */
-
#include <stdio.h>
void test() {
diff --git a/test/Makefile b/test/Makefile
index ede1b44..e7776f8 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -75,6 +75,8 @@ ifdef IGNORE_TESTS
RUNTESTFLAGS += --ignore "$(strip $(IGNORE_TESTS))"
endif
+# ulimits like these are redundantly enforced by the buildbots, so
+# just removing them here won't work.
# Both AuroraUX & Solaris do not have the -m flag for ulimit
ifeq ($(HOST_OS),SunOS)
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
@@ -98,19 +100,11 @@ endif
check-local-lit:: lit.site.cfg Unit/lit.site.cfg
( $(ULIMIT) \
- $(LLVM_SRC_ROOT)/utils/lit/lit.py \
- --path "$(LLVMToolDir)" \
- --path "$(LLVM_SRC_ROOT)/test/Scripts" \
- --path "$(LLVMGCCDIR)/bin" \
- $(LIT_ARGS) $(LIT_TESTSUITE) )
+ $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_TESTSUITE) )
check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-lit-site-cfgs
( $(ULIMIT) \
- $(LLVM_SRC_ROOT)/utils/lit/lit.py \
- --path "$(LLVMToolDir)" \
- --path "$(LLVM_SRC_ROOT)/test/Scripts" \
- --path "$(LLVMGCCDIR)/bin" \
- $(LIT_ARGS) $(LIT_ALL_TESTSUITES) )
+ $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) )
ifdef TESTONE
CLEANED_TESTONE := $(patsubst %/,%,$(TESTONE))
@@ -156,9 +150,8 @@ FORCE:
site.exp: FORCE
@echo 'Making a new site.exp file...'
- @echo '## these variables are automatically generated by make ##' >site.tmp
- @echo '# Do not edit here. If you wish to override these values' >>site.tmp
- @echo '# edit the last section' >>site.tmp
+ @echo '## Autogenerated by LLVM configuration.' > site.tmp
+ @echo '# Do not edit!' >> site.tmp
@echo 'set target_triplet "$(TARGET_TRIPLE)"' >> site.tmp
@echo 'set TARGETS_TO_BUILD "$(TARGETS_TO_BUILD)"' >> site.tmp
@echo 'set llvmgcc_langs "$(LLVMGCC_LANGS)"' >> site.tmp
@@ -202,15 +195,9 @@ lit.site.cfg: site.exp
Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE
@echo "Making LLVM unittest 'lit.site.cfg' file..."
- @echo "## Autogenerated by Makefile ##" > $@
- @echo "# Do not edit!" >> $@
- @echo >> $@
- @echo "# Preserve some key paths for use by main LLVM test suite config." >> $@
- @echo "config.llvm_obj_root = \"\"\"$(LLVM_OBJ_ROOT)\"\"\"" >> $@
- @echo >> $@
- @echo "# Remember the build mode." >> $@
- @echo "config.llvm_build_mode = \"\"\"$(BuildMode)\"\"\"" >> $@
- @echo >> $@
- @echo "# Let the main config do the real work." >> $@
- @echo "lit.load_config(config, \"\"\"$(LLVM_SRC_ROOT)/test/Unit/lit.cfg\"\"\")" >> $@
-
+ @sed -e "s#@LLVM_SOURCE_DIR@#$(LLVM_SRC_ROOT)#g" \
+ -e "s#@LLVM_BINARY_DIR@#$(LLVM_OBJ_ROOT)#g" \
+ -e "s#@LLVM_TOOLS_DIR@#$(ToolDir)#g" \
+ -e "s#@LLVMGCCDIR@#$(LLVMGCCDIR)#g" \
+ -e "s#@LLVM_BUILD_MODE@#$(BuildMode)#g" \
+ $(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@
diff --git a/test/Transforms/ABCD/basic.ll b/test/Transforms/ABCD/basic.ll
new file mode 100644
index 0000000..f2ce1b9
--- /dev/null
+++ b/test/Transforms/ABCD/basic.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -abcd -S | FileCheck %s
+
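+; After -abcd runs, no conditional branch on %tmp95 may remain before the
+; final ret; the CHECK lines below enforce this.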
+define void @test() {
+; CHECK: @test
+; CHECK-NOT: br i1 %tmp95
+; CHECK: ret void
+entry:
+ br label %bb19
+
+bb:
+ br label %bb1
+
+bb1:
+ %tmp7 = icmp sgt i32 %tmp94, 1
+ br i1 %tmp7, label %bb.i.i, label %return
+
+bb.i.i:
+ br label %return
+
+bb19:
+ %tmp94 = ashr i32 undef, 3
+ %tmp95 = icmp sgt i32 %tmp94, 16
+ br i1 %tmp95, label %bb, label %return
+
+return:
+ ret void
+}
diff --git a/test/Transforms/ABCD/dg.exp b/test/Transforms/ABCD/dg.exp
new file mode 100644
index 0000000..f200589
--- /dev/null
+++ b/test/Transforms/ABCD/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/DeadStoreElimination/2009-11-10-Trampoline.ll b/test/Transforms/DeadStoreElimination/2009-11-10-Trampoline.ll
new file mode 100644
index 0000000..9a943b4
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/2009-11-10-Trampoline.ll
@@ -0,0 +1,16 @@
+; RUN: opt -S -dse < %s | FileCheck %s
+
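+; The result of llvm.init.trampoline is never used, so the call and the
+; alloca backing it are dead and should both be removed.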
+declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
+
+declare void @f()
+
+define void @unused_trampoline() {
+; CHECK: @unused_trampoline
+ %storage = alloca [10 x i8], align 16 ; <[10 x i8]*> [#uses=1]
+; CHECK-NOT: alloca
+ %cast = getelementptr [10 x i8]* %storage, i32 0, i32 0 ; <i8*> [#uses=1]
+  %tramp = call i8* @llvm.init.trampoline( i8* %cast, i8* bitcast (void ()* @f to i8*), i8* null )		; <i8*> [#uses=0]
+; CHECK-NOT: trampoline
+ ret void
+; CHECK: ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/const-pointers.ll b/test/Transforms/DeadStoreElimination/const-pointers.ll
new file mode 100644
index 0000000..ce18c6f
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -0,0 +1,39 @@
+; RUN: opt %s -dse -S | FileCheck %s
+
+%t = type { i32 }
+
+@g = global i32 42;
+
+define void @test1(%t* noalias %pp) {
+ %p = getelementptr inbounds %t* %pp, i32 0, i32 0
+
+ store i32 1, i32* %p; <-- This is dead
+ %x = load i32* inttoptr (i32 12345 to i32*)
+ store i32 %x, i32* %p
+ ret void
+; CHECK: define void @test1
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+define void @test3() {
+ store i32 1, i32* @g; <-- This is dead.
+ store i32 42, i32* @g
+ ret void
+;CHECK: define void @test3
+;CHECK: store
+;CHECK-NOT: store
+;CHECK: ret void
+}
+
+define void @test4(i32* %p) {
+ store i32 1, i32* %p
+ %x = load i32* @g; <-- %p and @g could alias
+ store i32 %x, i32* %p
+ ret void
+; CHECK: define void @test4
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/lifetime.ll b/test/Transforms/DeadStoreElimination/lifetime.ll
new file mode 100644
index 0000000..b2da790
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/lifetime.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -dse < %s | FileCheck %s
+
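+; The memset writes to %A after its lifetime has ended, so it is dead and
+; should be removed; the lifetime intrinsic itself must be preserved.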
+declare void @llvm.lifetime.end(i64, i8*)
+declare void @llvm.memset.i8(i8*, i8, i8, i32)
+
+define void @test1() {
+; CHECK: @test1
+ %A = alloca i8
+
+ store i8 0, i8* %A ;; Written to by memset
+ call void @llvm.lifetime.end(i64 1, i8* %A)
+; CHECK: lifetime.end
+
+ call void @llvm.memset.i8(i8* %A, i8 0, i8 -1, i32 0)
+; CHECK-NOT: memset
+
+ ret void
+; CHECK: ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/memintrinsics.ll b/test/Transforms/DeadStoreElimination/memintrinsics.ll
new file mode 100644
index 0000000..e31e9fa
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/memintrinsics.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -dse < %s | FileCheck %s
+
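+; A store that is completely overwritten by a following memcpy, memmove,
+; or memset is dead and should be eliminated.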
+declare void @llvm.memcpy.i8(i8*, i8*, i8, i32)
+declare void @llvm.memmove.i8(i8*, i8*, i8, i32)
+declare void @llvm.memset.i8(i8*, i8, i8, i32)
+
+define void @test1() {
+; CHECK: @test1
+ %A = alloca i8
+ %B = alloca i8
+
+ store i8 0, i8* %A ;; Written to by memcpy
+; CHECK-NOT: store
+
+ call void @llvm.memcpy.i8(i8* %A, i8* %B, i8 -1, i32 0)
+
+ ret void
+; CHECK: ret void
+}
+
+define void @test2() {
+; CHECK: @test2
+ %A = alloca i8
+ %B = alloca i8
+
+ store i8 0, i8* %A ;; Written to by memmove
+; CHECK-NOT: store
+
+ call void @llvm.memmove.i8(i8* %A, i8* %B, i8 -1, i32 0)
+
+ ret void
+; CHECK: ret void
+}
+
+define void @test3() {
+; CHECK: @test3
+ %A = alloca i8
+ %B = alloca i8
+
+ store i8 0, i8* %A ;; Written to by memset
+; CHECK-NOT: store
+
+ call void @llvm.memset.i8(i8* %A, i8 0, i8 -1, i32 0)
+
+ ret void
+; CHECK: ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/partial-overwrite.ll b/test/Transforms/DeadStoreElimination/partial-overwrite.ll
new file mode 100644
index 0000000..048d464
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/partial-overwrite.ll
@@ -0,0 +1,14 @@
+; RUN: opt -dse -S %s | FileCheck %s
+; Note that we could do better by merging the two stores into one.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test(i32* %P) {
+ store i32 0, i32* %P
+; CHECK: store i32
+ %Q = bitcast i32* %P to i16*
+ store i16 1, i16* %Q
+; CHECK: store i16
+ ret void
+}
diff --git a/test/Transforms/GVN/null-aliases-nothing.ll b/test/Transforms/GVN/null-aliases-nothing.ll
new file mode 100644
index 0000000..bc5c850
--- /dev/null
+++ b/test/Transforms/GVN/null-aliases-nothing.ll
@@ -0,0 +1,20 @@
+; RUN: opt %s -gvn -S | FileCheck %s
+
+%t = type { i32 }
+declare void @test1f(i8*)
+
+define void @test1(%t* noalias %stuff ) {
+ %p = getelementptr inbounds %t* %stuff, i32 0, i32 0
+ %before = load i32* %p
+
+ call void @test1f(i8* null)
+
+ %after = load i32* %p ; <--- This should be a dead load
+ %sum = add i32 %before, %after;
+
+ store i32 %sum, i32* %p
+ ret void
+; CHECK: load
+; CHECK-NOT: load
+; CHECK: ret void
+}
diff --git a/test/Transforms/Inline/delete-call.ll b/test/Transforms/Inline/delete-call.ll
new file mode 100644
index 0000000..3505608
--- /dev/null
+++ b/test/Transforms/Inline/delete-call.ll
@@ -0,0 +1,22 @@
+; RUN: opt %s -S -inline -functionattrs -stats |& grep {Number of call sites deleted, not inlined}
+; RUN: opt %s -S -inline -stats |& grep {Number of functions inlined}
+
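+; With -functionattrs proving @test side-effect free, the unused call in
+; @test2 can simply be deleted instead of being inlined.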
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin9.8"
+
+define internal i32 @test(i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+ %0 = add nsw i32 %y, %z ; <i32> [#uses=1]
+ %1 = mul i32 %0, %x ; <i32> [#uses=1]
+ %2 = mul i32 %y, %z ; <i32> [#uses=1]
+ %3 = add nsw i32 %1, %2 ; <i32> [#uses=1]
+ ret i32 %3
+}
+
+define i32 @test2() nounwind {
+entry:
+  %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind       ; <i32> [#uses=0]
+ ret i32 14
+}
+
+
diff --git a/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll b/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
index a49829a..87c2b75 100644
--- a/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
+++ b/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
@@ -1,5 +1,7 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
define i16 @test1(i16 %a) {
%tmp = zext i16 %a to i32 ; <i32> [#uses=2]
%tmp21 = lshr i32 %tmp, 8 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/apint-cast.ll b/test/Transforms/InstCombine/apint-cast.ll
index 9bc539e..85e7a4f 100644
--- a/test/Transforms/InstCombine/apint-cast.ll
+++ b/test/Transforms/InstCombine/apint-cast.ll
@@ -1,6 +1,8 @@
; Tests to make sure elimination of casts is working correctly
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
define i17 @test1(i17 %a) {
%tmp = zext i17 %a to i37 ; <i37> [#uses=2]
%tmp21 = lshr i37 %tmp, 8 ; <i37> [#uses=1]
diff --git a/test/Transforms/InstCombine/cast-mul-select.ll b/test/Transforms/InstCombine/cast-mul-select.ll
index fcb7e23..f55423c 100644
--- a/test/Transforms/InstCombine/cast-mul-select.ll
+++ b/test/Transforms/InstCombine/cast-mul-select.ll
@@ -1,5 +1,7 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32"
+
define i32 @mul(i32 %x, i32 %y) {
%A = trunc i32 %x to i8
%B = trunc i32 %y to i8
diff --git a/test/Transforms/InstCombine/cast-set.ll b/test/Transforms/InstCombine/cast-set.ll
index 611ded4..8934404 100644
--- a/test/Transforms/InstCombine/cast-set.ll
+++ b/test/Transforms/InstCombine/cast-set.ll
@@ -1,6 +1,8 @@
; This tests for various complex cast elimination cases instcombine should
; handle.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
; RUN: opt < %s -instcombine -S | FileCheck %s
define i1 @test1(i32 %X) {
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 79f86e9..e7695b7 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -1,6 +1,6 @@
; Tests to make sure elimination of casts is working correctly
; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
@inbuf = external global [32832 x i8] ; <[32832 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
new file mode 100644
index 0000000..2f98641
--- /dev/null
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -0,0 +1,29 @@
+; RUN: opt %s -instcombine -S | FileCheck %s
+; XFAIL: *
+; PR5438
+
+; TODO: This should also optimize down.
+;define i32 @bar(i32 %a, i32 %b) nounwind readnone {
+;entry:
+; %0 = icmp sgt i32 %a, -1 ; <i1> [#uses=1]
+; %1 = icmp slt i32 %b, 0 ; <i1> [#uses=1]
+; %2 = xor i1 %1, %0 ; <i1> [#uses=1]
+; %3 = zext i1 %2 to i32 ; <i32> [#uses=1]
+; ret i32 %3
+;}
+
+define i32 @qaz(i32 %a, i32 %b) nounwind readnone {
+; CHECK: @qaz
+entry:
+; CHECK: xor i32 %a, %b
+; CHECK: lshr i32 %0, 31
+; CHECK: xor i32 %1, 1
+ %0 = lshr i32 %a, 31 ; <i32> [#uses=1]
+ %1 = lshr i32 %b, 31 ; <i32> [#uses=1]
+ %2 = icmp eq i32 %0, %1 ; <i1> [#uses=1]
+ %3 = zext i1 %2 to i32 ; <i32> [#uses=1]
+ ret i32 %3
+; CHECK-NOT: icmp
+; CHECK-NOT: zext
+; CHECK: ret i32 %2
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
new file mode 100644
index 0000000..7abd380
--- /dev/null
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -0,0 +1,12 @@
+; RUN: opt %s -instcombine -S | FileCheck %s
+
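+; Only the value half of the {i8, i1} result is used, so the intrinsic
+; should be simplified to a plain add.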
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
+
+define i8 @test1(i8 %A, i8 %B) {
+ %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
+ %y = extractvalue {i8, i1} %x, 0
+ ret i8 %y
+; CHECK: @test1
+; CHECK-NEXT: %y = add i8 %A, %B
+; CHECK-NEXT: ret i8 %y
+}
diff --git a/test/Transforms/InstCombine/invariant.ll b/test/Transforms/InstCombine/invariant.ll
new file mode 100644
index 0000000..c67ad33
--- /dev/null
+++ b/test/Transforms/InstCombine/invariant.ll
@@ -0,0 +1,16 @@
+; Test to make sure unused llvm.invariant.start calls are not trivially eliminated
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @g(i8*)
+
+declare { }* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly
+
+define i8 @f() {
+ %a = alloca i8 ; <i8*> [#uses=4]
+ store i8 0, i8* %a
+ %i = call { }* @llvm.invariant.start(i64 1, i8* %a) ; <{ }*> [#uses=0]
+ ; CHECK: call { }* @llvm.invariant.start
+ call void @g(i8* %a)
+ %r = load i8* %a ; <i8> [#uses=1]
+ ret i8 %r
+}
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index b73ce3f..f0343e4 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -2,6 +2,8 @@
;
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
define i32 @test1(i32 %A, i1 %b) {
BB0:
br i1 %b, label %BB1, label %BB2
@@ -222,3 +224,141 @@ end:
; CHECK: ret i1 %z
}
+
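+; The i128 phi below differs only in its high 64 bits between the two
+; incoming values, so it should collapse to a single i64 phi.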
+define i64 @test12(i1 %cond, i8* %Ptr, i64 %Val) {
+entry:
+ %tmp41 = ptrtoint i8* %Ptr to i64
+ %tmp42 = zext i64 %tmp41 to i128
+ br i1 %cond, label %end, label %two
+
+two:
+ %tmp36 = zext i64 %Val to i128 ; <i128> [#uses=1]
+ %tmp37 = shl i128 %tmp36, 64 ; <i128> [#uses=1]
+ %ins39 = or i128 %tmp42, %tmp37 ; <i128> [#uses=1]
+ br label %end
+
+end:
+ %tmp869.0 = phi i128 [ %tmp42, %entry ], [ %ins39, %two ]
+ %tmp32 = trunc i128 %tmp869.0 to i64 ; <i64> [#uses=1]
+ %tmp29 = lshr i128 %tmp869.0, 64 ; <i128> [#uses=1]
+ %tmp30 = trunc i128 %tmp29 to i64 ; <i64> [#uses=1]
+
+ %tmp2 = add i64 %tmp32, %tmp30
+ ret i64 %tmp2
+; CHECK: @test12
+; CHECK-NOT: zext
+; CHECK: end:
+; CHECK-NEXT: phi i64 [ 0, %entry ], [ %Val, %two ]
+; CHECK-NOT: phi
+; CHECK: ret i64
+}
+
+declare void @test13f(double, i32)
+
+define void @test13(i1 %cond, i32 %V1, double %Vald) {
+entry:
+ %tmp42 = zext i32 %V1 to i128
+ br i1 %cond, label %end, label %two
+
+two:
+ %Val = bitcast double %Vald to i64
+ %tmp36 = zext i64 %Val to i128 ; <i128> [#uses=1]
+ %tmp37 = shl i128 %tmp36, 64 ; <i128> [#uses=1]
+ %ins39 = or i128 %tmp42, %tmp37 ; <i128> [#uses=1]
+ br label %end
+
+end:
+ %tmp869.0 = phi i128 [ %tmp42, %entry ], [ %ins39, %two ]
+ %tmp32 = trunc i128 %tmp869.0 to i32
+ %tmp29 = lshr i128 %tmp869.0, 64 ; <i128> [#uses=1]
+ %tmp30 = trunc i128 %tmp29 to i64 ; <i64> [#uses=1]
+ %tmp31 = bitcast i64 %tmp30 to double
+
+ call void @test13f(double %tmp31, i32 %tmp32)
+ ret void
+; CHECK: @test13
+; CHECK-NOT: zext
+; CHECK: end:
+; CHECK-NEXT: phi double [ 0.000000e+00, %entry ], [ %Vald, %two ]
+; CHECK-NEXT: call void @test13f(double {{[^,]*}}, i32 %V1)
+; CHECK: ret void
+}
+
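+; A phi of casts should be performed in the narrower type: test14a's i640
+; phi of zexts becomes an i320 phi, and test14b's phi of truncs stays i160.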
+define i640 @test14a(i320 %A, i320 %B, i1 %b1) {
+BB0:
+ %a = zext i320 %A to i640
+ %b = zext i320 %B to i640
+ br label %Loop
+
+Loop:
+ %C = phi i640 [ %a, %BB0 ], [ %b, %Loop ]
+ br i1 %b1, label %Loop, label %Exit
+
+Exit: ; preds = %Loop
+ ret i640 %C
+; CHECK: @test14a
+; CHECK: Loop:
+; CHECK-NEXT: phi i320
+}
+
+define i160 @test14b(i320 %A, i320 %B, i1 %b1) {
+BB0:
+ %a = trunc i320 %A to i160
+ %b = trunc i320 %B to i160
+ br label %Loop
+
+Loop:
+ %C = phi i160 [ %a, %BB0 ], [ %b, %Loop ]
+ br i1 %b1, label %Loop, label %Exit
+
+Exit: ; preds = %Loop
+ ret i160 %C
+; CHECK: @test14b
+; CHECK: Loop:
+; CHECK-NEXT: phi i160
+}
+
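+; The loop-carried i128 values below are assembled from i64 halves, so
+; both i128 phis should be split into i64 phis, as the CHECK lines verify.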
+declare i64 @test15a(i64)
+
+define i64 @test15b(i64 %A, i1 %b) {
+; CHECK: @test15b
+entry:
+ %i0 = zext i64 %A to i128
+ %i1 = shl i128 %i0, 64
+ %i = or i128 %i1, %i0
+ br i1 %b, label %one, label %two
+; CHECK: entry:
+; CHECK-NEXT: br i1 %b
+
+one:
+ %x = phi i128 [%i, %entry], [%y, %two]
+ %x1 = lshr i128 %x, 64
+ %x2 = trunc i128 %x1 to i64
+ %c = call i64 @test15a(i64 %x2)
+ %c1 = zext i64 %c to i128
+ br label %two
+
+; CHECK: one:
+; CHECK-NEXT: phi i64
+; CHECK-NEXT: %c = call i64 @test15a
+
+two:
+ %y = phi i128 [%i, %entry], [%c1, %one]
+ %y1 = lshr i128 %y, 64
+ %y2 = trunc i128 %y1 to i64
+ %d = call i64 @test15a(i64 %y2)
+ %d1 = trunc i64 %d to i1
+ br i1 %d1, label %one, label %end
+
+; CHECK: two:
+; CHECK-NEXT: phi i64
+; CHECK-NEXT: phi i64
+; CHECK-NEXT: %d = call i64 @test15a
+
+end:
+ %g = trunc i128 %y to i64
+ ret i64 %g
+; CHECK: end:
+; CHECK-NEXT: ret i64
+}
+
diff --git a/test/Transforms/InstCombine/sext-misc.ll b/test/Transforms/InstCombine/sext-misc.ll
index 107bba6..3346ff8 100644
--- a/test/Transforms/InstCombine/sext-misc.ll
+++ b/test/Transforms/InstCombine/sext-misc.ll
@@ -1,5 +1,7 @@
; RUN: opt < %s -instcombine -S | not grep sext
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
declare i32 @llvm.ctpop.i32(i32)
declare i32 @llvm.ctlz.i32(i32)
declare i32 @llvm.cttz.i32(i32)
diff --git a/test/Transforms/InstCombine/udivrem-change-width.ll b/test/Transforms/InstCombine/udivrem-change-width.ll
index 56877e3..9983944 100644
--- a/test/Transforms/InstCombine/udivrem-change-width.ll
+++ b/test/Transforms/InstCombine/udivrem-change-width.ll
@@ -1,6 +1,8 @@
; RUN: opt < %s -instcombine -S | not grep zext
; PR4548
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
define i8 @udiv_i8(i8 %a, i8 %b) nounwind {
%conv = zext i8 %a to i32
%conv2 = zext i8 %b to i32
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index 3d936b8..ac31cdb 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -jump-threading -S | FileCheck %s
+; RUN: opt %s -jump-threading -S -enable-jump-threading-lvi | FileCheck %s
declare i32 @f1()
declare i32 @f2()
@@ -170,5 +170,241 @@ BB4:
}
+;; This tests that the branch in 'merge' can be cloned up into T1.
+;; rdar://7367025
+define i32 @test7(i1 %cond, i1 %cond2) {
+Entry:
+; CHECK: @test7
+ %v1 = call i32 @f1()
+ br i1 %cond, label %Merge, label %F1
+
+F1:
+ %v2 = call i32 @f2()
+ br label %Merge
+
+Merge:
+ %B = phi i32 [%v1, %Entry], [%v2, %F1]
+ %M = icmp ne i32 %B, %v1
+ %N = icmp eq i32 %B, 47
+ %O = and i1 %M, %N
+ br i1 %O, label %T2, label %F2
+
+; CHECK: Merge:
+; CHECK-NOT: phi
+; CHECK-NEXT: %v2 = call i32 @f2()
+
+T2:
+ call void @f3()
+ ret i32 %B
+
+F2:
+ ret i32 %B
+; CHECK: F2:
+; CHECK-NEXT: phi i32
+}
+
+
+declare i1 @test8a()
+
+define i32 @test8b(i1 %cond, i1 %cond2) {
+; CHECK: @test8b
+T0:
+ %A = call i1 @test8a()
+ br i1 %A, label %T1, label %F1
+
+; CHECK: T0:
+; CHECK-NEXT: call
+; CHECK-NEXT: br i1 %A, label %T1, label %Y
+
+T1:
+ %B = call i1 @test8a()
+ br i1 %B, label %T2, label %F1
+
+; CHECK: T1:
+; CHECK-NEXT: call
+; CHECK-NEXT: br i1 %B, label %T2, label %Y
+T2:
+ %C = call i1 @test8a()
+ br i1 %cond, label %T3, label %F1
+
+; CHECK: T2:
+; CHECK-NEXT: call
+; CHECK-NEXT: br i1 %cond, label %T3, label %Y
+T3:
+ ret i32 0
+
+F1:
+ %D = phi i32 [0, %T0], [0, %T1], [1, %T2]
+ %E = icmp eq i32 %D, 1
+ %F = and i1 %E, %cond
+ br i1 %F, label %X, label %Y
+X:
+ call i1 @test8a()
+ ret i32 1
+Y:
+ ret i32 2
+}
+
+
+;;; Verify that we can handle constraint propagation through "xor x, 1".
+define i32 @test9(i1 %cond, i1 %cond2) {
+Entry:
+; CHECK: @test9
+ %v1 = call i32 @f1()
+ br i1 %cond, label %Merge, label %F1
+
+; CHECK: Entry:
+; CHECK-NEXT: %v1 = call i32 @f1()
+; CHECK-NEXT: br i1 %cond, label %F2, label %Merge
+
+F1:
+ %v2 = call i32 @f2()
+ br label %Merge
+
+Merge:
+ %B = phi i32 [%v1, %Entry], [%v2, %F1]
+ %M = icmp eq i32 %B, %v1
+ %M1 = xor i1 %M, 1
+ %N = icmp eq i32 %B, 47
+ %O = and i1 %M1, %N
+ br i1 %O, label %T2, label %F2
+
+; CHECK: Merge:
+; CHECK-NOT: phi
+; CHECK-NEXT: %v2 = call i32 @f2()
+
+T2:
+ %Q = zext i1 %M to i32
+ ret i32 %Q
+
+F2:
+ ret i32 %B
+; CHECK: F2:
+; CHECK-NEXT: phi i32
+}
+
+
+
+; CHECK: @test10
+declare i32 @test10f1()
+declare i32 @test10f2()
+declare void @test10f3()
+
+;; Non-local condition threading.
+define i32 @test10g(i1 %cond) {
+; CHECK: @test10g
+; CHECK-NEXT: br i1 %cond, label %T2, label %F2
+ br i1 %cond, label %T1, label %F1
+
+T1:
+ %v1 = call i32 @test10f1()
+ br label %Merge
+
+; CHECK: %v1 = call i32 @test10f1()
+; CHECK-NEXT: call void @f3()
+; CHECK-NEXT: ret i32 %v1
+
+F1:
+ %v2 = call i32 @test10f2()
+ br label %Merge
+
+Merge:
+ %B = phi i32 [%v1, %T1], [%v2, %F1]
+ br i1 %cond, label %T2, label %F2
+
+T2:
+ call void @f3()
+ ret i32 %B
+
+F2:
+ ret i32 %B
+}
+
+
+; Impossible conditional constraints should get threaded. BB3 is dead here.
+define i32 @test11(i32 %A) {
+; CHECK: @test11
+; CHECK-NEXT: icmp
+; CHECK-NEXT: br i1 %tmp455, label %BB4, label %BB2
+ %tmp455 = icmp eq i32 %A, 42
+ br i1 %tmp455, label %BB1, label %BB2
+
+BB2:
+; CHECK: call i32 @f1()
+; CHECK-NEXT: ret i32 %C
+ %C = call i32 @f1()
+ ret i32 %C
+
+
+BB1:
+ %tmp459 = icmp eq i32 %A, 43
+ br i1 %tmp459, label %BB3, label %BB4
+
+BB3:
+ call i32 @f2()
+ ret i32 3
+
+BB4:
+ call void @f3()
+ ret i32 4
+}
+
+;; Correlated value through boolean expression. GCC PR18046.
+define void @test12(i32 %A) {
+; CHECK: @test12
+entry:
+ %cond = icmp eq i32 %A, 0
+ br i1 %cond, label %bb, label %bb1
+; Should branch straight to the return block instead of through bb1.
+; CHECK: entry:
+; CHECK-NEXT: %cond = icmp eq i32 %A, 0
+; CHECK-NEXT: br i1 %cond, label %bb1, label %return
+
+bb:
+ %B = call i32 @test10f2()
+ br label %bb1
+
+bb1:
+ %C = phi i32 [ %A, %entry ], [ %B, %bb ]
+ %cond4 = icmp eq i32 %C, 0
+ br i1 %cond4, label %bb2, label %return
+
+; CHECK: bb1:
+; CHECK-NEXT: %B = call i32 @test10f2()
+; CHECK-NEXT: %cond4 = icmp eq i32 %B, 0
+; CHECK-NEXT: br i1 %cond4, label %bb2, label %return
+
+bb2:
+ %D = call i32 @test10f2()
+ ret void
+
+return:
+ ret void
+}
+
+
+;;; Duplicate condition to avoid xor of cond.
+;;; TODO: Make this happen.
+define i32 @testXX(i1 %cond, i1 %cond2) {
+Entry:
+; CHECK: @testXX
+ %v1 = call i32 @f1()
+ br i1 %cond, label %Merge, label %F1
+
+F1:
+ br label %Merge
+
+Merge:
+ %B = phi i1 [true, %Entry], [%cond2, %F1]
+ %M = icmp eq i32 %v1, 192
+ %N = xor i1 %B, %M
+ br i1 %N, label %T2, label %F2
+
+T2:
+ ret i32 123
+
+F2:
+ ret i32 %v1
+}
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index 862b403..7e2a2a0 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -170,3 +170,25 @@ bb32.i:
ret i32 1
}
+
+define fastcc void @test5(i1 %tmp, i32 %tmp1) nounwind ssp {
+entry:
+ br i1 %tmp, label %bb12, label %bb13
+
+
+bb12:
+ br label %bb13
+
+bb13:
+ %.lcssa31 = phi i32 [ undef, %bb12 ], [ %tmp1, %entry ]
+ %A = and i1 undef, undef
+ br i1 %A, label %bb15, label %bb61
+
+bb15:
+ ret void
+
+
+bb61:
+ ret void
+}
+
diff --git a/test/Transforms/LCSSA/indirectbr.ll b/test/Transforms/LCSSA/indirectbr.ll
new file mode 100644
index 0000000..9656448
--- /dev/null
+++ b/test/Transforms/LCSSA/indirectbr.ll
@@ -0,0 +1,542 @@
+; RUN: opt < %s -lcssa -verify-loop-info -verify-dom-info -disable-output
+; PR5437
+
+; LCSSA should work correctly when an indirectbr exits the loop and the loop
+; has exits whose predecessors are not within the loop (these edges are
+; unsplittable because of the indirectbr).
+
+define i32 @js_Interpret() nounwind {
+entry:
+ br i1 undef, label %"4", label %"3"
+
+"3": ; preds = %entry
+ ret i32 0
+
+"4": ; preds = %entry
+ br i1 undef, label %"6", label %"5"
+
+"5": ; preds = %"4"
+ unreachable
+
+"6": ; preds = %"4"
+ br i1 undef, label %"10", label %"13"
+
+"10": ; preds = %"6"
+ br i1 undef, label %"22", label %"15"
+
+"13": ; preds = %"6"
+ unreachable
+
+"15": ; preds = %"23", %"10"
+ unreachable
+
+"22": ; preds = %"10"
+ br label %"23"
+
+"23": ; preds = %"1375", %"22"
+ %0 = phi i32 [ undef, %"22" ], [ %1, %"1375" ] ; <i32> [#uses=1]
+ indirectbr i8* undef, [label %"15", label %"24", label %"25", label %"26", label %"27", label %"28", label %"29", label %"30", label %"32", label %"32", label %"33", label %"167", label %"173", label %"173", label %"173", label %"173", label %"173", label %"192", label %"193", label %"194", label %"196", label %"206", label %"231", label %"241", label %"251", label %"261", label %"307", label %"353", label %"354", label %"355", label %"361", label %"367", label %"400", label %"433", label %"466", label %"499", label %"509", label %"519", label %"529", label %"571", label %"589", label %"607", label %"635", label %"655", label %"664", label %"671", label %"680", label %"687", label %"692", label %"698", label %"704", label %"715", label %"715", label %"716", label %"725", label %"725", label %"725", label %"725", label %"724", label %"724", label %"724", label %"724", label %"737", label %"737", label %"737", label %"737", label %"761", label %"758", label %"759", label %"760", label %"766", label %"763", label %"764", label %"765", label %"771", label %"768", label %"769", label %"770", label %"780", label %"777", label %"778", label %"779", label %"821", label %"826", label %"831", label %"832", label %"833", label %"836", label %"836", label %"886", label %"905", label %"978", label %"978", label %"1136", label %"1166", label %"1179", label %"1201", label %"1212", label %"1212", label %"1274", label %"1284", label %"1284", label %"1346", label %"1347", label %"1348", label %"1349", label %"1350", label %"1353", label %"1353", label %"1353", label %"1355", label %"1355", label %"1357", label %"1357", label %"1358", label %"1359", label %"1374", label %"1375", label %"1376", label %"1377", label %"1378", label %"1379", label %"1386", label %"1395", label %"1394", label %"1425", label %"1426", label %"1440", label %"1449", label %"1455", label %"1461", label %"1471", label %"1482", label %"1484", label %"1486", label %"1489", label %"1489", label %"1492", label %"1494", label %"1494", label %"1497", label %"1499", label %"1499", label %"1515", label %"1546", label %"1546", label %"1566", label %"1584", label %"1587", label %"1591", label %"1605", label %"1609", label %"1609", label %"1640", label %"1648", label %"1651", label %"1703", label %"1710", label %"1718", label %"1724", label %"1725", label %"1726", label %"1727", label %"1728", label %"1731", label %"1732", label %"1733", label %"1734", label %"1735", label %"1741", label %"1750", label %"1752", label %"1754", label %"1755", label %"1757", label %"1759", label %"1761", label %"1764", label %"1764", label %"1766", label %"1768", label %"1775", label %"1775", label %"1781", label %"1781", label %"1790", label %"1827", label %"1836", label %"1836", label %"1845", label %"1845", label %"1848", label %"1849", label %"1851", label %"1853", label %"1856", label %"1861", label %"1861"]
+
+"24": ; preds = %"23"
+ unreachable
+
+"25": ; preds = %"23"
+ unreachable
+
+"26": ; preds = %"23"
+ unreachable
+
+"27": ; preds = %"23"
+ unreachable
+
+"28": ; preds = %"23"
+ unreachable
+
+"29": ; preds = %"23"
+ unreachable
+
+"30": ; preds = %"23"
+ unreachable
+
+"32": ; preds = %"23", %"23"
+ unreachable
+
+"33": ; preds = %"23"
+ unreachable
+
+"167": ; preds = %"23"
+ unreachable
+
+"173": ; preds = %"23", %"23", %"23", %"23", %"23"
+ unreachable
+
+"192": ; preds = %"23"
+ unreachable
+
+"193": ; preds = %"23"
+ unreachable
+
+"194": ; preds = %"23"
+ unreachable
+
+"196": ; preds = %"23"
+ unreachable
+
+"206": ; preds = %"23"
+ unreachable
+
+"231": ; preds = %"23"
+ unreachable
+
+"241": ; preds = %"23"
+ unreachable
+
+"251": ; preds = %"23"
+ unreachable
+
+"261": ; preds = %"23"
+ unreachable
+
+"307": ; preds = %"23"
+ unreachable
+
+"353": ; preds = %"23"
+ unreachable
+
+"354": ; preds = %"23"
+ unreachable
+
+"355": ; preds = %"23"
+ unreachable
+
+"361": ; preds = %"23"
+ unreachable
+
+"367": ; preds = %"23"
+ unreachable
+
+"400": ; preds = %"23"
+ unreachable
+
+"433": ; preds = %"23"
+ unreachable
+
+"466": ; preds = %"23"
+ unreachable
+
+"499": ; preds = %"23"
+ unreachable
+
+"509": ; preds = %"23"
+ unreachable
+
+"519": ; preds = %"23"
+ unreachable
+
+"529": ; preds = %"23"
+ unreachable
+
+"571": ; preds = %"23"
+ unreachable
+
+"589": ; preds = %"23"
+ unreachable
+
+"607": ; preds = %"23"
+ unreachable
+
+"635": ; preds = %"23"
+ unreachable
+
+"655": ; preds = %"23"
+ unreachable
+
+"664": ; preds = %"23"
+ unreachable
+
+"671": ; preds = %"23"
+ unreachable
+
+"680": ; preds = %"23"
+ unreachable
+
+"687": ; preds = %"23"
+ unreachable
+
+"692": ; preds = %"23"
+ br label %"1862"
+
+"698": ; preds = %"23"
+ unreachable
+
+"704": ; preds = %"23"
+ unreachable
+
+"715": ; preds = %"23", %"23"
+ unreachable
+
+"716": ; preds = %"23"
+ unreachable
+
+"724": ; preds = %"23", %"23", %"23", %"23"
+ unreachable
+
+"725": ; preds = %"23", %"23", %"23", %"23"
+ unreachable
+
+"737": ; preds = %"23", %"23", %"23", %"23"
+ unreachable
+
+"758": ; preds = %"23"
+ unreachable
+
+"759": ; preds = %"23"
+ unreachable
+
+"760": ; preds = %"23"
+ unreachable
+
+"761": ; preds = %"23"
+ unreachable
+
+"763": ; preds = %"23"
+ unreachable
+
+"764": ; preds = %"23"
+ unreachable
+
+"765": ; preds = %"23"
+ br label %"766"
+
+"766": ; preds = %"765", %"23"
+ unreachable
+
+"768": ; preds = %"23"
+ unreachable
+
+"769": ; preds = %"23"
+ unreachable
+
+"770": ; preds = %"23"
+ unreachable
+
+"771": ; preds = %"23"
+ unreachable
+
+"777": ; preds = %"23"
+ unreachable
+
+"778": ; preds = %"23"
+ unreachable
+
+"779": ; preds = %"23"
+ unreachable
+
+"780": ; preds = %"23"
+ unreachable
+
+"821": ; preds = %"23"
+ unreachable
+
+"826": ; preds = %"23"
+ unreachable
+
+"831": ; preds = %"23"
+ unreachable
+
+"832": ; preds = %"23"
+ unreachable
+
+"833": ; preds = %"23"
+ unreachable
+
+"836": ; preds = %"23", %"23"
+ unreachable
+
+"886": ; preds = %"23"
+ unreachable
+
+"905": ; preds = %"23"
+ unreachable
+
+"978": ; preds = %"23", %"23"
+ unreachable
+
+"1136": ; preds = %"23"
+ unreachable
+
+"1166": ; preds = %"23"
+ unreachable
+
+"1179": ; preds = %"23"
+ unreachable
+
+"1201": ; preds = %"23"
+ unreachable
+
+"1212": ; preds = %"23", %"23"
+ unreachable
+
+"1274": ; preds = %"23"
+ unreachable
+
+"1284": ; preds = %"23", %"23"
+ unreachable
+
+"1346": ; preds = %"23"
+ unreachable
+
+"1347": ; preds = %"23"
+ unreachable
+
+"1348": ; preds = %"23"
+ unreachable
+
+"1349": ; preds = %"23"
+ unreachable
+
+"1350": ; preds = %"23"
+ unreachable
+
+"1353": ; preds = %"23", %"23", %"23"
+ unreachable
+
+"1355": ; preds = %"23", %"23"
+ unreachable
+
+"1357": ; preds = %"23", %"23"
+ unreachable
+
+"1358": ; preds = %"23"
+ unreachable
+
+"1359": ; preds = %"23"
+ unreachable
+
+"1374": ; preds = %"23"
+ unreachable
+
+"1375": ; preds = %"23"
+ %1 = zext i8 undef to i32 ; <i32> [#uses=1]
+ br label %"23"
+
+"1376": ; preds = %"23"
+ unreachable
+
+"1377": ; preds = %"23"
+ unreachable
+
+"1378": ; preds = %"23"
+ unreachable
+
+"1379": ; preds = %"23"
+ unreachable
+
+"1386": ; preds = %"23"
+ unreachable
+
+"1394": ; preds = %"23"
+ unreachable
+
+"1395": ; preds = %"23"
+ unreachable
+
+"1425": ; preds = %"23"
+ unreachable
+
+"1426": ; preds = %"23"
+ unreachable
+
+"1440": ; preds = %"23"
+ unreachable
+
+"1449": ; preds = %"23"
+ unreachable
+
+"1455": ; preds = %"23"
+ unreachable
+
+"1461": ; preds = %"23"
+ unreachable
+
+"1471": ; preds = %"23"
+ unreachable
+
+"1482": ; preds = %"23"
+ unreachable
+
+"1484": ; preds = %"23"
+ unreachable
+
+"1486": ; preds = %"23"
+ unreachable
+
+"1489": ; preds = %"23", %"23"
+ unreachable
+
+"1492": ; preds = %"23"
+ unreachable
+
+"1494": ; preds = %"23", %"23"
+ unreachable
+
+"1497": ; preds = %"23"
+ unreachable
+
+"1499": ; preds = %"23", %"23"
+ unreachable
+
+"1515": ; preds = %"23"
+ unreachable
+
+"1546": ; preds = %"23", %"23"
+ unreachable
+
+"1566": ; preds = %"23"
+ br i1 undef, label %"1569", label %"1568"
+
+"1568": ; preds = %"1566"
+ unreachable
+
+"1569": ; preds = %"1566"
+ unreachable
+
+"1584": ; preds = %"23"
+ unreachable
+
+"1587": ; preds = %"23"
+ unreachable
+
+"1591": ; preds = %"23"
+ unreachable
+
+"1605": ; preds = %"23"
+ unreachable
+
+"1609": ; preds = %"23", %"23"
+ unreachable
+
+"1640": ; preds = %"23"
+ unreachable
+
+"1648": ; preds = %"23"
+ unreachable
+
+"1651": ; preds = %"23"
+ unreachable
+
+"1703": ; preds = %"23"
+ unreachable
+
+"1710": ; preds = %"23"
+ unreachable
+
+"1718": ; preds = %"23"
+ unreachable
+
+"1724": ; preds = %"23"
+ unreachable
+
+"1725": ; preds = %"23"
+ unreachable
+
+"1726": ; preds = %"23"
+ unreachable
+
+"1727": ; preds = %"23"
+ unreachable
+
+"1728": ; preds = %"23"
+ unreachable
+
+"1731": ; preds = %"23"
+ unreachable
+
+"1732": ; preds = %"23"
+ unreachable
+
+"1733": ; preds = %"23"
+ unreachable
+
+"1734": ; preds = %"23"
+ unreachable
+
+"1735": ; preds = %"23"
+ unreachable
+
+"1741": ; preds = %"23"
+ unreachable
+
+"1750": ; preds = %"23"
+ unreachable
+
+"1752": ; preds = %"23"
+ unreachable
+
+"1754": ; preds = %"23"
+ unreachable
+
+"1755": ; preds = %"23"
+ unreachable
+
+"1757": ; preds = %"23"
+ unreachable
+
+"1759": ; preds = %"23"
+ unreachable
+
+"1761": ; preds = %"23"
+ unreachable
+
+"1764": ; preds = %"23", %"23"
+ %2 = icmp eq i32 %0, 168 ; <i1> [#uses=0]
+ unreachable
+
+"1766": ; preds = %"23"
+ unreachable
+
+"1768": ; preds = %"23"
+ unreachable
+
+"1775": ; preds = %"23", %"23"
+ unreachable
+
+"1781": ; preds = %"23", %"23"
+ unreachable
+
+"1790": ; preds = %"23"
+ unreachable
+
+"1827": ; preds = %"23"
+ unreachable
+
+"1836": ; preds = %"23", %"23"
+ br label %"1862"
+
+"1845": ; preds = %"23", %"23"
+ unreachable
+
+"1848": ; preds = %"23"
+ unreachable
+
+"1849": ; preds = %"23"
+ unreachable
+
+"1851": ; preds = %"23"
+ unreachable
+
+"1853": ; preds = %"23"
+ unreachable
+
+"1856": ; preds = %"23"
+ unreachable
+
+"1861": ; preds = %"23", %"23"
+ unreachable
+
+"41": ; preds = %"23", %"23"
+ unreachable
+
+"1862": ; preds = %"1836", %"692"
+ unreachable
+}
diff --git a/test/Transforms/LoopSimplify/indirectbr.ll b/test/Transforms/LoopSimplify/indirectbr.ll
new file mode 100644
index 0000000..b023847
--- /dev/null
+++ b/test/Transforms/LoopSimplify/indirectbr.ll
@@ -0,0 +1,83 @@
+; RUN: opt < %s -loopsimplify -lcssa -verify-loop-info -verify-dom-info -S \
+; RUN: | grep -F {indirectbr i8* %x, \[label %L0, label %L1\]} \
+; RUN: | count 6
+
+; LoopSimplify should not try to transform loops when indirectbr is involved.
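+; (Canonicalizing such loops would require splitting edges coming from an
+; indirectbr, which is not possible in general.)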
+
+define void @entry(i8* %x) {
+entry:
+ indirectbr i8* %x, [ label %L0, label %L1 ]
+
+L0:
+ br label %L0
+
+L1:
+ ret void
+}
+
+define void @backedge(i8* %x) {
+entry:
+ br label %L0
+
+L0:
+ br label %L1
+
+L1:
+ indirectbr i8* %x, [ label %L0, label %L1 ]
+}
+
+define i64 @exit(i8* %x) {
+entry:
+ br label %L2
+
+L2:
+ %z = bitcast i64 0 to i64
+ indirectbr i8* %x, [ label %L0, label %L1 ]
+
+L0:
+ br label %L2
+
+L1:
+ ret i64 %z
+}
+
+define i64 @criticalexit(i8* %x, i1 %a) {
+entry:
+ br i1 %a, label %L1, label %L2
+
+L2:
+ %z = bitcast i64 0 to i64
+ indirectbr i8* %x, [ label %L0, label %L1 ]
+
+L0:
+ br label %L2
+
+L1:
+ %y = phi i64 [ %z, %L2 ], [ 1, %entry ]
+ ret i64 %y
+}
+
+define i64 @exit_backedge(i8* %x) {
+entry:
+ br label %L0
+
+L0:
+ %z = bitcast i64 0 to i64
+ indirectbr i8* %x, [ label %L0, label %L1 ]
+
+L1:
+ ret i64 %z
+}
+
+define i64 @criticalexit_backedge(i8* %x, i1 %a) {
+entry:
+ br i1 %a, label %L0, label %L1
+
+L0:
+ %z = bitcast i64 0 to i64
+ indirectbr i8* %x, [ label %L0, label %L1 ]
+
+L1:
+ %y = phi i64 [ %z, %L0 ], [ 1, %entry ]
+ ret i64 %y
+}
diff --git a/test/Transforms/LoopStrengthReduce/2009-11-10-LSRCrash.ll b/test/Transforms/LoopStrengthReduce/2009-11-10-LSRCrash.ll
new file mode 100644
index 0000000..4032a59
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2009-11-10-LSRCrash.ll
@@ -0,0 +1,130 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin11
+
+define void @_ZN4llvm20SelectionDAGLowering14visitInlineAsmENS_8CallSiteE() nounwind ssp align 2 {
+entry:
+ br i1 undef, label %bb3.i, label %bb4.i
+
+bb3.i: ; preds = %entry
+ unreachable
+
+bb4.i: ; preds = %entry
+ br i1 undef, label %bb.i.i, label %_ZNK4llvm8CallSite14getCalledValueEv.exit
+
+bb.i.i: ; preds = %bb4.i
+ unreachable
+
+_ZNK4llvm8CallSite14getCalledValueEv.exit: ; preds = %bb4.i
+ br i1 undef, label %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit, label %bb6.i
+
+bb6.i: ; preds = %_ZNK4llvm8CallSite14getCalledValueEv.exit
+ unreachable
+
+_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit: ; preds = %_ZNK4llvm8CallSite14getCalledValueEv.exit
+ br i1 undef, label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit, label %bb.i
+
+bb.i: ; preds = %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit
+ br label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit
+
+_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit: ; preds = %bb.i, %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit
+ br i1 undef, label %bb50, label %bb27
+
+bb27: ; preds = %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit
+ br i1 undef, label %bb1.i727, label %bb.i.i726
+
+bb.i.i726: ; preds = %bb27
+ unreachable
+
+bb1.i727: ; preds = %bb27
+ unreachable
+
+bb50: ; preds = %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit
+ br label %bb107
+
+bb51: ; preds = %bb107
+ br i1 undef, label %bb105, label %bb106
+
+bb105: ; preds = %bb51
+ unreachable
+
+bb106: ; preds = %bb51
+ br label %bb107
+
+bb107: ; preds = %bb106, %bb50
+ br i1 undef, label %bb108, label %bb51
+
+bb108: ; preds = %bb107
+ br i1 undef, label %bb242, label %bb114
+
+bb114: ; preds = %bb108
+ br i1 undef, label %bb141, label %bb116
+
+bb116: ; preds = %bb114
+ br i1 undef, label %bb120, label %bb121
+
+bb120: ; preds = %bb116
+ unreachable
+
+bb121: ; preds = %bb116
+ unreachable
+
+bb141: ; preds = %bb114
+ br i1 undef, label %bb182, label %bb143
+
+bb143: ; preds = %bb141
+ br label %bb157
+
+bb144: ; preds = %bb.i.i.i843
+ switch i32 undef, label %bb155 [
+ i32 2, label %bb153
+ i32 6, label %bb153
+ i32 4, label %bb153
+ ]
+
+bb153: ; preds = %bb144, %bb144, %bb144
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br label %bb157
+
+bb155: ; preds = %bb144
+ unreachable
+
+bb157: ; preds = %bb153, %bb143
+ %indvar = phi i32 [ %indvar.next, %bb153 ], [ 0, %bb143 ] ; <i32> [#uses=2]
+ %0 = icmp eq i32 undef, %indvar ; <i1> [#uses=1]
+ switch i16 undef, label %bb6.i841 [
+ i16 9, label %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
+ i16 26, label %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
+ ]
+
+bb6.i841: ; preds = %bb157
+ unreachable
+
+_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit: ; preds = %bb157, %bb157
+ br i1 undef, label %bb.i.i.i843, label %bb1.i.i.i844
+
+bb.i.i.i843: ; preds = %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
+ br i1 %0, label %bb158, label %bb144
+
+bb1.i.i.i844: ; preds = %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
+ unreachable
+
+bb158: ; preds = %bb.i.i.i843
+ br i1 undef, label %bb177, label %bb176
+
+bb176: ; preds = %bb158
+ unreachable
+
+bb177: ; preds = %bb158
+ br i1 undef, label %bb179, label %bb178
+
+bb178: ; preds = %bb177
+ unreachable
+
+bb179: ; preds = %bb177
+ unreachable
+
+bb182: ; preds = %bb141
+ unreachable
+
+bb242: ; preds = %bb108
+ unreachable
+}
diff --git a/test/Transforms/LoopStrengthReduce/count-to-zero.ll b/test/Transforms/LoopStrengthReduce/count-to-zero.ll
new file mode 100644
index 0000000..8cc3b5c
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/count-to-zero.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; rdar://7382068
+
+define void @t(i32 %c) nounwind optsize {
+entry:
+ br label %bb6
+
+bb1: ; preds = %bb6
+ %tmp = icmp eq i32 %c_addr.1, 20 ; <i1> [#uses=1]
+ br i1 %tmp, label %bb2, label %bb3
+
+bb2: ; preds = %bb1
+ %tmp1 = tail call i32 @f20(i32 %c_addr.1) nounwind ; <i32> [#uses=1]
+ br label %bb7
+
+bb3: ; preds = %bb1
+ %tmp2 = icmp slt i32 %c_addr.1, 10 ; <i1> [#uses=1]
+ %tmp3 = add nsw i32 %c_addr.1, 1 ; <i32> [#uses=1]
+ %tmp4 = add i32 %c_addr.1, -1 ; <i32> [#uses=1]
+ %c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+; CHECK: sub i32 %lsr.iv, 1
+ br label %bb6
+
+bb6: ; preds = %bb3, %entry
+ %indvar = phi i32 [ %indvar.next, %bb3 ], [ 0, %entry ] ; <i32> [#uses=2]
+ %c_addr.1 = phi i32 [ %c_addr.1.be, %bb3 ], [ %c, %entry ] ; <i32> [#uses=7]
+ %tmp5 = icmp eq i32 %indvar, 9999 ; <i1> [#uses=1]
+; CHECK: icmp eq i32 %lsr.iv, 0
+ %tmp6 = icmp eq i32 %c_addr.1, 100 ; <i1> [#uses=1]
+ %or.cond = or i1 %tmp5, %tmp6 ; <i1> [#uses=1]
+ br i1 %or.cond, label %bb7, label %bb1
+
+bb7: ; preds = %bb6, %bb2
+ %c_addr.0 = phi i32 [ %tmp1, %bb2 ], [ %c_addr.1, %bb6 ] ; <i32> [#uses=1]
+ tail call void @bar(i32 %c_addr.0) nounwind
+ ret void
+}
+
+declare i32 @f20(i32)
+
+declare void @bar(i32)
diff --git a/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll b/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll
new file mode 100644
index 0000000..4ad5d14
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+ br i1 undef, label %bb4.preheader, label %bb.nph8
+
+bb4.preheader: ; preds = %entry
+ br label %bb4
+
+bb1: ; preds = %bb4
+ br i1 undef, label %bb.nph8, label %bb3
+
+bb3: ; preds = %bb1
+ %phitmp = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br label %bb4
+
+bb4: ; preds = %bb3, %bb4.preheader
+; CHECK: %lsr.iv = phi
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, 1
+; CHECK: %0 = icmp slt i32 %lsr.iv.next, %argc
+ %indvar = phi i32 [ 1, %bb4.preheader ], [ %phitmp, %bb3 ] ; <i32> [#uses=2]
+ %0 = icmp slt i32 %indvar, %argc ; <i1> [#uses=1]
+ br i1 %0, label %bb1, label %bb.nph8
+
+bb.nph8: ; preds = %bb4, %bb1, %entry
+ unreachable
+}
diff --git a/test/Transforms/SCCP/crash.ll b/test/Transforms/SCCP/crash.ll
index e34eaca..2f6da1d 100644
--- a/test/Transforms/SCCP/crash.ll
+++ b/test/Transforms/SCCP/crash.ll
@@ -22,3 +22,8 @@ bb34:
return:
ret void
}
+
+define i32 @test2([4 x i32] %A) {
+ %B = extractvalue [4 x i32] %A, 1
+ ret i32 %B
+}
diff --git a/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll b/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll
index 7b8a918..c6ae6ac 100644
--- a/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll
+++ b/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplifycfg | llvm-dis
+; RUN: opt < %s -simplifycfg -disable-output
; PR3016
; Dead use caused invariant violation.
diff --git a/test/Transforms/SimplifyLibCalls/memcmp.ll b/test/Transforms/SimplifyLibCalls/memcmp.ll
index 7008736..ed7bcac 100644
--- a/test/Transforms/SimplifyLibCalls/memcmp.ll
+++ b/test/Transforms/SimplifyLibCalls/memcmp.ll
@@ -17,6 +17,10 @@ define void @test(i8* %P, i8* %Q, i32 %N, i32* %IP, i1* %BP) {
%D = call i32 @memcmp( i8* %P, i8* %Q, i32 2 ) ; <i32> [#uses=1]
%E = icmp eq i32 %D, 0 ; <i1> [#uses=1]
volatile store i1 %E, i1* %BP
+ %F = call i32 @memcmp(i8* getelementptr ([4 x i8]* @hel, i32 0, i32 0),
+ i8* getelementptr ([8 x i8]* @hello_u, i32 0, i32 0),
+ i32 3)
+ volatile store i32 %F, i32* %IP
ret void
}
diff --git a/test/Transforms/TailCallElim/nocapture.ll b/test/Transforms/TailCallElim/nocapture.ll
new file mode 100644
index 0000000..87cb9dd
--- /dev/null
+++ b/test/Transforms/TailCallElim/nocapture.ll
@@ -0,0 +1,25 @@
+; RUN: opt %s -tailcallelim -S | FileCheck %s
+; XFAIL: *
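+; (lit treats "XFAIL: *" as expected-to-fail on every target; presumably the
+; nocapture-aware elimination checked below is not implemented yet.)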
+
+declare void @use(i8* nocapture, i8* nocapture)
+
+define i8* @foo(i8* nocapture %A, i1 %cond) {
+; CHECK: tailrecurse:
+; CHECK: %A.tr = phi i8* [ %A, %0 ], [ %B, %cond_true ]
+; CHECK: %cond.tr = phi i1 [ %cond, %0 ], [ false, %cond_true ]
+ %B = alloca i8
+; CHECK: %B = alloca i8
+ br i1 %cond, label %cond_true, label %cond_false
+; CHECK: br i1 %cond.tr, label %cond_true, label %cond_false
+cond_true:
+; CHECK: cond_true:
+; CHECK: br label %tailrecurse
+ call i8* @foo(i8* %B, i1 false)
+ ret i8* null
+cond_false:
+; CHECK: cond_false
+ call void @use(i8* %A, i8* %B)
+; CHECK: tail call void @use(i8* %A.tr, i8* %B)
+ ret i8* null
+; CHECK: ret i8* null
+}
diff --git a/test/Transforms/TailCallElim/switch.ll b/test/Transforms/TailCallElim/switch.ll
new file mode 100644
index 0000000..3388431
--- /dev/null
+++ b/test/Transforms/TailCallElim/switch.ll
@@ -0,0 +1,34 @@
+; RUN: opt %s -tailcallelim -S | FileCheck %s
+
+define i64 @fib(i64 %n) nounwind readnone {
+; CHECK: @fib
+entry:
+; CHECK: tailrecurse:
+; CHECK: %accumulator.tr = phi i64 [ %n, %entry ], [ %3, %bb1 ]
+; CHECK: %n.tr = phi i64 [ %n, %entry ], [ %2, %bb1 ]
+ switch i64 %n, label %bb1 [
+; CHECK: switch i64 %n.tr, label %bb1 [
+ i64 0, label %bb2
+ i64 1, label %bb2
+ ]
+
+bb1:
+; CHECK: bb1:
+ %0 = add i64 %n, -1
+; CHECK: %0 = add i64 %n.tr, -1
+ %1 = tail call i64 @fib(i64 %0) nounwind
+; CHECK: %1 = tail call i64 @fib(i64 %0)
+ %2 = add i64 %n, -2
+; CHECK: %2 = add i64 %n.tr, -2
+ %3 = tail call i64 @fib(i64 %2) nounwind
+; CHECK-NOT: tail call i64 @fib
+ %4 = add nsw i64 %3, %1
+; CHECK: add nsw i64 %accumulator.tr, %1
+ ret i64 %4
+; CHECK: br label %tailrecurse
+
+bb2:
+; CHECK: bb2:
+ ret i64 %n
+; CHECK: ret i64 %accumulator.tr
+}
diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg
index 6fd3998..8321593 100644
--- a/test/Unit/lit.cfg
+++ b/test/Unit/lit.cfg
@@ -7,8 +7,7 @@ import os
# name: The name of this test suite.
config.name = 'LLVM-Unit'
-# suffixes: A list of file extensions to treat as test files, this is actually
-# set by on_clone().
+# suffixes: A list of file extensions to treat as test files.
config.suffixes = []
# test_source_root: The root path where tests are located.
diff --git a/test/Unit/lit.site.cfg.in b/test/Unit/lit.site.cfg.in
new file mode 100644
index 0000000..c190ffa
--- /dev/null
+++ b/test/Unit/lit.site.cfg.in
@@ -0,0 +1,10 @@
+## Autogenerated by LLVM/Clang configuration.
+# Do not edit!
+config.llvm_src_root = "@LLVM_SOURCE_DIR@"
+config.llvm_obj_root = "@LLVM_BINARY_DIR@"
+config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
+config.llvmgcc_dir = "@LLVMGCCDIR@"
+config.llvm_build_mode = "@LLVM_BUILD_MODE@"
+
+# Let the main config do the real work.
+lit.load_config(config, "@LLVM_SOURCE_DIR@/test/Unit/lit.cfg")
diff --git a/test/lit.cfg b/test/lit.cfg
index 1965615..1939792 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -22,6 +22,31 @@ llvm_obj_root = getattr(config, 'llvm_obj_root', None)
if llvm_obj_root is not None:
config.test_exec_root = os.path.join(llvm_obj_root, 'test')
+# Tweak the PATH to include the scripts dir, the tools dir, and the llvm-gcc bin
+# dir (if available).
+if llvm_obj_root is not None:
+ llvm_src_root = getattr(config, 'llvm_src_root', None)
+ if not llvm_src_root:
+ lit.fatal('No LLVM source root set!')
+ path = os.path.pathsep.join((os.path.join(llvm_src_root, 'test',
+ 'Scripts'),
+ config.environment['PATH']))
+ config.environment['PATH'] = path
+
+ llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
+ if not llvm_tools_dir:
+ lit.fatal('No LLVM tools dir set!')
+ path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH']))
+ config.environment['PATH'] = path
+
+ llvmgcc_dir = getattr(config, 'llvmgcc_dir', None)
+ if llvmgcc_dir:
+ path = os.path.pathsep.join((os.path.join(llvmgcc_dir, 'bin'),
+ config.environment['PATH']))
+ config.environment['PATH'] = path
+
###
import os
diff --git a/test/site.exp.in b/test/site.exp.in
index 6a74ba8..f88d361 100644
--- a/test/site.exp.in
+++ b/test/site.exp.in
@@ -1,9 +1,10 @@
-## Autogenerated by LLVM/Clang configuration.
+## Autogenerated by LLVM configuration.
# Do not edit!
-set target_triplet "@target@"
+set target_triplet "@TARGET_TRIPLE@"
set TARGETS_TO_BUILD "@TARGETS_TO_BUILD@"
set llvmgcc_langs "@LLVMGCC_LANGS@"
set llvmgcc_version "@LLVMGCC_VERSION@"
+set llvmtoolsdir "@LLVM_TOOLS_DIR@"
set llvmlibsdir "@LLVM_LIBS_DIR@"
set llvm_bindings "@LLVM_BINDINGS@"
set srcroot "@LLVM_SOURCE_DIR@"
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 4551d41..645776e 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -18,7 +18,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h" // for HAVE_LINK_R
#include <fstream>
#include <sstream>
@@ -200,14 +199,15 @@ int LLI::ExecuteProgram(const std::string &Bitcode,
const std::vector<std::string> &SharedLibs,
unsigned Timeout,
unsigned MemoryLimit) {
- if (!SharedLibs.empty())
- throw ToolExecutionError("LLI currently does not support "
- "loading shared libraries.");
-
std::vector<const char*> LLIArgs;
LLIArgs.push_back(LLIPath.c_str());
LLIArgs.push_back("-force-interpreter=true");
+  for (std::vector<std::string>::const_iterator i = SharedLibs.begin(),
+         e = SharedLibs.end(); i != e; ++i) {
+ LLIArgs.push_back("-load");
+ LLIArgs.push_back((*i).c_str());
+ }
+
// Add any extra LLI args.
for (unsigned i = 0, e = ToolArgs.size(); i != e; ++i)
LLIArgs.push_back(ToolArgs[i].c_str());
@@ -610,9 +610,10 @@ IsARMArchitecture(std::vector<std::string> Args)
{
for (std::vector<std::string>::const_iterator
I = Args.begin(), E = Args.end(); I != E; ++I) {
- if (!StringsEqualNoCase(*I, "-arch")) {
+    StringRef S(*I);
+    if (S.equals_lower("-arch")) {
++I;
- if ((I != E) && !StringsEqualNoCase(I->c_str(), "arm", strlen("arm"))) {
+      if (I != E &&
+          StringRef(*I).substr(0, strlen("arm")).equals_lower("arm")) {
return true;
}
}
diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
index 7638f3c..8a710ea 100644
--- a/tools/llvm-config/CMakeLists.txt
+++ b/tools/llvm-config/CMakeLists.txt
@@ -36,9 +36,6 @@ foreach(l ${LLVM_SYSTEM_LIBS_LIST})
set(LLVM_SYSTEM_LIBS ${LLVM_SYSTEM_LIBS} "-l${l}")
endforeach()
-include(GetTargetTriple)
-get_target_triple(target)
-
foreach(c ${LLVM_TARGETS_TO_BUILD})
set(TARGETS_BUILT "${TARGETS_BUILT} ${c}")
endforeach(c)
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index c621721..eb82f98 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -24,6 +24,7 @@
#include "llvm/ModuleProvider.h"
#include "llvm/PassManager.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Verifier.h"
@@ -242,51 +243,16 @@ bool LTOCodeGenerator::assemble(const std::string& asmPath,
// build argument list
std::vector<const char*> args;
- std::string targetTriple = _linker.getModule()->getTargetTriple();
+ llvm::Triple targetTriple(_linker.getModule()->getTargetTriple());
+ const char *arch = targetTriple.getArchNameForAssembler();
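+  // getArchNameForAssembler() returns the name the assembler's -arch option
+  // expects, or NULL when the triple's architecture is not recognized.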
+
args.push_back(tool.c_str());
- if ( targetTriple.find("darwin") != std::string::npos ) {
+
+ if (targetTriple.getOS() == Triple::Darwin) {
// darwin specific command line options
- if (strncmp(targetTriple.c_str(), "i386-apple-", 11) == 0) {
- args.push_back("-arch");
- args.push_back("i386");
- }
- else if (strncmp(targetTriple.c_str(), "x86_64-apple-", 13) == 0) {
- args.push_back("-arch");
- args.push_back("x86_64");
- }
- else if (strncmp(targetTriple.c_str(), "powerpc-apple-", 14) == 0) {
- args.push_back("-arch");
- args.push_back("ppc");
- }
- else if (strncmp(targetTriple.c_str(), "powerpc64-apple-", 16) == 0) {
- args.push_back("-arch");
- args.push_back("ppc64");
- }
- else if (strncmp(targetTriple.c_str(), "arm-apple-", 10) == 0) {
- args.push_back("-arch");
- args.push_back("arm");
- }
- else if ((strncmp(targetTriple.c_str(), "armv4t-apple-", 13) == 0) ||
- (strncmp(targetTriple.c_str(), "thumbv4t-apple-", 15) == 0)) {
- args.push_back("-arch");
- args.push_back("armv4t");
- }
- else if ((strncmp(targetTriple.c_str(), "armv5-apple-", 12) == 0) ||
- (strncmp(targetTriple.c_str(), "armv5e-apple-", 13) == 0) ||
- (strncmp(targetTriple.c_str(), "thumbv5-apple-", 14) == 0) ||
- (strncmp(targetTriple.c_str(), "thumbv5e-apple-", 15) == 0)) {
- args.push_back("-arch");
- args.push_back("armv5");
- }
- else if ((strncmp(targetTriple.c_str(), "armv6-apple-", 12) == 0) ||
- (strncmp(targetTriple.c_str(), "thumbv6-apple-", 14) == 0)) {
- args.push_back("-arch");
- args.push_back("armv6");
- }
- else if ((strncmp(targetTriple.c_str(), "armv7-apple-", 12) == 0) ||
- (strncmp(targetTriple.c_str(), "thumbv7-apple-", 14) == 0)) {
+ if (arch != NULL) {
args.push_back("-arch");
- args.push_back("armv7");
+ args.push_back(arch);
}
// add -static to assembler command line when code model requires
if ( (_assemblerPath != NULL) && (_codeModel == LTO_CODEGEN_PIC_MODEL_STATIC) )
diff --git a/unittests/ADT/DenseMapTest.cpp b/unittests/ADT/DenseMapTest.cpp
index 15a5379..afac651 100644
--- a/unittests/ADT/DenseMapTest.cpp
+++ b/unittests/ADT/DenseMapTest.cpp
@@ -164,4 +164,16 @@ TEST_F(DenseMapTest, IterationTest) {
}
}
+// const_iterator test
+TEST_F(DenseMapTest, ConstIteratorTest) {
+ // Check conversion from iterator to const_iterator.
+ DenseMap<uint32_t, uint32_t>::iterator it = uintMap.begin();
+ DenseMap<uint32_t, uint32_t>::const_iterator cit(it);
+ EXPECT_TRUE(it == cit);
+
+ // Check copying of const_iterators.
+ DenseMap<uint32_t, uint32_t>::const_iterator cit2(cit);
+ EXPECT_TRUE(cit == cit2);
+}
+
}
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp
index cdc476e..11545d5 100644
--- a/unittests/ADT/StringRefTest.cpp
+++ b/unittests/ADT/StringRefTest.cpp
@@ -9,6 +9,7 @@
#include "gtest/gtest.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -110,6 +111,92 @@ TEST(StringRefTest, Split) {
Str.rsplit('o'));
}
+// XFAIL for PR5482: StringRef is miscompiled by Apple gcc.
+#if (!defined(__llvm__) && defined(__APPLE__) && defined(__OPTIMIZE__))
+#define SKIP_SPLIT2
+#endif
+#ifndef SKIP_SPLIT2
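+// split(A, Separator, MaxSplit, KeepEmpty): MaxSplit caps how many splits are
+// performed (-1 means no limit); KeepEmpty controls whether empty substrings
+// are appended to A.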
+TEST(StringRefTest, Split2) {
+ SmallVector<StringRef, 5> parts;
+ SmallVector<StringRef, 5> expected;
+
+ expected.push_back("ab"); expected.push_back("c");
+ StringRef(",ab,,c,").split(parts, ",", -1, false);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back(""); expected.push_back("ab"); expected.push_back("");
+ expected.push_back("c"); expected.push_back("");
+ StringRef(",ab,,c,").split(parts, ",", -1, true);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back("");
+ StringRef("").split(parts, ",", -1, true);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ StringRef("").split(parts, ",", -1, false);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ StringRef(",").split(parts, ",", -1, false);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back(""); expected.push_back("");
+ StringRef(",").split(parts, ",", -1, true);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back("a"); expected.push_back("b");
+ StringRef("a,b").split(parts, ",", -1, true);
+ EXPECT_TRUE(parts == expected);
+
+ // Test MaxSplit
+ expected.clear(); parts.clear();
+ expected.push_back("a,,b,c");
+ StringRef("a,,b,c").split(parts, ",", 0, true);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back("a,,b,c");
+ StringRef("a,,b,c").split(parts, ",", 0, false);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back("a"); expected.push_back(",b,c");
+ StringRef("a,,b,c").split(parts, ",", 1, true);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back("a"); expected.push_back(",b,c");
+ StringRef("a,,b,c").split(parts, ",", 1, false);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back("a"); expected.push_back(""); expected.push_back("b,c");
+ StringRef("a,,b,c").split(parts, ",", 2, true);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back("a"); expected.push_back("b,c");
+ StringRef("a,,b,c").split(parts, ",", 2, false);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back("a"); expected.push_back(""); expected.push_back("b");
+ expected.push_back("c");
+ StringRef("a,,b,c").split(parts, ",", 3, true);
+ EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back("a"); expected.push_back("b"); expected.push_back("c");
+ StringRef("a,,b,c").split(parts, ",", 3, false);
+ EXPECT_TRUE(parts == expected);
+}
+#endif
+
TEST(StringRefTest, StartsWith) {
StringRef Str("hello");
EXPECT_TRUE(Str.startswith("he"));
@@ -125,6 +212,8 @@ TEST(StringRefTest, Find) {
EXPECT_EQ(0U, Str.find("hello"));
EXPECT_EQ(1U, Str.find("ello"));
EXPECT_EQ(StringRef::npos, Str.find("zz"));
+ EXPECT_EQ(2U, Str.find("ll", 2));
+ EXPECT_EQ(StringRef::npos, Str.find("ll", 3));
EXPECT_EQ(3U, Str.rfind('l'));
EXPECT_EQ(StringRef::npos, Str.rfind('z'));
@@ -132,6 +221,14 @@ TEST(StringRefTest, Find) {
EXPECT_EQ(0U, Str.rfind("hello"));
EXPECT_EQ(1U, Str.rfind("ello"));
EXPECT_EQ(StringRef::npos, Str.rfind("zz"));
+
+ EXPECT_EQ(2U, Str.find_first_of('l'));
+ EXPECT_EQ(1U, Str.find_first_of("el"));
+ EXPECT_EQ(StringRef::npos, Str.find_first_of("xyz"));
+
+ EXPECT_EQ(1U, Str.find_first_not_of('h'));
+ EXPECT_EQ(4U, Str.find_first_not_of("hel"));
+ EXPECT_EQ(StringRef::npos, Str.find_first_not_of("hello"));
}
TEST(StringRefTest, Count) {
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index e0568ad..98b2922 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -61,6 +61,7 @@ class RecordingJITMemoryManager : public JITMemoryManager {
public:
RecordingJITMemoryManager()
: Base(JITMemoryManager::CreateDefaultMemManager()) {
+ stubsAllocated = 0;
}
virtual void setMemoryWritable() { Base->setMemoryWritable(); }
@@ -68,8 +69,6 @@ public:
virtual void setPoisonMemory(bool poison) { Base->setPoisonMemory(poison); }
virtual void AllocateGOT() { Base->AllocateGOT(); }
virtual uint8_t *getGOTBase() const { return Base->getGOTBase(); }
- virtual void SetDlsymTable(void *ptr) { Base->SetDlsymTable(ptr); }
- virtual void *getDlsymTable() const { return Base->getDlsymTable(); }
struct StartFunctionBodyCall {
StartFunctionBodyCall(uint8_t *Result, const Function *F,
uintptr_t ActualSize, uintptr_t ActualSizeResult)
@@ -90,8 +89,10 @@ public:
StartFunctionBodyCall(Result, F, InitialActualSize, ActualSize));
return Result;
}
+ int stubsAllocated;
virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
unsigned Alignment) {
+ stubsAllocated++;
return Base->allocateStub(F, StubSize, Alignment);
}
struct EndFunctionBodyCall {
@@ -303,7 +304,6 @@ TEST_F(JITTest, FarCallToKnownFunction) {
ConstantInt::get(TypeBuilder<int, false>::get(Context), 7));
Builder.CreateRet(result);
- TheJIT->EnableDlsymStubs(false);
TheJIT->DisableLazyCompilation(true);
int (*TestFunctionPtr)() = reinterpret_cast<int(*)()>(
(intptr_t)TheJIT->getPointerToFunction(TestFunction));
@@ -437,10 +437,16 @@ TEST_F(JITTest, ModuleDeletion) {
RJMM->deallocateFunctionBodyCalls.size());
SmallPtrSet<const void*, 2> ExceptionTablesDeallocated;
+ unsigned NumTablesDeallocated = 0;
for (unsigned i = 0, e = RJMM->deallocateExceptionTableCalls.size();
i != e; ++i) {
ExceptionTablesDeallocated.insert(
RJMM->deallocateExceptionTableCalls[i].ET);
+ if (RJMM->deallocateExceptionTableCalls[i].ET != NULL) {
+ // If JITEmitDebugInfo is off, we'll "deallocate" NULL, which doesn't
+ // appear in startExceptionTableCalls.
+ NumTablesDeallocated++;
+ }
}
for (unsigned i = 0, e = RJMM->startExceptionTableCalls.size(); i != e; ++i) {
EXPECT_TRUE(ExceptionTablesDeallocated.count(
@@ -449,9 +455,49 @@ TEST_F(JITTest, ModuleDeletion) {
<< RJMM->startExceptionTableCalls[i].F_dump;
}
EXPECT_EQ(RJMM->startExceptionTableCalls.size(),
- RJMM->deallocateExceptionTableCalls.size());
+ NumTablesDeallocated);
}
+#if !defined(__arm__) && !defined(__powerpc__) && !defined(__ppc__)
+typedef int (*FooPtr) ();
+
+TEST_F(JITTest, NoStubs) {
+ LoadAssembly("define void @bar() {"
+ "entry: "
+ "ret void"
+ "}"
+ " "
+ "define i32 @foo() {"
+ "entry:"
+ "call void @bar()"
+ "ret i32 undef"
+ "}"
+ " "
+ "define i32 @main() {"
+ "entry:"
+ "%0 = call i32 @foo()"
+ "call void @bar()"
+ "ret i32 undef"
+ "}");
+ Function *foo = M->getFunction("foo");
+ uintptr_t tmp = (uintptr_t)(TheJIT->getPointerToFunction(foo));
+ FooPtr ptr = (FooPtr)(tmp);
+
+ (ptr)();
+
+  // We should now allocate no more stubs; we already have the code for foo
+  // and the existing stub for bar.
+ int stubsBefore = RJMM->stubsAllocated;
+ Function *func = M->getFunction("main");
+ TheJIT->getPointerToFunction(func);
+
+ Function *bar = M->getFunction("bar");
+ TheJIT->getPointerToFunction(bar);
+
+ ASSERT_EQ(stubsBefore, RJMM->stubsAllocated);
+}
+#endif
+
// This code is copied from JITEventListenerTest, but it only runs once for all
// the tests in this directory. Everything seems fine, but that's strange
// behavior.
diff --git a/utils/Misc/zkill b/utils/Misc/zkill
new file mode 100755
index 0000000..bc0bfd5
--- /dev/null
+++ b/utils/Misc/zkill
@@ -0,0 +1,276 @@
+#!/usr/bin/env python
+
+import os
+import re
+import sys
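+
+# Example usage (illustrative):
+#   zkill -s KILL 1234        # send SIGKILL to pid 1234
+#   zkill -n --name 'opt.*'   # dry run against processes whose name matches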
+
+def _write_message(kind, message):
+ import inspect, os, sys
+
+ # Get the file/line where this message was generated.
+ f = inspect.currentframe()
+ # Step out of _write_message, and then out of wrapper.
+ f = f.f_back.f_back
+ file,line,_,_,_ = inspect.getframeinfo(f)
+ location = '%s:%d' % (os.path.basename(file), line)
+
+ print >>sys.stderr, '%s: %s: %s' % (location, kind, message)
+
+note = lambda message: _write_message('note', message)
+warning = lambda message: _write_message('warning', message)
+error = lambda message: (_write_message('error', message), sys.exit(1))
+
+def re_full_match(pattern, str):
+ m = re.match(pattern, str)
+ if m and m.end() != len(str):
+ m = None
+ return m
+
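+# Convert a ps(1) TIME field ("MM:SS" or "MM:SS.ss") to seconds,
+# e.g. "12:34.56" -> 754.56.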
+def parse_time(value):
+ minutes,value = value.split(':',1)
+ if '.' in value:
+ seconds,fseconds = value.split('.',1)
+    else:
+        seconds = value
+        fseconds = '0'
+    return int(minutes) * 60 + int(seconds) + float('.'+fseconds)
+
+def extractExecutable(command):
+ """extractExecutable - Given a string representing a command line, attempt
+ to extract the executable path, even if it includes spaces."""
+
+ # Split into potential arguments.
+ args = command.split(' ')
+
+ # Scanning from the beginning, try to see if the first N args, when joined,
+ # exist. If so that's probably the executable.
+ for i in range(1,len(args)):
+ cmd = ' '.join(args[:i])
+ if os.path.exists(cmd):
+ return cmd
+
+ # Otherwise give up and return the first "argument".
+ return args[0]
+
+class Struct:
+ def __init__(self, **kwargs):
+ self.fields = kwargs.keys()
+ self.__dict__.update(kwargs)
+
+ def __repr__(self):
+ return 'Struct(%s)' % ', '.join(['%s=%r' % (k,getattr(self,k))
+ for k in self.fields])
+
+kExpectedPSFields = [('PID', int, 'pid'),
+ ('USER', str, 'user'),
+ ('COMMAND', str, 'command'),
+ ('%CPU', float, 'cpu_percent'),
+ ('TIME', parse_time, 'cpu_time'),
+ ('VSZ', int, 'vmem_size'),
+ ('RSS', int, 'rss')]
+def getProcessTable():
+ import subprocess
+ p = subprocess.Popen(['ps', 'aux'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out,err = p.communicate()
+    res = p.wait()
+    if res:
+ error('unable to get process table')
+ elif err.strip():
+ error('unable to get process table: %s' % err)
+
+ lns = out.split('\n')
+ it = iter(lns)
+ header = it.next().split()
+ numRows = len(header)
+
+ # Make sure we have the expected fields.
+ indexes = []
+ for field in kExpectedPSFields:
+ try:
+ indexes.append(header.index(field[0]))
+ except:
+ if opts.debug:
+ raise
+ error('unable to get process table, no %r field.' % field[0])
+
+ table = []
+ for i,ln in enumerate(it):
+ if not ln.strip():
+ continue
+
+ fields = ln.split(None, numRows - 1)
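+        # Cap the split so spaces in the trailing COMMAND column survive.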
+ if len(fields) != numRows:
+ warning('unable to process row: %r' % ln)
+ continue
+
+ record = {}
+ for field,idx in zip(kExpectedPSFields, indexes):
+ value = fields[idx]
+ try:
+ record[field[2]] = field[1](value)
+ except:
+ if opts.debug:
+ raise
+ warning('unable to process %r in row: %r' % (field[0], ln))
+ break
+ else:
+ # Add our best guess at the executable.
+ record['executable'] = extractExecutable(record['command'])
+ table.append(Struct(**record))
+
+ return table
+
+def getSignalValue(name):
+ import signal
+ if name.startswith('SIG'):
+ value = getattr(signal, name)
+ if value and isinstance(value, int):
+ return value
+ error('unknown signal: %r' % name)
+
+import signal
+kSignals = {}
+for name in dir(signal):
+ if name.startswith('SIG') and name == name.upper() and name.isalpha():
+ kSignals[name[3:]] = getattr(signal, name)
+
+def main():
+ global opts
+ from optparse import OptionParser, OptionGroup
+ parser = OptionParser("usage: %prog [options] {pid}*")
+
+ # FIXME: Add -NNN and -SIGNAME options.
+
+ parser.add_option("-s", "", dest="signalName",
+ help="Name of the signal to use (default=%default)",
+ action="store", default='INT',
+ choices=kSignals.keys())
+ parser.add_option("-l", "", dest="listSignals",
+ help="List known signal names",
+ action="store_true", default=False)
+
+ parser.add_option("-n", "--dry-run", dest="dryRun",
+ help="Only print the actions that would be taken",
+ action="store_true", default=False)
+ parser.add_option("-v", "--verbose", dest="verbose",
+ help="Print more verbose output",
+ action="store_true", default=False)
+ parser.add_option("", "--debug", dest="debug",
+ help="Enable debugging output",
+ action="store_true", default=False)
+ parser.add_option("", "--force", dest="force",
+ help="Perform the specified commands, even if it seems like a bad idea",
+ action="store_true", default=False)
+
+ inf = float('inf')
+ group = OptionGroup(parser, "Process Filters")
+ group.add_option("", "--name", dest="execName", metavar="REGEX",
+ help="Kill processes whose name matches the given regexp",
+ action="store", default=None)
+ group.add_option("", "--exec", dest="execPath", metavar="REGEX",
+ help="Kill processes whose executable matches the given regexp",
+ action="store", default=None)
+ group.add_option("", "--user", dest="userName", metavar="REGEX",
+ help="Kill processes whose user matches the given regexp",
+ action="store", default=None)
+ group.add_option("", "--min-cpu", dest="minCPU", metavar="PCT",
+ help="Kill processes with CPU usage >= PCT",
+ action="store", type=float, default=None)
+ group.add_option("", "--max-cpu", dest="maxCPU", metavar="PCT",
+ help="Kill processes with CPU usage <= PCT",
+ action="store", type=float, default=inf)
+ group.add_option("", "--min-mem", dest="minMem", metavar="N",
+ help="Kill processes with virtual size >= N (MB)",
+ action="store", type=float, default=None)
+ group.add_option("", "--max-mem", dest="maxMem", metavar="N",
+ help="Kill processes with virtual size <= N (MB)",
+ action="store", type=float, default=inf)
+ group.add_option("", "--min-rss", dest="minRSS", metavar="N",
+ help="Kill processes with RSS >= N",
+ action="store", type=float, default=None)
+ group.add_option("", "--max-rss", dest="maxRSS", metavar="N",
+ help="Kill processes with RSS <= N",
+ action="store", type=float, default=inf)
+ group.add_option("", "--min-time", dest="minTime", metavar="N",
+ help="Kill processes with CPU time >= N (seconds)",
+ action="store", type=float, default=None)
+ group.add_option("", "--max-time", dest="maxTime", metavar="N",
+ help="Kill processes with CPU time <= N (seconds)",
+ action="store", type=float, default=inf)
+ parser.add_option_group(group)
+
+ (opts, args) = parser.parse_args()
+
+ if opts.listSignals:
+ items = [(v,k) for k,v in kSignals.items()]
+ items.sort()
+ for i in range(0, len(items), 4):
+ print '\t'.join(['%2d) SIG%s' % (k,v)
+ for k,v in items[i:i+4]])
+ sys.exit(0)
+
+ # Figure out the signal to use.
+ signal = kSignals[opts.signalName]
+ signalValueName = str(signal)
+ if opts.verbose:
+ name = dict((v,k) for k,v in kSignals.items()).get(signal,None)
+ if name:
+ signalValueName = name
+ note('using signal %d (SIG%s)' % (signal, name))
+ else:
+ note('using signal %d' % signal)
+
+ # Get the pid list to consider.
+ pids = set()
+ for arg in args:
+ try:
+ pids.add(int(arg))
+ except:
+ parser.error('invalid positional argument: %r' % arg)
+
+ filtered = ps = getProcessTable()
+
+ # Apply filters.
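+    # (Under Python 2, None compares less than any number, so the unset
+    # minimum options below, which default to None, never exclude anything.)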
+ if pids:
+ filtered = [p for p in filtered
+ if p.pid in pids]
+ if opts.execName is not None:
+ filtered = [p for p in filtered
+ if re_full_match(opts.execName,
+ os.path.basename(p.executable))]
+ if opts.execPath is not None:
+ filtered = [p for p in filtered
+ if re_full_match(opts.execPath, p.executable)]
+ if opts.userName is not None:
+ filtered = [p for p in filtered
+ if re_full_match(opts.userName, p.user)]
+ filtered = [p for p in filtered
+ if opts.minCPU <= p.cpu_percent <= opts.maxCPU]
+ filtered = [p for p in filtered
+ if opts.minMem <= float(p.vmem_size) / (1<<20) <= opts.maxMem]
+ filtered = [p for p in filtered
+ if opts.minRSS <= p.rss <= opts.maxRSS]
+ filtered = [p for p in filtered
+ if opts.minTime <= p.cpu_time <= opts.maxTime]
+
+ if len(filtered) == len(ps):
+ if not opts.force and not opts.dryRun:
+ error('refusing to kill all processes without --force')
+
+ if not filtered:
+ warning('no processes selected')
+
+ for p in filtered:
+ if opts.verbose:
+ note('kill(%r, %s) # (user=%r, executable=%r, CPU=%2.2f%%, time=%r, vmem=%r, rss=%r)' %
+ (p.pid, signalValueName, p.user, p.executable, p.cpu_percent, p.cpu_time, p.vmem_size, p.rss))
+ if not opts.dryRun:
+ try:
+ os.kill(p.pid, signal)
+ except OSError:
+ if opts.debug:
+ raise
+ warning('unable to kill PID: %r' % p.pid)
+
+if __name__ == '__main__':
+ main()
diff --git a/utils/NewNightlyTest.pl b/utils/NewNightlyTest.pl
index ed22b77..a8cf8de 100755
--- a/utils/NewNightlyTest.pl
+++ b/utils/NewNightlyTest.pl
@@ -43,6 +43,7 @@ use Socket;
# the source tree.
# -noremove Do not remove the BUILDDIR after it has been built.
# -noremoveresults Do not remove the WEBDIR after it has been built.
+# -noclean Do not run 'make clean' before building.
# -nobuild Do not build llvm. If tests are enabled perform them
# on the llvm build specified in the build directory
# -release Build an LLVM Release version
@@ -53,6 +54,7 @@ use Socket;
# building LLVM.
# -use-gmake Use gmake instead of the default make command to build
# llvm and run tests.
+# -llvmgccdir Next argument specifies the llvm-gcc install prefix.
#
# TESTING OPTIONS:
# -notest Do not even attempt to run the test programs.
@@ -115,20 +117,14 @@ $TestSVNURL = 'http://llvm.org/svn/llvm-project' unless $TestSVNURL;
my $BuildDir = $ENV{'BUILDDIR'};
my $WebDir = $ENV{'WEBDIR'};
-my $LLVMSrcDir = $ENV{'LLVMSRCDIR'};
-$LLVMSrcDir = "$BuildDir/llvm" unless $LLVMSrcDir;
-my $LLVMObjDir = $ENV{'LLVMOBJDIR'};
-$LLVMObjDir = "$BuildDir/llvm" unless $LLVMObjDir;
-my $LLVMTestDir = $ENV{'LLVMTESTDIR'};
-$LLVMTestDir = "$BuildDir/llvm/projects/llvm-test" unless $LLVMTestDir;
-
##############################################################
#
# Calculate the date prefix...
#
##############################################################
+use POSIX;
@TIME = localtime;
-my $DATE = sprintf "%4d-%02d-%02d_%02d-%02d", $TIME[5]+1900, $TIME[4]+1, $TIME[3], $TIME[1], $TIME[0];
+my $DATE = strftime("%Y-%m-%d_%H-%M-%S", localtime());
##############################################################
#
@@ -147,6 +143,14 @@ $SUBMIT = 1;
$PARALLELJOBS = "2";
my $TESTFLAGS="";
+if ($ENV{'LLVMGCCDIR'}) {
+ $CONFIGUREARGS .= " --with-llvmgccdir=" . $ENV{'LLVMGCCDIR'};
+ $LLVMGCCPATH = $ENV{'LLVMGCCDIR'} . '/bin';
+}
+else {
+ $LLVMGCCPATH = "";
+}
+
while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
shift;
last if /^--$/; # Stop processing arguments on --
@@ -154,6 +158,7 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
# List command line options here...
if (/^-config$/) { $CONFIG_PATH = "$ARGV[0]"; shift; next; }
if (/^-nocheckout$/) { $NOCHECKOUT = 1; next; }
+ if (/^-noclean$/) { $NOCLEAN = 1; next; }
if (/^-noremove$/) { $NOREMOVE = 1; next; }
if (/^-noremoveatend$/) { $NOREMOVEATEND = 1; next; }
if (/^-noremoveresults$/){ $NOREMOVERESULTS = 1; next; }
@@ -211,23 +216,19 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
if (/^-test-cxxflags/) { $TESTFLAGS = "$TESTFLAGS CXXFLAGS=\'$ARGV[0]\'";
shift; next; }
if (/^-compileflags/) { $MAKEOPTS = "$MAKEOPTS $ARGV[0]"; shift; next; }
+ if (/^-llvmgccdir/) { $CONFIGUREARGS .= " --with-llvmgccdir=\'$ARGV[0]\'";
+ $LLVMGCCPATH = $ARGV[0] . '/bin';
+ shift; next;}
if (/^-use-gmake/) { $MAKECMD = "gmake"; shift; next; }
if (/^-extraflags/) { $CONFIGUREARGS .=
" --with-extra-options=\'$ARGV[0]\'"; shift; next;}
if (/^-noexternals$/) { $NOEXTERNALS = 1; next; }
- if (/^-nodejagnu$/) { $NODEJAGNU = 1; next; }
+ if (/^-nodejagnu$/) { next; }
if (/^-nobuild$/) { $NOBUILD = 1; next; }
print "Unknown option: $_ : ignoring!\n";
}
-if ($ENV{'LLVMGCCDIR'}) {
- $CONFIGUREARGS .= " --with-llvmgccdir=" . $ENV{'LLVMGCCDIR'};
- $LLVMGCCPATH = $ENV{'LLVMGCCDIR'} . '/bin';
-}
-else {
- $LLVMGCCPATH = "";
-}
-
if ($CONFIGUREARGS !~ /--disable-jit/) {
$CONFIGUREARGS .= " --enable-jit";
}
@@ -264,6 +265,13 @@ if ($nickname eq "") {
"\"-nickname <nickname>\"");
}
+my $LLVMSrcDir = $ENV{'LLVMSRCDIR'};
+$LLVMSrcDir = "$BuildDir/llvm" unless $LLVMSrcDir;
+my $LLVMObjDir = $ENV{'LLVMOBJDIR'};
+$LLVMObjDir = "$BuildDir/llvm" unless $LLVMObjDir;
+my $LLVMTestDir = $ENV{'LLVMTESTDIR'};
+$LLVMTestDir = "$BuildDir/llvm/projects/llvm-test" unless $LLVMTestDir;
+
##############################################################
#
# Define the file names we'll use
@@ -381,39 +389,6 @@ sub CopyFile { #filename, newfile
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
-# This function is meant to read in the dejagnu sum file and
-# return a string with only the results (i.e. PASS/FAIL/XPASS/
-# XFAIL).
-#
-#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-sub GetDejagnuTestResults { # (filename, log)
- my ($filename, $DejagnuLog) = @_;
- my @lines;
- $/ = "\n"; #Make sure we're going line at a time.
-
- if( $VERBOSE) { print "DEJAGNU TEST RESULTS:\n"; }
-
- if (open SRCHFILE, $filename) {
- # Process test results
- while ( <SRCHFILE> ) {
- if ( length($_) > 1 ) {
- chomp($_);
- if ( m/^(PASS|XPASS|FAIL|XFAIL): .*\/llvm\/test\/(.*)$/ ) {
- push(@lines, "$1: test/$2");
- }
- }
- }
- }
- close SRCHFILE;
-
- my $content = join("\n", @lines);
- return $content;
-}
-
-
-
-#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#
# This function acts as a mini web browser, submitting data
# to our central server via the POST method
#
@@ -523,7 +498,9 @@ sub BuildLLVM {
RunLoggedCommand("(time -p $NICE ./configure $CONFIGUREARGS $EXTRAFLAGS) ",
$ConfigureLog, "CONFIGURE");
# Build the entire tree, capturing the output into $BuildLog
- RunAppendingLoggedCommand("($NICE $MAKECMD $MAKEOPTS clean)", $BuildLog, "BUILD CLEAN");
+ if (!$NOCLEAN) {
+ RunAppendingLoggedCommand("($NICE $MAKECMD $MAKEOPTS clean)", $BuildLog, "BUILD CLEAN");
+ }
RunAppendingLoggedCommand("(time -p $NICE $MAKECMD $MAKEOPTS)", $BuildLog, "BUILD");
if (`grep '^$MAKECMD\[^:]*: .*Error' $BuildLog | wc -l` + 0 ||
@@ -534,21 +511,6 @@ sub BuildLLVM {
return 1;
}
-# Running dejagnu tests and save results to log.
-sub RunDejaGNUTests {
- die "Invalid call!" unless $ConfigMode == 0;
- # Run the feature and regression tests, results are put into testrun.sum and
- # the full log in testrun.log.
- system "rm -f test/testrun.log test/testrun.sum";
- RunLoggedCommand("(time -p $MAKECMD $MAKEOPTS check)", $DejagnuLog, "DEJAGNU");
-
- # Copy the testrun.log and testrun.sum to our webdir.
- CopyFile("test/testrun.log", $DejagnuLog);
- CopyFile("test/testrun.sum", $DejagnuSum);
-
- return GetDejagnuTestResults($DejagnuSum, $DejagnuLog);
-}
-
# Run the named tests (i.e. "SingleSource" "MultiSource" "External")
sub TestDirectory {
my $SubDir = shift;
@@ -557,11 +519,9 @@ sub TestDirectory {
my $ProgramTestLog = "$Prefix-$SubDir-ProgramTest.txt";
- # Make sure to clean things if in non-config mode.
- if ($ConfigMode == 1) {
- RunLoggedCommand("$MAKECMD -k $MAKEOPTS $PROGTESTOPTS clean $TESTFLAGS",
- $ProgramTestLog, "TEST DIRECTORY $SubDir");
- }
+ # Make sure to clean the test results.
+ RunLoggedCommand("$MAKECMD -k $MAKEOPTS $PROGTESTOPTS clean $TESTFLAGS",
+ $ProgramTestLog, "TEST DIRECTORY $SubDir");
# Run the programs tests... creating a report.nightly.csv file.
my $LLCBetaOpts = "";
@@ -660,9 +620,6 @@ if ($CONFIG_PATH ne "") {
$ConfigureLog = "$Prefix-Configure-Log.txt";
$BuildLog = "$Prefix-Build-Log.txt";
$COLog = "$Prefix-CVS-Log.txt";
- $DejagnuLog = "$Prefix-Dejagnu-testrun.log";
- $DejagnuSum = "$Prefix-Dejagnu-testrun.sum";
- $DejagnuLog = "$Prefix-DejagnuTests-Log.txt";
}
if ($VERBOSE) {
@@ -693,7 +650,6 @@ if ($VERBOSE) {
$starttime = `date "+20%y-%m-%d %H:%M:%S"`;
my ($BuildError, $BuildStatus) = (0, "OK");
-my $DejagnuTestResults = "Dejagnu skipped by user choice.";
if ($ConfigMode == 0) {
if (!$NOCHECKOUT) {
CheckoutSource();
@@ -708,14 +664,8 @@ if ($ConfigMode == 0) {
if( $VERBOSE) { print "\n***ERROR BUILDING TREE\n\n"; }
$BuildError = 1;
$BuildStatus = "Error: compilation aborted";
- $NODEJAGNU=1;
}
}
-
- # Run DejaGNU.
- if (!$NODEJAGNU && !$BuildError) {
- $DejagnuTestResults = RunDejaGNUTests();
- }
}
# Run the llvm-test tests.
@@ -758,6 +708,8 @@ if ($GCCPATH ne "") {
my $gcc_version = (split '\n', $gcc_version_long)[0];
# Get llvm-gcc target triple.
+#
+# FIXME: This shouldn't be hardwired to llvm-gcc.
my $llvmgcc_version_long = "";
if ($LLVMGCCPATH ne "") {
$llvmgcc_version_long = `$LLVMGCCPATH/llvm-gcc -v 2>&1`;
@@ -768,11 +720,10 @@ if ($LLVMGCCPATH ne "") {
my $targetTriple = $1;
# Logs.
-my ($ConfigureLogData, $BuildLogData, $DejagnuLogData, $CheckoutLogData) = "";
+my ($ConfigureLogData, $BuildLogData, $CheckoutLogData) = "";
if ($ConfigMode == 0) {
$ConfigureLogData = ReadFile $ConfigureLog;
$BuildLogData = ReadFile $BuildLog;
- $DejagnuLogData = ReadFile $DejagnuLog;
$CheckoutLogData = ReadFile $COLog;
}
@@ -798,14 +749,6 @@ my $BuildWallTime = GetRegex "^real ([0-9.]+)", $BuildLogData;
$BuildTime=-1 unless $BuildTime;
$BuildWallTime=-1 unless $BuildWallTime;
-# DejaGNU info.
-my $DejagnuTimeU = GetRegex "^user ([0-9.]+)", $DejagnuLogData;
-my $DejagnuTimeS = GetRegex "^sys ([0-9.]+)", $DejagnuLogData;
-$DejagnuTime = $DejagnuTimeU+$DejagnuTimeS; # DejagnuTime = User+System
-$DejagnuWallTime = GetRegex "^real ([0-9.]+)", $DejagnuLogData;
-$DejagnuTime = "0.0" unless $DejagnuTime;
-$DejagnuWallTime = "0.0" unless $DejagnuWallTime;
-
if ( $VERBOSE ) { print "SEND THE DATA VIA THE POST REQUEST\n"; }
my %hash_of_data = (
@@ -813,8 +756,8 @@ my %hash_of_data = (
'build_data' => $ConfigureLogData . $BuildLogData,
'gcc_version' => $gcc_version,
'nickname' => $nickname,
- 'dejagnutime_wall' => $DejagnuWallTime,
- 'dejagnutime_cpu' => $DejagnuTime,
+ 'dejagnutime_wall' => "0.0",
+ 'dejagnutime_cpu' => "0.0",
'cvscheckouttime_wall' => $CheckoutTime_Wall,
'cvscheckouttime_cpu' => $CheckoutTime_CPU,
'configtime_wall' => $ConfigWallTime,
@@ -830,8 +773,8 @@ my %hash_of_data = (
'expfail_tests' => $xfails,
'unexpfail_tests' => $fails,
'all_tests' => $all_tests,
- 'dejagnutests_results' => $DejagnuTestResults,
- 'dejagnutests_log' => $DejagnuLogData,
+ 'dejagnutests_results' => "Dejagnu skipped by user choice.",
+ 'dejagnutests_log' => "",
'starttime' => $starttime,
'endtime' => $endtime,
'target_triple' => $targetTriple,
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index ff348e8..ff83c76 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -695,7 +695,6 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
O << "\n#ifndef NO_ASM_WRITER_BOILERPLATE\n";
O << " if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {\n"
- << " O << \"\\t\";\n"
<< " printInlineAsm(MI);\n"
<< " return;\n"
<< " } else if (MI->isLabel()) {\n"
@@ -705,6 +704,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
<< " printImplicitDef(MI);\n"
<< " return;\n"
<< " } else if (MI->getOpcode() == TargetInstrInfo::KILL) {\n"
+ << " printKill(MI);\n"
<< " return;\n"
<< " }\n\n";
@@ -786,7 +786,6 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
O << " return;\n";
}
- O << " return;\n";
O << "}\n";
}
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index e763795..da4b1cc 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -229,7 +229,7 @@ class ComplexPattern {
unsigned Properties; // Node properties
unsigned Attributes; // Pattern attributes
public:
- ComplexPattern() : NumOperands(0) {};
+ ComplexPattern() : NumOperands(0) {}
ComplexPattern(Record *R);
MVT::SimpleValueType getValueType() const { return Ty; }
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index c3520c1..0c78f56 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -1359,7 +1359,7 @@ public:
Pat->setTypes(Other->getExtTypes());
// The top level node type is checked outside of the select function.
if (!isRoot)
- emitCheck(Prefix + ".getNode()->getValueType(0) == " +
+ emitCheck(Prefix + ".getValueType() == " +
getName(Pat->getTypeNum(0)));
return true;
}
@@ -1786,11 +1786,7 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
}
CallerCode += ");";
- CalleeCode += ") ";
- // Prevent emission routines from being inlined to reduce selection
- // routines stack frame sizes.
- CalleeCode += "DISABLE_INLINE ";
- CalleeCode += "{\n";
+ CalleeCode += ") {\n";
for (std::vector<std::string>::const_reverse_iterator
I = AddedInits.rbegin(), E = AddedInits.rend(); I != E; ++I)
@@ -1811,6 +1807,9 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
} else {
EmitFuncNum = EmitFunctions.size();
EmitFunctions.insert(std::make_pair(CalleeCode, EmitFuncNum));
+ // Prevent emission routines from being inlined to reduce selection
+ // routines stack frame sizes.
+ OS << "DISABLE_INLINE ";
OS << "SDNode *Emit_" << utostr(EmitFuncNum) << CalleeCode;
}
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 3c7b44a..bf0721e 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -66,6 +66,7 @@ void RegisterInfoEmitter::runHeader(raw_ostream &OS) {
<< " virtual bool needsStackRealignment(const MachineFunction &) const\n"
<< " { return false; }\n"
<< " unsigned getSubReg(unsigned RegNo, unsigned Index) const;\n"
+ << " unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const;\n"
<< "};\n\n";
const std::vector<CodeGenRegisterClass> &RegisterClasses =
@@ -831,6 +832,23 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
OS << " };\n";
OS << " return 0;\n";
OS << "}\n\n";
+
+ OS << "unsigned " << ClassName
+ << "::getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const {\n"
+ << " switch (RegNo) {\n"
+ << " default:\n return 0;\n";
+ for (std::map<Record*, std::vector<std::pair<int, Record*> > >::iterator
+ I = SubRegVectors.begin(), E = SubRegVectors.end(); I != E; ++I) {
+ OS << " case " << getQualifiedName(I->first) << ":\n";
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ OS << " if (SubRegNo == "
+ << getQualifiedName((I->second)[i].second)
+ << ") return " << (I->second)[i].first << ";\n";
+ OS << " return 0;\n";
+ }
+ OS << " };\n";
+ OS << " return 0;\n";
+ OS << "}\n\n";
// Emit the constructor of the class...
OS << ClassName << "::" << ClassName
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index c8cf234..0dbfcbe 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -519,6 +519,8 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
OS << Target;
OS << "Subtarget::ParseSubtargetFeatures(const std::string &FS,\n"
<< " const std::string &CPU) {\n"
+ << " DEBUG(errs() << \"\\nFeatures:\" << FS);\n"
+ << " DEBUG(errs() << \"\\nCPU:\" << CPU);\n"
<< " SubtargetFeatures Features(FS);\n"
<< " Features.setCPUIfNone(CPU);\n"
<< " uint32_t Bits = Features.getBits(SubTypeKV, SubTypeKVSize,\n"
@@ -558,6 +560,8 @@ void SubtargetEmitter::run(raw_ostream &OS) {
EmitSourceFileHeader("Subtarget Enumeration Source Fragment", OS);
+ OS << "#include \"llvm/Support/Debug.h\"\n";
+ OS << "#include \"llvm/Support/raw_ostream.h\"\n";
OS << "#include \"llvm/Target/SubtargetFeature.h\"\n";
OS << "#include \"llvm/Target/TargetInstrItineraries.h\"\n\n";
diff --git a/utils/lit/ExampleTests.ObjDir/lit.site.cfg b/utils/lit/ExampleTests.ObjDir/lit.site.cfg
new file mode 100644
index 0000000..14b6e01
--- /dev/null
+++ b/utils/lit/ExampleTests.ObjDir/lit.site.cfg
@@ -0,0 +1,15 @@
+# -*- Python -*-
+
+# Site specific configuration file.
+#
+# Typically this will be generated by the build system to automatically set
+# certain configuration variables which cannot be autodetected, so that 'lit'
+# can easily be used on the command line.
+
+import os
+
+# Preserve the obj_root, for use by the main lit.cfg.
+config.example_obj_root = os.path.dirname(__file__)
+
+lit.load_config(config, os.path.join(config.test_source_root,
+ 'lit.cfg'))
diff --git a/utils/lit/ExampleTests/Clang/fsyntax-only.c b/utils/lit/ExampleTests/Clang/fsyntax-only.c
new file mode 100644
index 0000000..a4a064b
--- /dev/null
+++ b/utils/lit/ExampleTests/Clang/fsyntax-only.c
@@ -0,0 +1,4 @@
+// RUN: clang -fsyntax-only -Xclang -verify %s
+
+int f0(void) {} // expected-warning {{control reaches end of non-void function}}
+
diff --git a/utils/lit/ExampleTests/Clang/lit.cfg b/utils/lit/ExampleTests/Clang/lit.cfg
new file mode 100644
index 0000000..114ac60
--- /dev/null
+++ b/utils/lit/ExampleTests/Clang/lit.cfg
@@ -0,0 +1,80 @@
+# -*- Python -*-
+
+# Configuration file for the 'lit' test runner.
+
+# name: The name of this test suite.
+config.name = 'Clang'
+
+# testFormat: The test format to use to interpret tests.
+#
+# For now we require '&&' between commands, until they get globally killed and
+# the test runner updated.
+config.test_format = lit.formats.ShTest(execute_external = True)
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = ['.c', '.cpp', '.m', '.mm']
+
+# target_triple: Used by ShTest and TclTest formats for XFAIL checks.
+config.target_triple = 'foo'
+
+###
+
+# Discover the 'clang' and 'clangcc' to use.
+
+import os
+
+def inferClang(PATH):
+ # Determine which clang to use.
+ clang = os.getenv('CLANG')
+
+ # If the user set clang in the environment, definitely use that and don't
+ # try to validate.
+ if clang:
+ return clang
+
+ # Otherwise look in the path.
+ clang = lit.util.which('clang', PATH)
+
+ if not clang:
+ lit.fatal("couldn't find 'clang' program, try setting "
+ "CLANG in your environment")
+
+ return clang
+
+def inferClangCC(clang, PATH):
+ clangcc = os.getenv('CLANGCC')
+
+ # If the user set clang in the environment, definitely use that and don't
+ # try to validate.
+ if clangcc:
+ return clangcc
+
+ # Otherwise try adding -cc since we expect to be looking in a build
+ # directory.
+ if clang.endswith('.exe'):
+ clangccName = clang[:-4] + '-cc.exe'
+ else:
+ clangccName = clang + '-cc'
+ clangcc = lit.util.which(clangccName, PATH)
+ if not clangcc:
+ # Otherwise ask clang.
+ res = lit.util.capture([clang, '-print-prog-name=clang-cc'])
+ res = res.strip()
+ if res and os.path.exists(res):
+ clangcc = res
+
+ if not clangcc:
+ lit.fatal("couldn't find 'clang-cc' program, try setting "
+ "CLANGCC in your environment")
+
+ return clangcc
+
+clang = inferClang(config.environment['PATH'])
+if not lit.quiet:
+ lit.note('using clang: %r' % clang)
+config.substitutions.append( (' clang ', ' ' + clang + ' ') )
+
+clang_cc = inferClangCC(clang, config.environment['PATH'])
+if not lit.quiet:
+ lit.note('using clang-cc: %r' % clang_cc)
+config.substitutions.append( (' clang-cc ', ' ' + clang_cc + ' ') )
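
These substitution pairs are what let the fsyntax-only.c example above say just "clang"; roughly speaking (the tool path here is hypothetical), lit applies them as plain string replacement over each RUN line:

    # Sketch of how a (' clang ', ...) substitution rewrites a RUN line.
    line = ' clang -fsyntax-only -Xclang -verify %s '
    for old, new in [(' clang ', ' /usr/local/bin/clang ')]:  # hypothetical path
        line = line.replace(old, new)
    assert line == ' /usr/local/bin/clang -fsyntax-only -Xclang -verify %s '
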
diff --git a/utils/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll b/utils/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll
new file mode 100644
index 0000000..3017b13
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll
@@ -0,0 +1,3 @@
+; RUN: true
+; XFAIL: *
+; XTARGET: darwin
diff --git a/utils/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp b/utils/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp
new file mode 100644
index 0000000..2bda07a
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp
@@ -0,0 +1,6 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
+}
+
diff --git a/utils/lit/ExampleTests/LLVM.InTree/test/lit.cfg b/utils/lit/ExampleTests/LLVM.InTree/test/lit.cfg
new file mode 100644
index 0000000..e7ef037
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.InTree/test/lit.cfg
@@ -0,0 +1,151 @@
+# -*- Python -*-
+
+# Configuration file for the 'lit' test runner.
+
+import os
+
+# name: The name of this test suite.
+config.name = 'LLVM'
+
+# testFormat: The test format to use to interpret tests.
+config.test_format = lit.formats.TclTest()
+
+# suffixes: A list of file extensions to treat as test files; this is
+# actually set by on_clone().
+config.suffixes = []
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+
+# test_exec_root: The root path where tests should be run.
+llvm_obj_root = getattr(config, 'llvm_obj_root', None)
+if llvm_obj_root is not None:
+ config.test_exec_root = os.path.join(llvm_obj_root, 'test')
+
+###
+
+import os
+
+# Check that the object root is known.
+if config.test_exec_root is None:
+ # Otherwise, we haven't loaded the site specific configuration (the user is
+ # probably trying to run on a test file directly, and either the site
+ # configuration hasn't been created by the build system, or we are in an
+ # out-of-tree build situation).
+
+ # Try to detect the situation where we are using an out-of-tree build by
+ # looking for 'llvm-config'.
+ #
+ # FIXME: I debated (i.e., wrote and threw away) adding logic to
+ # automagically generate the lit.site.cfg if we are in some kind of fresh
+ # build situation. This means knowing how to invoke the build system
+ # though, and I decided it was too much magic.
+
+ llvm_config = lit.util.which('llvm-config', config.environment['PATH'])
+ if not llvm_config:
+ lit.fatal('No site specific configuration available!')
+
+ # Get the source and object roots.
+ llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip()
+ llvm_obj_root = lit.util.capture(['llvm-config', '--obj-root']).strip()
+
+ # Validate that we got a tree which points to here.
+ this_src_root = os.path.dirname(config.test_source_root)
+ if os.path.realpath(llvm_src_root) != os.path.realpath(this_src_root):
+ lit.fatal('No site specific configuration available!')
+
+ # Check that the site specific configuration exists.
+ site_cfg = os.path.join(llvm_obj_root, 'test', 'lit.site.cfg')
+ if not os.path.exists(site_cfg):
+ lit.fatal('No site specific configuration available!')
+
+ # Okay, that worked. Notify the user of the automagic, and reconfigure.
+ lit.note('using out-of-tree build at %r' % llvm_obj_root)
+ lit.load_config(config, site_cfg)
+ raise SystemExit
+
+###
+
+# Load site data from DejaGNU's site.exp.
+import re
+site_exp = {}
+# FIXME: Implement lit.site.cfg.
+for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
+ m = re.match('set ([^ ]+) "([^"]*)"', line)
+ if m:
+ site_exp[m.group(1)] = m.group(2)
+
+# Add substitutions.
+for sub in ['prcontext', 'llvmgcc', 'llvmgxx', 'compile_cxx', 'compile_c',
+ 'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
+ 'bugpoint_topts']:
+ if sub in ('llvmgcc', 'llvmgxx'):
+ config.substitutions.append(('%' + sub,
+ site_exp[sub] + ' -emit-llvm -w'))
+ else:
+ config.substitutions.append(('%' + sub, site_exp[sub]))
+
+excludes = []
+
+# Provide target_triple for use in XFAIL and XTARGET.
+config.target_triple = site_exp['target_triplet']
+
+# Provide llvm_supports_target for use in local configs.
+targets = set(site_exp["TARGETS_TO_BUILD"].split())
+def llvm_supports_target(name):
+ return name in targets
+
+langs = set(site_exp['llvmgcc_langs'].split(','))
+def llvm_gcc_supports(name):
+ return name in langs
+
+# Provide on_clone hook for reading 'dg.exp'.
+import os
+simpleLibData = re.compile(r"""load_lib llvm.exp
+
+RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]""",
+ re.MULTILINE)
+conditionalLibData = re.compile(r"""load_lib llvm.exp
+
+if.*\[ ?(llvm[^ ]*) ([^ ]*) ?\].*{
+ *RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]
+\}""", re.MULTILINE)
+def on_clone(parent, cfg, for_path):
+ def addSuffixes(match):
+ if match[0] == '{' and match[-1] == '}':
+ cfg.suffixes = ['.' + s for s in match[1:-1].split(',')]
+ else:
+ cfg.suffixes = ['.' + match]
+
+ libPath = os.path.join(os.path.dirname(for_path),
+ 'dg.exp')
+ if not os.path.exists(libPath):
+ cfg.unsupported = True
+ return
+
+ # Reset unsupported, in case we inherited it.
+ cfg.unsupported = False
+ lib = open(libPath).read().strip()
+
+ # Check for a simple library.
+ m = simpleLibData.match(lib)
+ if m:
+ addSuffixes(m.group(1))
+ return
+
+ # Check for a conditional test set.
+ m = conditionalLibData.match(lib)
+ if m:
+ funcname,arg,match = m.groups()
+ addSuffixes(match)
+
+ func = globals().get(funcname)
+ if not func:
+ lit.error('unsupported predicate %r' % funcname)
+ elif not func(arg):
+ cfg.unsupported = True
+ return
+ # Otherwise, give up.
+ lit.error('unable to understand %r:\n%s' % (libPath, lib))
+
+config.on_clone = on_clone
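
The site.exp scraping in this config is a line-by-line regex match; for instance, fed the first generated line of the site.exp file added later in this patch, it produces one dictionary entry:

    import re
    # Sketch: one DejaGNU site.exp 'set' line becomes one site_exp entry.
    line = 'set target_triplet "x86_64-apple-darwin10"'
    m = re.match('set ([^ ]+) "([^"]*)"', line)
    assert (m.group(1), m.group(2)) == ('target_triplet', 'x86_64-apple-darwin10')
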
diff --git a/utils/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg b/utils/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg
new file mode 100644
index 0000000..3bfee54
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg
@@ -0,0 +1,10 @@
+# -*- Python -*-
+
+## Autogenerated by Makefile ##
+# Do not edit!
+
+# Preserve some key paths for use by main LLVM test suite config.
+config.llvm_obj_root = os.path.dirname(os.path.dirname(__file__))
+
+# Let the main config do the real work.
+lit.load_config(config, os.path.join(config.llvm_obj_root, 'test/lit.cfg'))
diff --git a/utils/lit/ExampleTests/LLVM.InTree/test/site.exp b/utils/lit/ExampleTests/LLVM.InTree/test/site.exp
new file mode 100644
index 0000000..1d9c743
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.InTree/test/site.exp
@@ -0,0 +1,30 @@
+## these variables are automatically generated by make ##
+# Do not edit here. If you wish to override these values
+# edit the last section
+set target_triplet "x86_64-apple-darwin10"
+set TARGETS_TO_BUILD "X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend"
+set llvmgcc_langs "c,c++,objc,obj-c++"
+set llvmgcc_version "4.2.1"
+set prcontext "/usr/bin/tclsh8.4 /Volumes/Data/ddunbar/llvm/test/Scripts/prcontext.tcl"
+set llvmtoolsdir "/Users/ddunbar/llvm.obj.64/Debug/bin"
+set llvmlibsdir "/Users/ddunbar/llvm.obj.64/Debug/lib"
+set srcroot "/Volumes/Data/ddunbar/llvm"
+set objroot "/Volumes/Data/ddunbar/llvm.obj.64"
+set srcdir "/Volumes/Data/ddunbar/llvm/test"
+set objdir "/Volumes/Data/ddunbar/llvm.obj.64/test"
+set gccpath "/usr/bin/gcc -arch x86_64"
+set gxxpath "/usr/bin/g++ -arch x86_64"
+set compile_c " /usr/bin/gcc -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
+set compile_cxx " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
+set link " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -g -L/Users/ddunbar/llvm.obj.64/Debug/lib -L/Volumes/Data/ddunbar/llvm.obj.64/Debug/lib "
+set llvmgcc "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
+set llvmgxx "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
+set llvmgccmajvers "4"
+set bugpoint_topts "-gcc-tool-args -m64"
+set shlibext ".dylib"
+set ocamlopt "/sw/bin/ocamlopt -cc \"g++ -Wall -D_FILE_OFFSET_BITS=64 -D_REENTRANT\" -I /Users/ddunbar/llvm.obj.64/Debug/lib/ocaml"
+set valgrind ""
+set grep "/usr/bin/grep"
+set gas "/usr/bin/as"
+set llvmdsymutil "dsymutil"
+## All variables above are generated by configure. Do Not Edit ##
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/lit.local.cfg b/utils/lit/ExampleTests/LLVM.OutOfTree/lit.local.cfg
new file mode 100644
index 0000000..80d0c7e
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.OutOfTree/lit.local.cfg
@@ -0,0 +1 @@
+config.excludes = ['src']
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/Foo/lit.local.cfg b/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/Foo/lit.local.cfg
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/Foo/lit.local.cfg
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg b/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg
new file mode 100644
index 0000000..bdcc35e
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg
@@ -0,0 +1,11 @@
+# -*- Python -*-
+
+## Autogenerated by Makefile ##
+# Do not edit!
+
+# Preserve some key paths for use by main LLVM test suite config.
+config.llvm_obj_root = os.path.dirname(os.path.dirname(__file__))
+
+# Let the main config do the real work.
+lit.load_config(config, os.path.join(config.llvm_obj_root,
+ '../src/test/lit.cfg'))
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp b/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
new file mode 100644
index 0000000..1d9c743
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
@@ -0,0 +1,30 @@
+## these variables are automatically generated by make ##
+# Do not edit here. If you wish to override these values
+# edit the last section
+set target_triplet "x86_64-apple-darwin10"
+set TARGETS_TO_BUILD "X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend"
+set llvmgcc_langs "c,c++,objc,obj-c++"
+set llvmgcc_version "4.2.1"
+set prcontext "/usr/bin/tclsh8.4 /Volumes/Data/ddunbar/llvm/test/Scripts/prcontext.tcl"
+set llvmtoolsdir "/Users/ddunbar/llvm.obj.64/Debug/bin"
+set llvmlibsdir "/Users/ddunbar/llvm.obj.64/Debug/lib"
+set srcroot "/Volumes/Data/ddunbar/llvm"
+set objroot "/Volumes/Data/ddunbar/llvm.obj.64"
+set srcdir "/Volumes/Data/ddunbar/llvm/test"
+set objdir "/Volumes/Data/ddunbar/llvm.obj.64/test"
+set gccpath "/usr/bin/gcc -arch x86_64"
+set gxxpath "/usr/bin/g++ -arch x86_64"
+set compile_c " /usr/bin/gcc -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
+set compile_cxx " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
+set link " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -g -L/Users/ddunbar/llvm.obj.64/Debug/lib -L/Volumes/Data/ddunbar/llvm.obj.64/Debug/lib "
+set llvmgcc "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
+set llvmgxx "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
+set llvmgccmajvers "4"
+set bugpoint_topts "-gcc-tool-args -m64"
+set shlibext ".dylib"
+set ocamlopt "/sw/bin/ocamlopt -cc \"g++ -Wall -D_FILE_OFFSET_BITS=64 -D_REENTRANT\" -I /Users/ddunbar/llvm.obj.64/Debug/lib/ocaml"
+set valgrind ""
+set grep "/usr/bin/grep"
+set gas "/usr/bin/as"
+set llvmdsymutil "dsymutil"
+## All variables above are generated by configure. Do Not Edit ##
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/data.txt b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/data.txt
new file mode 100644
index 0000000..45b983b
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/data.txt
@@ -0,0 +1 @@
+hi
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp
new file mode 100644
index 0000000..2bda07a
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp
@@ -0,0 +1,6 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
+}
+
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll
new file mode 100644
index 0000000..4e8a582
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll
@@ -0,0 +1 @@
+; RUN: grep "hi" %S/data.txt \ No newline at end of file
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
new file mode 100644
index 0000000..e7ef037
--- /dev/null
+++ b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
@@ -0,0 +1,151 @@
+# -*- Python -*-
+
+# Configuration file for the 'lit' test runner.
+
+import os
+
+# name: The name of this test suite.
+config.name = 'LLVM'
+
+# testFormat: The test format to use to interpret tests.
+config.test_format = lit.formats.TclTest()
+
+# suffixes: A list of file extensions to treat as test files; this is
+# actually set by on_clone().
+config.suffixes = []
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+
+# test_exec_root: The root path where tests should be run.
+llvm_obj_root = getattr(config, 'llvm_obj_root', None)
+if llvm_obj_root is not None:
+ config.test_exec_root = os.path.join(llvm_obj_root, 'test')
+
+###
+
+import os
+
+# Check that the object root is known.
+if config.test_exec_root is None:
+ # Otherwise, we haven't loaded the site specific configuration (the user is
+ # probably trying to run on a test file directly, and either the site
+ # configuration hasn't been created by the build system, or we are in an
+ # out-of-tree build situation).
+
+ # Try to detect the situation where we are using an out-of-tree build by
+ # looking for 'llvm-config'.
+ #
+ # FIXME: I debated (i.e., wrote and threw away) adding logic to
+ # automagically generate the lit.site.cfg if we are in some kind of fresh
+ # build situation. This means knowing how to invoke the build system
+ # though, and I decided it was too much magic.
+
+ llvm_config = lit.util.which('llvm-config', config.environment['PATH'])
+ if not llvm_config:
+ lit.fatal('No site specific configuration available!')
+
+ # Get the source and object roots.
+ llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip()
+ llvm_obj_root = lit.util.capture(['llvm-config', '--obj-root']).strip()
+
+ # Validate that we got a tree which points to here.
+ this_src_root = os.path.dirname(config.test_source_root)
+ if os.path.realpath(llvm_src_root) != os.path.realpath(this_src_root):
+ lit.fatal('No site specific configuration available!')
+
+ # Check that the site specific configuration exists.
+ site_cfg = os.path.join(llvm_obj_root, 'test', 'lit.site.cfg')
+ if not os.path.exists(site_cfg):
+ lit.fatal('No site specific configuration available!')
+
+ # Okay, that worked. Notify the user of the automagic, and reconfigure.
+ lit.note('using out-of-tree build at %r' % llvm_obj_root)
+ lit.load_config(config, site_cfg)
+ raise SystemExit
+
+###
+
+# Load site data from DejaGNU's site.exp.
+import re
+site_exp = {}
+# FIXME: Implement lit.site.cfg.
+for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
+ m = re.match('set ([^ ]+) "([^"]*)"', line)
+ if m:
+ site_exp[m.group(1)] = m.group(2)
+
+# Add substitutions.
+for sub in ['prcontext', 'llvmgcc', 'llvmgxx', 'compile_cxx', 'compile_c',
+ 'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
+ 'bugpoint_topts']:
+ if sub in ('llvmgcc', 'llvmgxx'):
+ config.substitutions.append(('%' + sub,
+ site_exp[sub] + ' -emit-llvm -w'))
+ else:
+ config.substitutions.append(('%' + sub, site_exp[sub]))
+
+excludes = []
+
+# Provide target_triple for use in XFAIL and XTARGET.
+config.target_triple = site_exp['target_triplet']
+
+# Provide llvm_supports_target for use in local configs.
+targets = set(site_exp["TARGETS_TO_BUILD"].split())
+def llvm_supports_target(name):
+ return name in targets
+
+langs = set(site_exp['llvmgcc_langs'].split(','))
+def llvm_gcc_supports(name):
+ return name in langs
+
+# Provide on_clone hook for reading 'dg.exp'.
+import os
+simpleLibData = re.compile(r"""load_lib llvm.exp
+
+RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]""",
+ re.MULTILINE)
+conditionalLibData = re.compile(r"""load_lib llvm.exp
+
+if.*\[ ?(llvm[^ ]*) ([^ ]*) ?\].*{
+ *RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]
+\}""", re.MULTILINE)
+def on_clone(parent, cfg, for_path):
+ def addSuffixes(match):
+ if match[0] == '{' and match[-1] == '}':
+ cfg.suffixes = ['.' + s for s in match[1:-1].split(',')]
+ else:
+ cfg.suffixes = ['.' + match]
+
+ libPath = os.path.join(os.path.dirname(for_path),
+ 'dg.exp')
+ if not os.path.exists(libPath):
+ cfg.unsupported = True
+ return
+
+ # Reset unsupported, in case we inherited it.
+ cfg.unsupported = False
+ lib = open(libPath).read().strip()
+
+ # Check for a simple library.
+ m = simpleLibData.match(lib)
+ if m:
+ addSuffixes(m.group(1))
+ return
+
+ # Check for a conditional test set.
+ m = conditionalLibData.match(lib)
+ if m:
+ funcname,arg,match = m.groups()
+ addSuffixes(match)
+
+ func = globals().get(funcname)
+ if not func:
+ lit.error('unsupported predicate %r' % funcname)
+ elif not func(arg):
+ cfg.unsupported = True
+ return
+ # Otherwise, give up.
+ lit.error('unable to understand %r:\n%s' % (libPath, lib))
+
+config.on_clone = on_clone
diff --git a/utils/lit/ExampleTests/ShExternal/lit.local.cfg b/utils/lit/ExampleTests/ShExternal/lit.local.cfg
new file mode 100644
index 0000000..1061da6
--- /dev/null
+++ b/utils/lit/ExampleTests/ShExternal/lit.local.cfg
@@ -0,0 +1,6 @@
+# -*- Python -*-
+
+config.test_format = lit.formats.ShTest(execute_external = True)
+
+config.suffixes = ['.c']
+
diff --git a/utils/lit/ExampleTests/ShInternal/lit.local.cfg b/utils/lit/ExampleTests/ShInternal/lit.local.cfg
new file mode 100644
index 0000000..448eaa4
--- /dev/null
+++ b/utils/lit/ExampleTests/ShInternal/lit.local.cfg
@@ -0,0 +1,6 @@
+# -*- Python -*-
+
+config.test_format = lit.formats.ShTest(execute_external = False)
+
+config.suffixes = ['.c']
+
diff --git a/utils/lit/ExampleTests/TclTest/lit.local.cfg b/utils/lit/ExampleTests/TclTest/lit.local.cfg
new file mode 100644
index 0000000..6a37129
--- /dev/null
+++ b/utils/lit/ExampleTests/TclTest/lit.local.cfg
@@ -0,0 +1,5 @@
+# -*- Python -*-
+
+config.test_format = lit.formats.TclTest()
+
+config.suffixes = ['.ll']
diff --git a/utils/lit/ExampleTests/TclTest/stderr-pipe.ll b/utils/lit/ExampleTests/TclTest/stderr-pipe.ll
new file mode 100644
index 0000000..6c55fe8
--- /dev/null
+++ b/utils/lit/ExampleTests/TclTest/stderr-pipe.ll
@@ -0,0 +1 @@
+; RUN: gcc -### > /dev/null |& grep {gcc version}
diff --git a/utils/lit/ExampleTests/TclTest/tcl-redir-1.ll b/utils/lit/ExampleTests/TclTest/tcl-redir-1.ll
new file mode 100644
index 0000000..61240ba
--- /dev/null
+++ b/utils/lit/ExampleTests/TclTest/tcl-redir-1.ll
@@ -0,0 +1,7 @@
+; RUN: echo 'hi' > %t.1 | echo 'hello' > %t.2
+; RUN: not grep 'hi' %t.1
+; RUN: grep 'hello' %t.2
+
+
+
+
diff --git a/utils/lit/ExampleTests/fail.c b/utils/lit/ExampleTests/fail.c
new file mode 100644
index 0000000..84db41a
--- /dev/null
+++ b/utils/lit/ExampleTests/fail.c
@@ -0,0 +1,2 @@
+// RUN: echo 'I am some stdout'
+// RUN: false
diff --git a/utils/lit/ExampleTests/lit.cfg b/utils/lit/ExampleTests/lit.cfg
new file mode 100644
index 0000000..dbd574f
--- /dev/null
+++ b/utils/lit/ExampleTests/lit.cfg
@@ -0,0 +1,23 @@
+# -*- Python -*-
+
+# Configuration file for the 'lit' test runner.
+
+# name: The name of this test suite.
+config.name = 'Examples'
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = ['.c', '.cpp', '.m', '.mm', '.ll']
+
+# testFormat: The test format to use to interpret tests.
+config.test_format = lit.formats.ShTest()
+
+# test_source_root: The path where tests are located (default is the test suite
+# root).
+config.test_source_root = None
+
+# test_exec_root: The path where tests should be run (default is the test
+# suite root).
+config.test_exec_root = None
+
+# target_triple: Used by ShTest and TclTest formats for XFAIL checks.
+config.target_triple = 'foo'
diff --git a/utils/lit/ExampleTests/pass.c b/utils/lit/ExampleTests/pass.c
new file mode 100644
index 0000000..5c1031c
--- /dev/null
+++ b/utils/lit/ExampleTests/pass.c
@@ -0,0 +1 @@
+// RUN: true
diff --git a/utils/lit/ExampleTests/xfail.c b/utils/lit/ExampleTests/xfail.c
new file mode 100644
index 0000000..b36cd99
--- /dev/null
+++ b/utils/lit/ExampleTests/xfail.c
@@ -0,0 +1,2 @@
+// RUN: false
+// XFAIL: *
diff --git a/utils/lit/ExampleTests/xpass.c b/utils/lit/ExampleTests/xpass.c
new file mode 100644
index 0000000..ad84990
--- /dev/null
+++ b/utils/lit/ExampleTests/xpass.c
@@ -0,0 +1,2 @@
+// RUN: true
+// XFAIL: *
diff --git a/utils/lit/LitFormats.py b/utils/lit/LitFormats.py
index 9b8250d..270f087 100644
--- a/utils/lit/LitFormats.py
+++ b/utils/lit/LitFormats.py
@@ -1,2 +1,3 @@
-from TestFormats import GoogleTest, ShTest, TclTest, SyntaxCheckTest
+from TestFormats import GoogleTest, ShTest, TclTest
+from TestFormats import SyntaxCheckTest, OneCommandPerFileTest
diff --git a/utils/lit/Test.py b/utils/lit/Test.py
index d3f6274..1f6556b 100644
--- a/utils/lit/Test.py
+++ b/utils/lit/Test.py
@@ -54,6 +54,14 @@ class Test:
self.output = None
# The wall time to execute this test, if timing and once complete.
self.elapsed = None
+ # The repeat index of this test, or None.
+ self.index = None
+
+ def copyWithIndex(self, index):
+ import copy
+ res = copy.copy(self)
+ res.index = index
+ return res
def setResult(self, result, output, elapsed):
assert self.result is None, "Test result already set!"
diff --git a/utils/lit/TestFormats.py b/utils/lit/TestFormats.py
index 7e638b4..f067bae 100644
--- a/utils/lit/TestFormats.py
+++ b/utils/lit/TestFormats.py
@@ -53,8 +53,9 @@ class GoogleTest(object):
def execute(self, test, litConfig):
testPath,testName = os.path.split(test.getSourcePath())
- if not os.path.exists(testPath):
- # Handle GTest typed tests, whose name includes a '/'.
+ while not os.path.exists(testPath):
+ # Handle GTest parametrized and typed tests, whose name includes
+ # some '/'s.
testPath, namePrefix = os.path.split(testPath)
testName = os.path.join(namePrefix, testName)
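
The switch from 'if' to 'while' matters for parametrized GTest tests, whose reported names contain several '/'s: the loop keeps folding path components back into the test name until it reaches a path that exists. A sketch of the splitting (paths hypothetical):

    import os.path
    # 'FooTest/0' names a parametrized test; only '.../Basic' exists on disk,
    # so two splits are needed rather than one.
    p = '/build/unittests/Basic/FooTest/0'
    p, name = os.path.split(p)         # ('/build/unittests/Basic/FooTest', '0')
    p, prefix = os.path.split(p)       # ('/build/unittests/Basic', 'FooTest')
    name = os.path.join(prefix, name)  # 'FooTest/0'
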
@@ -81,14 +82,12 @@ class FileBasedTest(object):
localConfig)
class ShTest(FileBasedTest):
- def __init__(self, execute_external = False, require_and_and = False):
+ def __init__(self, execute_external = False):
self.execute_external = execute_external
- self.require_and_and = require_and_and
def execute(self, test, litConfig):
return TestRunner.executeShTest(test, litConfig,
- self.execute_external,
- self.require_and_and)
+ self.execute_external)
class TclTest(FileBasedTest):
def execute(self, test, litConfig):
@@ -99,16 +98,20 @@ class TclTest(FileBasedTest):
import re
import tempfile
-class SyntaxCheckTest:
+class OneCommandPerFileTest:
# FIXME: Refactor into generic test for running some command on a directory
# of inputs.
- def __init__(self, compiler, dir, recursive, pattern, extra_cxx_args=[]):
- self.compiler = str(compiler)
+ def __init__(self, command, dir, recursive=False,
+ pattern=".*", useTempInput=False):
+ if isinstance(command, str):
+ self.command = [command]
+ else:
+ self.command = list(command)
self.dir = str(dir)
self.recursive = bool(recursive)
self.pattern = re.compile(pattern)
- self.extra_cxx_args = list(extra_cxx_args)
+ self.useTempInput = useTempInput
def getTestsInDirectory(self, testSuite, path_in_suite,
litConfig, localConfig):
@@ -116,6 +119,9 @@ class SyntaxCheckTest:
if not self.recursive:
subdirs[:] = []
+ if dirname == '.svn' or dirname in localConfig.excludes:
+ continue
+
for filename in filenames:
if (not self.pattern.match(filename) or
filename in localConfig.excludes):
@@ -132,17 +138,46 @@ class SyntaxCheckTest:
test.source_path = path
yield test
+ def createTempInput(self, tmp, test):
+        abstract  # intentionally undefined; raises NameError unless a subclass overrides this
+
def execute(self, test, litConfig):
- tmp = tempfile.NamedTemporaryFile(suffix='.cpp')
- print >>tmp, '#include "%s"' % test.source_path
- tmp.flush()
+ if test.config.unsupported:
+ return (Test.UNSUPPORTED, 'Test is unsupported')
+
+ cmd = list(self.command)
+
+ # If using temp input, create a temporary file and hand it to the
+ # subclass.
+ if self.useTempInput:
+ tmp = tempfile.NamedTemporaryFile(suffix='.cpp')
+ self.createTempInput(tmp, test)
+ tmp.flush()
+ cmd.append(tmp.name)
+ else:
+ cmd.append(test.source_path)
- cmd = [self.compiler, '-x', 'c++', '-fsyntax-only', tmp.name]
- cmd.extend(self.extra_cxx_args)
out, err, exitCode = TestRunner.executeCommand(cmd)
diags = out + err
if not exitCode and not diags.strip():
return Test.PASS,''
- return Test.FAIL, diags
+ # Try to include some useful information.
+ report = """Command: %s\n""" % ' '.join(["'%s'" % a
+ for a in cmd])
+ if self.useTempInput:
+ report += """Temporary File: %s\n""" % tmp.name
+ report += "--\n%s--\n""" % open(tmp.name).read()
+ report += """Output:\n--\n%s--""" % diags
+
+ return Test.FAIL, report
+
+class SyntaxCheckTest(OneCommandPerFileTest):
+ def __init__(self, compiler, dir, extra_cxx_args=[], *args, **kwargs):
+ cmd = [compiler, '-x', 'c++', '-fsyntax-only'] + extra_cxx_args
+ OneCommandPerFileTest.__init__(self, cmd, dir,
+ useTempInput=1, *args, **kwargs)
+
+ def createTempInput(self, tmp, test):
+ print >>tmp, '#include "%s"' % test.source_path
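
With the generalized constructor above, a local config can now drive any per-file command, not just a C++ syntax check. A minimal, hypothetical lit.local.cfg sketch (the command and directory are made up; the keyword names follow __init__ as added here):

    # -*- Python -*-
    # Hypothetical: run 'clang -fsyntax-only' once per matching header.
    config.test_format = lit.formats.OneCommandPerFileTest(
        command=['clang', '-fsyntax-only'],  # a string or a list of args
        dir='include',                       # directory to scan for inputs
        recursive=True,
        pattern=r'.*\.h$')
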
diff --git a/utils/lit/TestRunner.py b/utils/lit/TestRunner.py
index bee1167..20fbc6c 100644
--- a/utils/lit/TestRunner.py
+++ b/utils/lit/TestRunner.py
@@ -112,6 +112,9 @@ def executeShCmd(cmd, cfg, cwd, results):
r[2] = tempfile.TemporaryFile(mode=r[1])
else:
r[2] = open(r[0], r[1])
+ # Workaround a Win32 and/or subprocess bug when appending.
+ if r[1] == 'a':
+ r[2].seek(0, 2)
result = r[2]
final_redirects.append(result)
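
The three added lines dodge what the comment attributes to a Win32 and/or subprocess quirk with append-mode redirects: seeking to end-of-file (whence 2, i.e. os.SEEK_END) before the child inherits the handle makes the append position explicit. A standalone sketch:

    import subprocess
    # Sketch: open the redirect target for append, force the position to EOF,
    # then let the child write through the inherited handle.
    f = open('cmd.log', 'a')
    f.seek(0, 2)  # 2 == os.SEEK_END
    subprocess.call(['hostname'], stdout=f)
    f.close()
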
@@ -350,7 +353,7 @@ def isExpectedFail(xfails, xtargets, target_triple):
return True
-def parseIntegratedTestScript(test, requireAndAnd):
+def parseIntegratedTestScript(test):
"""parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
script and extract the lines to 'RUN' as well as 'XFAIL' and 'XTARGET'
information. The RUN lines also will have variable substitution performed.
@@ -364,6 +367,8 @@ def parseIntegratedTestScript(test, requireAndAnd):
execpath = test.getExecPath()
execdir,execbase = os.path.split(execpath)
tmpBase = os.path.join(execdir, 'Output', execbase)
+ if test.index is not None:
+ tmpBase += '_%d' % test.index
# We use #_MARKER_# to hide %% while we do the other substitutions.
substitutions = [('%%', '#_MARKER_#')]
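
Suffixing the repeat index keeps the copies of one test from clobbering each other's temporary outputs, since they would otherwise share a tmpBase. Roughly (the base path here is made up):

    # Sketch: with --repeat, each indexed copy gets a distinct temp basename.
    tmpBase = 'Output/fail.c'  # hypothetical exec-path-derived base
    index = 1                  # set by Test.copyWithIndex()
    if index is not None:
        tmpBase += '_%d' % index
    assert tmpBase == 'Output/fail.c_1'
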
@@ -422,19 +427,6 @@ def parseIntegratedTestScript(test, requireAndAnd):
if script[-1][-1] == '\\':
return (Test.UNRESOLVED, "Test has unterminated run lines (with '\\')")
- # Validate interior lines for '&&', a lovely historical artifact.
- if requireAndAnd:
- for i in range(len(script) - 1):
- ln = script[i]
-
- if not ln.endswith('&&'):
- return (Test.FAIL,
- ("MISSING \'&&\': %s\n" +
- "FOLLOWED BY : %s\n") % (ln, script[i + 1]))
-
- # Strip off '&&'
- script[i] = ln[:-2]
-
isXFail = isExpectedFail(xfails, xtargets, test.suite.config.target_triple)
return script,isXFail,tmpBase,execdir
@@ -459,7 +451,7 @@ def executeTclTest(test, litConfig):
if test.config.unsupported:
return (Test.UNSUPPORTED, 'Test is unsupported')
- res = parseIntegratedTestScript(test, False)
+ res = parseIntegratedTestScript(test)
if len(res) == 2:
return res
@@ -488,11 +480,11 @@ def executeTclTest(test, litConfig):
return formatTestOutput(status, out, err, exitCode, script)
-def executeShTest(test, litConfig, useExternalSh, requireAndAnd):
+def executeShTest(test, litConfig, useExternalSh):
if test.config.unsupported:
return (Test.UNSUPPORTED, 'Test is unsupported')
- res = parseIntegratedTestScript(test, requireAndAnd)
+ res = parseIntegratedTestScript(test)
if len(res) == 2:
return res
diff --git a/utils/lit/TestingConfig.py b/utils/lit/TestingConfig.py
index e4874d7..1f5067c 100644
--- a/utils/lit/TestingConfig.py
+++ b/utils/lit/TestingConfig.py
@@ -12,6 +12,7 @@ class TestingConfig:
environment = {
'PATH' : os.pathsep.join(litConfig.path +
[os.environ.get('PATH','')]),
+ 'PATHEXT' : os.environ.get('PATHEXT',''),
'SYSTEMROOT' : os.environ.get('SYSTEMROOT',''),
'LLVM_DISABLE_CRT_DEBUG' : '1',
}
diff --git a/utils/lit/lit.py b/utils/lit/lit.py
index 462f912..dcdce7d 100755
--- a/utils/lit/lit.py
+++ b/utils/lit/lit.py
@@ -236,8 +236,8 @@ def getTests(path, litConfig, testSuiteCache, localConfigCache):
litConfig.note('resolved input %r to %r::%r' % (path, ts.name,
path_in_suite))
- return getTestsInSuite(ts, path_in_suite, litConfig,
- testSuiteCache, localConfigCache)
+ return ts, getTestsInSuite(ts, path_in_suite, litConfig,
+ testSuiteCache, localConfigCache)
def getTestsInSuite(ts, path_in_suite, litConfig,
testSuiteCache, localConfigCache):
@@ -277,19 +277,24 @@ def getTestsInSuite(ts, path_in_suite, litConfig,
# site configuration and then in the source path.
file_execpath = ts.getExecPath(path_in_suite + (filename,))
if dirContainsTestSuite(file_execpath):
- subiter = getTests(file_execpath, litConfig,
- testSuiteCache, localConfigCache)
+ sub_ts, subiter = getTests(file_execpath, litConfig,
+ testSuiteCache, localConfigCache)
elif dirContainsTestSuite(file_sourcepath):
- subiter = getTests(file_sourcepath, litConfig,
- testSuiteCache, localConfigCache)
+ sub_ts, subiter = getTests(file_sourcepath, litConfig,
+ testSuiteCache, localConfigCache)
else:
# Otherwise, continue loading from inside this test suite.
subiter = getTestsInSuite(ts, path_in_suite + (filename,),
litConfig, testSuiteCache,
localConfigCache)
+ sub_ts = None
+ N = 0
for res in subiter:
+ N += 1
yield res
+ if sub_ts and not N:
+ litConfig.warning('test suite %r contained no tests' % sub_ts.name)
def runTests(numThreads, litConfig, provider, display):
# If only using one testing thread, don't use threads at all; this lets us
@@ -383,6 +388,9 @@ def main():
group.add_option("", "--no-tcl-as-sh", dest="useTclAsSh",
help="Don't run Tcl scripts using 'sh'",
action="store_false", default=True)
+ group.add_option("", "--repeat", dest="repeatTests", metavar="N",
+ help="Repeat tests N times (for timing)",
+ action="store", default=None, type=int)
parser.add_option_group(group)
(opts, args) = parser.parse_args()
@@ -428,7 +436,7 @@ def main():
for input in inputs:
prev = len(tests)
tests.extend(getTests(input, litConfig,
- testSuiteCache, localConfigCache))
+ testSuiteCache, localConfigCache)[1])
if prev == len(tests):
litConfig.warning('input %r contained no tests' % input)
@@ -447,8 +455,8 @@ def main():
print '-- Test Suites --'
suitesAndTests = suitesAndTests.items()
suitesAndTests.sort(key = lambda (ts,_): ts.name)
- for ts,tests in suitesAndTests:
- print ' %s - %d tests' %(ts.name, len(tests))
+ for ts,ts_tests in suitesAndTests:
+ print ' %s - %d tests' %(ts.name, len(ts_tests))
print ' Source Root: %s' % ts.source_root
print ' Exec Root : %s' % ts.exec_root
@@ -467,6 +475,11 @@ def main():
header = '-- Testing: %d%s tests, %d threads --'%(len(tests),extra,
opts.numThreads)
+ if opts.repeatTests:
+ tests = [t.copyWithIndex(i)
+ for t in tests
+ for i in range(opts.repeatTests)]
+
progressBar = None
if not opts.quiet:
if opts.succinct and opts.useProgressBar:
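
The comprehension above fans each test out into N indexed copies, grouped per test; a self-contained sketch of the resulting order:

    import copy
    class T(object):
        def __init__(self, name): self.name, self.index = name, None
        def copyWithIndex(self, i):
            r = copy.copy(self); r.index = i; return r
    tests = [T('a'), T('b')]
    tests = [t.copyWithIndex(i) for t in tests for i in range(2)]
    assert [(t.name, t.index) for t in tests] == \
        [('a', 0), ('a', 1), ('b', 0), ('b', 1)]
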
@@ -519,11 +532,16 @@ def main():
print
if opts.timeTests:
- byTime = list(tests)
- byTime.sort(key = lambda t: t.elapsed)
+ # Collate, in case we repeated tests.
+ times = {}
+ for t in tests:
+ key = t.getFullName()
+ times[key] = times.get(key, 0.) + t.elapsed
+
+ byTime = list(times.items())
+ byTime.sort(key = lambda (name,elapsed): elapsed)
if byTime:
- Util.printHistogram([(t.getFullName(), t.elapsed) for t in byTime],
- title='Tests')
+ Util.printHistogram(byTime, title='Tests')
for name,code in (('Expected Passes ', Test.PASS),
('Expected Failures ', Test.XFAIL),
OpenPOWER on IntegriCloud