author    rdivacky <rdivacky@FreeBSD.org>  2009-11-04 14:58:56 +0000
committer rdivacky <rdivacky@FreeBSD.org>  2009-11-04 14:58:56 +0000
commit    7ff99155c39edd73ebf1c6adfa023b1048fee9a4 (patch)
tree      b4dc751bcee540346911aa4115729eff2f991657
parent    d1f06de484602e72707476a6152974847bac1570 (diff)
download  FreeBSD-src-7ff99155c39edd73ebf1c6adfa023b1048fee9a4.zip
          FreeBSD-src-7ff99155c39edd73ebf1c6adfa023b1048fee9a4.tar.gz
Update LLVM to r86025.
-rw-r--r--  CMakeLists.txt | 24
-rw-r--r--  Makefile | 10
-rw-r--r--  Makefile.config.in | 9
-rw-r--r--  Makefile.rules | 14
-rw-r--r--  autoconf/configure.ac | 25
-rwxr-xr-x  cmake/config-ix.cmake | 10
-rw-r--r--  cmake/modules/LLVMLibDeps.cmake | 1
-rw-r--r--  cmake/modules/LLVMProcessSources.cmake | 15
-rwxr-xr-x  configure | 96
-rw-r--r--  docs/BitCodeFormat.html | 501
-rw-r--r--  docs/CommandGuide/lit.pod | 5
-rw-r--r--  docs/CompilerDriver.html | 105
-rw-r--r--  docs/GettingStarted.html | 4
-rw-r--r--  docs/LangRef.html | 342
-rw-r--r--  docs/Passes.html | 18
-rw-r--r--  docs/ReleaseNotes.html | 4
-rw-r--r--  docs/TableGenFundamentals.html | 4
-rw-r--r--  docs/tutorial/LangImpl4.html | 35
-rw-r--r--  docs/tutorial/OCamlLangImpl4.html | 31
-rw-r--r--  examples/BrainF/BrainF.cpp | 4
-rw-r--r--  include/llvm-c/Core.h | 6
-rw-r--r--  include/llvm-c/Transforms/IPO.h | 2
-rw-r--r--  include/llvm/ADT/DenseMap.h | 4
-rw-r--r--  include/llvm/ADT/EquivalenceClasses.h | 2
-rw-r--r--  include/llvm/ADT/FoldingSet.h | 2
-rw-r--r--  include/llvm/ADT/ImmutableList.h | 2
-rw-r--r--  include/llvm/ADT/ImmutableSet.h | 2
-rw-r--r--  include/llvm/ADT/SmallPtrSet.h | 2
-rw-r--r--  include/llvm/ADT/SparseBitVector.h | 2
-rw-r--r--  include/llvm/ADT/StringExtras.h | 2
-rw-r--r--  include/llvm/ADT/StringSwitch.h | 83
-rw-r--r--  include/llvm/ADT/Twine.h | 2
-rw-r--r--  include/llvm/ADT/ValueMap.h | 54
-rw-r--r--  include/llvm/Analysis/AliasSetTracker.h | 3
-rw-r--r--  include/llvm/Analysis/DebugInfo.h | 29
-rw-r--r--  include/llvm/Analysis/Dominators.h | 4
-rw-r--r--  include/llvm/Analysis/LoopInfo.h | 6
-rw-r--r--  include/llvm/Analysis/MemoryBuiltins.h | 79
-rw-r--r--  include/llvm/Analysis/ScalarEvolution.h | 11
-rw-r--r--  include/llvm/Analysis/ValueTracking.h | 2
-rw-r--r--  include/llvm/BasicBlock.h | 26
-rw-r--r--  include/llvm/Bitcode/BitCodes.h | 2
-rw-r--r--  include/llvm/Bitcode/Deserialize.h | 2
-rw-r--r--  include/llvm/Bitcode/LLVMBitCodes.h | 8
-rw-r--r--  include/llvm/CodeGen/AsmPrinter.h | 7
-rw-r--r--  include/llvm/CodeGen/BinaryObject.h | 2
-rw-r--r--  include/llvm/CodeGen/ELFRelocation.h | 2
-rw-r--r--  include/llvm/CodeGen/JITCodeEmitter.h | 2
-rw-r--r--  include/llvm/CodeGen/LatencyPriorityQueue.h | 17
-rw-r--r--  include/llvm/CodeGen/LiveInterval.h | 296
-rw-r--r--  include/llvm/CodeGen/LiveIntervalAnalysis.h | 298
-rw-r--r--  include/llvm/CodeGen/LiveStackAnalysis.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineBasicBlock.h | 12
-rw-r--r--  include/llvm/CodeGen/MachineCodeEmitter.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineFrameInfo.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineInstrBuilder.h | 7
-rw-r--r--  include/llvm/CodeGen/MachineModuleInfo.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineOperand.h | 37
-rw-r--r--  include/llvm/CodeGen/MachineRelocation.h | 2
-rw-r--r--  include/llvm/CodeGen/ProcessImplicitDefs.h | 41
-rw-r--r--  include/llvm/CodeGen/PseudoSourceValue.h | 4
-rw-r--r--  include/llvm/CodeGen/ScheduleDAG.h | 38
-rw-r--r--  include/llvm/CodeGen/SelectionDAG.h | 2
-rw-r--r--  include/llvm/CodeGen/SelectionDAGISel.h | 8
-rw-r--r--  include/llvm/CodeGen/SelectionDAGNodes.h | 23
-rw-r--r--  include/llvm/CodeGen/SlotIndexes.h | 740
-rw-r--r--  include/llvm/CodeGen/ValueTypes.h | 2
-rw-r--r--  include/llvm/Constant.h | 23
-rw-r--r--  include/llvm/Constants.h | 40
-rw-r--r--  include/llvm/ExecutionEngine/ExecutionEngine.h | 76
-rw-r--r--  include/llvm/ExecutionEngine/GenericValue.h | 2
-rw-r--r--  include/llvm/ExecutionEngine/JITEventListener.h | 9
-rw-r--r--  include/llvm/ExecutionEngine/JITMemoryManager.h | 2
-rw-r--r--  include/llvm/InstrTypes.h | 37
-rw-r--r--  include/llvm/Instruction.def | 52
-rw-r--r--  include/llvm/Instruction.h | 3
-rw-r--r--  include/llvm/Instructions.h | 425
-rw-r--r--  include/llvm/Intrinsics.td | 5
-rw-r--r--  include/llvm/LinkAllPasses.h | 6
-rw-r--r--  include/llvm/MC/MCAsmLexer.h | 2
-rw-r--r--  include/llvm/MC/MCAsmParser.h | 2
-rw-r--r--  include/llvm/MC/MCAssembler.h | 2
-rw-r--r--  include/llvm/MC/MCDisassembler.h | 2
-rw-r--r--  include/llvm/MC/MCExpr.h | 2
-rw-r--r--  include/llvm/MC/MCInst.h | 3
-rw-r--r--  include/llvm/MC/MCStreamer.h | 2
-rw-r--r--  include/llvm/MC/MCSymbol.h | 2
-rw-r--r--  include/llvm/MC/MCValue.h | 2
-rw-r--r--  include/llvm/Metadata.h | 4
-rw-r--r--  include/llvm/Module.h | 3
-rw-r--r--  include/llvm/Pass.h | 2
-rw-r--r--  include/llvm/Support/Allocator.h | 2
-rw-r--r--  include/llvm/Support/ConstantRange.h | 2
-rw-r--r--  include/llvm/Support/Debug.h | 67
-rw-r--r--  include/llvm/Support/ELF.h | 2
-rw-r--r--  include/llvm/Support/IRBuilder.h | 24
-rw-r--r--  include/llvm/Support/InstVisitor.h | 5
-rw-r--r--  include/llvm/Support/MathExtras.h | 2
-rw-r--r--  include/llvm/Support/MemoryBuffer.h | 2
-rw-r--r--  include/llvm/Support/MemoryObject.h | 2
-rw-r--r--  include/llvm/Support/PointerLikeTypeTraits.h | 4
-rw-r--r--  include/llvm/Support/SlowOperationInformer.h | 2
-rw-r--r--  include/llvm/Support/StandardPasses.h | 39
-rw-r--r--  include/llvm/Support/TargetFolder.h | 10
-rw-r--r--  include/llvm/Support/Timer.h | 2
-rw-r--r--  include/llvm/Support/raw_ostream.h | 2
-rw-r--r--  include/llvm/System/AIXDataTypesFix.h | 25
-rw-r--r--  include/llvm/System/Atomic.h | 2
-rw-r--r--  include/llvm/System/DataTypes.h.cmake | 152
-rw-r--r--  include/llvm/System/DataTypes.h.in | 147
-rw-r--r--  include/llvm/System/Disassembler.h | 2
-rw-r--r--  include/llvm/System/Memory.h | 2
-rw-r--r--  include/llvm/System/TimeValue.h | 2
-rw-r--r--  include/llvm/Target/SubtargetFeature.h | 2
-rw-r--r--  include/llvm/Target/Target.td | 2
-rw-r--r--  include/llvm/Target/TargetData.h | 2
-rw-r--r--  include/llvm/Target/TargetInstrDesc.h | 8
-rw-r--r--  include/llvm/Target/TargetInstrInfo.h | 32
-rw-r--r--  include/llvm/Target/TargetJITInfo.h | 2
-rw-r--r--  include/llvm/Target/TargetLowering.h | 26
-rw-r--r--  include/llvm/Target/TargetSelectionDAG.td | 6
-rw-r--r--  include/llvm/Transforms/IPO.h | 7
-rw-r--r--  include/llvm/Transforms/Scalar.h | 35
-rw-r--r--  include/llvm/Transforms/Utils/BasicBlockUtils.h | 12
-rw-r--r--  include/llvm/Transforms/Utils/SSAUpdater.h | 2
-rw-r--r--  include/llvm/Transforms/Utils/ValueMapper.h | 3
-rw-r--r--  include/llvm/Type.h | 4
-rw-r--r--  include/llvm/TypeSymbolTable.h | 9
-rw-r--r--  include/llvm/Value.h | 1
-rw-r--r--  include/llvm/ValueSymbolTable.h | 2
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/AliasAnalysisCounter.cpp | 4
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp | 3
-rw-r--r--  lib/Analysis/AliasDebugger.cpp | 4
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 20
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 93
-rw-r--r--  lib/Analysis/CFGPrinter.cpp | 9
-rw-r--r--  lib/Analysis/CMakeLists.txt | 2
-rw-r--r--  lib/Analysis/CaptureTracking.cpp | 3
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 277
-rw-r--r--  lib/Analysis/DbgInfoPrinter.cpp | 2
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 105
-rw-r--r--  lib/Analysis/IPA/Andersens.cpp | 27
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp | 3
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp | 15
-rw-r--r--  lib/Analysis/InlineCost.cpp | 26
-rw-r--r--  lib/Analysis/InstCount.cpp | 8
-rw-r--r--  lib/Analysis/MallocHelper.cpp | 265
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 277
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 76
-rw-r--r--  lib/Analysis/PointerTracking.cpp | 4
-rw-r--r--  lib/Analysis/ProfileEstimatorPass.cpp | 3
-rw-r--r--  lib/Analysis/ProfileInfo.cpp | 4
-rw-r--r--  lib/Analysis/ProfileInfoLoaderPass.cpp | 11
-rw-r--r--  lib/Analysis/ProfileVerifierPass.cpp | 5
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 19
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 28
-rw-r--r--  lib/Analysis/SparsePropagation.cpp | 5
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 2
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 7
-rw-r--r--  lib/AsmParser/LLParser.cpp | 289
-rw-r--r--  lib/AsmParser/LLParser.h | 66
-rw-r--r--  lib/AsmParser/LLToken.h | 5
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 67
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.h | 10
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 29
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp | 45
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.h | 9
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 794
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 176
-rw-r--r--  lib/CodeGen/AntiDepBreaker.h | 73
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 40
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 46
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 10
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 3
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 58
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 4
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 540
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 103
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 4
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 3
-rw-r--r--  lib/CodeGen/ELF.h | 2
-rw-r--r--  lib/CodeGen/ExactHazardRecognizer.cpp | 2
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 5
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 5
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 2
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 69
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 9
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 89
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 793
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 9
-rw-r--r--  lib/CodeGen/LowerSubregs.cpp | 79
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 33
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 125
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 49
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 161
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 3
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 9
-rw-r--r--  lib/CodeGen/OcamlGC.cpp | 3
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 747
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 200
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 231
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 4
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 22
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 58
-rw-r--r--  lib/CodeGen/RegAllocLocal.cpp | 3
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 8
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 51
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 16
-rw-r--r--  lib/CodeGen/ScheduleDAGEmit.cpp | 12
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 89
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 34
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 6
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 78
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp | 66
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuild.h | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 63
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 5
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 234
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.h | 13
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 3
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 189
-rw-r--r--  lib/CodeGen/Spiller.cpp | 68
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 2
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 5
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 58
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 4
-rw-r--r--  lib/CodeGen/UnreachableBlockElim.cpp | 6
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 2
-rw-r--r--  lib/CodeGen/VirtRegMap.h | 10
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 23
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 47
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 41
-rw-r--r--  lib/ExecutionEngine/Interpreter/Interpreter.h | 7
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 10
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.h | 7
-rw-r--r--  lib/ExecutionEngine/JIT/JITDebugRegisterer.h | 2
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 111
-rw-r--r--  lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp | 172
-rw-r--r--  lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp | 12
-rw-r--r--  lib/Linker/LinkModules.cpp | 30
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 2
-rw-r--r--  lib/Support/APFloat.cpp | 24
-rw-r--r--  lib/Support/Allocator.cpp | 2
-rw-r--r--  lib/Support/Debug.cpp | 11
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 2
-rw-r--r--  lib/System/DynamicLibrary.cpp | 1
-rw-r--r--  lib/System/Path.cpp | 1
-rw-r--r--  lib/System/Unix/Memory.inc | 2
-rw-r--r--  lib/System/Unix/Process.inc | 4
-rw-r--r--  lib/System/Unix/Signals.inc | 2
-rw-r--r--  lib/System/Win32/Memory.inc | 2
-rw-r--r--  lib/Target/ARM/ARM.h | 1
-rw-r--r--  lib/Target/ARM/ARM.td | 8
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 101
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 17
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 164
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 9
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 1
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 67
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp | 30
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 25
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 3
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 215
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 14
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 55
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 3
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h | 1
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 351
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 27
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 210
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 276
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 161
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 30
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 2
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 2
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 269
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 51
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 2
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp | 4
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 3
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 141
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 2
-rw-r--r--  lib/Target/ARM/README.txt | 31
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 24
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 12
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 3
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 3
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 24
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.cpp | 10
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.h | 2
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 3
-rw-r--r--  lib/Target/Alpha/AlphaBranchSelector.cpp | 3
-rw-r--r--  lib/Target/Alpha/AlphaCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 15
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.h | 5
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.td | 2
-rw-r--r--  lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp | 3
-rw-r--r--  lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.td | 10
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 15
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp | 5
-rw-r--r--  lib/Target/CellSPU/SPU.h | 2
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 22
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 8
-rw-r--r--  lib/Target/MSP430/AsmPrinter/CMakeLists.txt | 2
-rw-r--r--  lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp | 3
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 2
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 3
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 9
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 5
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 2
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 2
-rw-r--r--  lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp | 6
-rw-r--r--  lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h | 1
-rw-r--r--  lib/Target/PIC16/MCSectionPIC16.h | 88
-rw-r--r--  lib/Target/PIC16/PIC16ABINames.h | 6
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.cpp | 22
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.h | 5
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.td | 6
-rw-r--r--  lib/Target/PIC16/PIC16MemSelOpt.cpp | 11
-rw-r--r--  lib/Target/PIC16/PIC16Passes/Makefile | 2
-rw-r--r--  lib/Target/PIC16/PIC16TargetObjectFile.cpp | 26
-rw-r--r--  lib/Target/PIC16/PIC16TargetObjectFile.h | 8
-rw-r--r--  lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 7
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 8
-rw-r--r--  lib/Target/README.txt | 85
-rw-r--r--  lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 8
-rw-r--r--  lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp | 3
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 26
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 5
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFP.td | 6
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 50
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 4
-rw-r--r--  lib/Target/X86/README.txt | 2
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 4
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 3
-rw-r--r--  lib/Target/X86/X86FloatingPointRegKill.cpp | 3
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 23
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 32
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 15
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 31
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 2
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 5
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 7
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 43
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 7
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 64
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 3
-rw-r--r--  lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 6
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 3
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 3
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 3
-rw-r--r--  lib/Transforms/IPO/DeadTypeElimination.cpp | 3
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp | 3
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 5
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp | 38
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 43
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp | 6
-rw-r--r--  lib/Transforms/IPO/IPO.cpp | 2
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp | 3
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp | 3
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp | 3
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp | 3
-rw-r--r--  lib/Transforms/IPO/LowerSetJmp.cpp | 37
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 3
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 3
-rw-r--r--  lib/Transforms/IPO/PartialSpecialization.cpp | 3
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp | 3
-rw-r--r--  lib/Transforms/IPO/RaiseAllocations.cpp | 173
-rw-r--r--  lib/Transforms/IPO/StripDeadPrototypes.cpp | 3
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 18
-rw-r--r--  lib/Transforms/IPO/StructRetPromotion.cpp | 3
-rw-r--r--  lib/Transforms/Instrumentation/BlockProfiling.cpp | 3
-rw-r--r--  lib/Transforms/Instrumentation/EdgeProfiling.cpp | 3
-rw-r--r--  lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 3
-rw-r--r--  lib/Transforms/Instrumentation/RSProfiling.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/ABCD.cpp | 1104
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 6
-rw-r--r--  lib/Transforms/Scalar/CodeGenLICM.cpp | 112
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/CondPropagate.cpp | 26
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 33
-rw-r--r--  lib/Transforms/Scalar/GEPSplitter.cpp | 81
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 42
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp | 753
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 30
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 300
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp | 151
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 1762
-rw-r--r--  lib/Transforms/Scalar/SCCVN.cpp | 721
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 61
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 25
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp | 2
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 17
-rw-r--r--  lib/Transforms/Utils/BasicInliner.cpp | 2
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp | 30
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt | 3
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 10
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp | 5
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 3
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 9
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 60
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 3
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp (renamed from lib/Transforms/Utils/UnrollLoop.cpp) | 4
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 4
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp | 5
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp | 4
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 14
-rw-r--r--  lib/Transforms/Utils/SSI.cpp | 2
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 66
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 158
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 26
-rw-r--r--  lib/VMCore/BasicBlock.cpp | 19
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 39
-rw-r--r--  lib/VMCore/Constants.cpp | 124
-rw-r--r--  lib/VMCore/ConstantsContext.h | 20
-rw-r--r--  lib/VMCore/Core.cpp | 3
-rw-r--r--  lib/VMCore/Dominators.cpp | 8
-rw-r--r--  lib/VMCore/Function.cpp | 15
-rw-r--r--  lib/VMCore/Globals.cpp | 1
-rw-r--r--  lib/VMCore/Instruction.cpp | 39
-rw-r--r--  lib/VMCore/Instructions.cpp | 645
-rw-r--r--  lib/VMCore/LLVMContextImpl.h | 31
-rw-r--r--  lib/VMCore/LeakDetector.cpp | 3
-rw-r--r--  lib/VMCore/LeaksContext.h | 1
-rw-r--r--  lib/VMCore/Metadata.cpp | 10
-rw-r--r--  lib/VMCore/PassManager.cpp | 5
-rw-r--r--  lib/VMCore/PrintModulePass.cpp | 3
-rw-r--r--  lib/VMCore/Type.cpp | 21
-rw-r--r--  lib/VMCore/TypeSymbolTable.cpp | 31
-rw-r--r--  lib/VMCore/Value.cpp | 2
-rw-r--r--  lib/VMCore/Verifier.cpp | 13
-rw-r--r--  projects/sample/lib/sample/sample.c | 2
-rw-r--r--  test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll | 1
-rw-r--r--  test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll | 1
-rw-r--r--  test/Analysis/BasicAA/featuretest.ll | 1
-rw-r--r--  test/Analysis/BasicAA/global-size.ll | 1
-rw-r--r--  test/Analysis/BasicAA/modref.ll | 3
-rw-r--r--  test/Analysis/BasicAA/phi-and-select.ll | 73
-rw-r--r--  test/Analysis/BasicAA/store-promote.ll | 1
-rw-r--r--  test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll | 5
-rw-r--r--  test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll | 5
-rw-r--r--  test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll | 6
-rw-r--r--  test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll | 4
-rw-r--r--  test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll | 4
-rw-r--r--  test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll | 1
-rw-r--r--  test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll | 63
-rw-r--r--  test/CodeGen/ARM/2009-10-27-double-align.ll | 14
-rw-r--r--  test/CodeGen/ARM/2009-10-30.ll | 17
-rw-r--r--  test/CodeGen/ARM/2009-11-01-NeonMoves.ll | 37
-rw-r--r--  test/CodeGen/ARM/2009-11-02-NegativeLane.ll | 20
-rw-r--r--  test/CodeGen/ARM/alloca.ll | 7
-rw-r--r--  test/CodeGen/ARM/arguments.ll | 8
-rw-r--r--  test/CodeGen/ARM/arguments_f64_backfill.ll | 3
-rw-r--r--  test/CodeGen/ARM/arm-negative-stride.ll | 3
-rw-r--r--  test/CodeGen/ARM/bfc.ll | 8
-rw-r--r--  test/CodeGen/ARM/call.ll | 9
-rw-r--r--  test/CodeGen/ARM/carry.ll | 11
-rw-r--r--  test/CodeGen/ARM/constants.ll | 25
-rw-r--r--  test/CodeGen/ARM/fmacs.ll | 4
-rw-r--r--  test/CodeGen/ARM/fnmacs.ll | 17
-rw-r--r--  test/CodeGen/ARM/fpconsts.ll | 33
-rw-r--r--  test/CodeGen/ARM/fpmem.ll | 13
-rw-r--r--  test/CodeGen/ARM/ispositive.ll | 3
-rw-r--r--  test/CodeGen/ARM/ldm.ll | 14
-rw-r--r--  test/CodeGen/ARM/ldr.ll | 22
-rw-r--r--  test/CodeGen/ARM/long.ll | 34
-rw-r--r--  test/CodeGen/ARM/long_shift.ll | 27
-rw-r--r--  test/CodeGen/ARM/remat.ll | 2
-rw-r--r--  test/CodeGen/ARM/str_post.ll | 9
-rw-r--r--  test/CodeGen/ARM/tls2.ll | 18
-rw-r--r--  test/CodeGen/CPP/llvm2cpp.ll | 2
-rw-r--r--  test/CodeGen/Generic/intrinsics.ll | 8
-rw-r--r--  test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll | 2
-rw-r--r--  test/CodeGen/Thumb2/2009-11-01-CopyReg2RegBug.ll | 29
-rw-r--r--  test/CodeGen/Thumb2/cross-rc-coalescing-1.ll | 52
-rw-r--r--  test/CodeGen/Thumb2/cross-rc-coalescing-2.ll | 67
-rw-r--r--  test/CodeGen/Thumb2/ldr-str-imm12.ll | 75
-rw-r--r--  test/CodeGen/Thumb2/machine-licm.ll | 36
-rw-r--r--  test/CodeGen/Thumb2/thumb2-bcc.ll | 4
-rw-r--r--  test/CodeGen/Thumb2/thumb2-bfc.ll | 9
-rw-r--r--  test/CodeGen/Thumb2/thumb2-branch.ll | 10
-rw-r--r--  test/CodeGen/Thumb2/thumb2-cbnz.ll | 32
-rw-r--r--  test/CodeGen/Thumb2/thumb2-clz.ll | 4
-rw-r--r--  test/CodeGen/Thumb2/thumb2-cmn2.ll | 10
-rw-r--r--  test/CodeGen/Thumb2/thumb2-eor2.ll | 12
-rw-r--r--  test/CodeGen/Thumb2/thumb2-mov.ll | 165
-rw-r--r--  test/CodeGen/Thumb2/thumb2-str_post.ll | 9
-rw-r--r--  test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll | 8
-rw-r--r--  test/CodeGen/X86/2008-02-18-TailMergingBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2008-05-12-tailmerge-5.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-10-25-RewriterBug.ll | 171
-rw-r--r--  test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll | 15
-rw-r--r--  test/CodeGen/X86/break-anti-dependencies.ll | 4
-rw-r--r--  test/CodeGen/X86/constant-pool-sharing.ll | 19
-rw-r--r--  test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll | 2
-rw-r--r--  test/CodeGen/X86/large-gep-scale.ll | 12
-rw-r--r--  test/CodeGen/X86/negative-stride-fptosi-user.ll | 25
-rw-r--r--  test/CodeGen/X86/palignr-2.ll | 28
-rw-r--r--  test/CodeGen/X86/pic-load-remat.ll | 6
-rw-r--r--  test/CodeGen/X86/sink-hoist.ll | 107
-rw-r--r--  test/CodeGen/X86/vec_ins_extract.ll | 1
-rw-r--r--  test/CodeGen/X86/x86-64-jumps.ll | 16
-rw-r--r--  test/CodeGen/X86/x86-64-pic-10.ll | 2
-rw-r--r--  test/DebugInfo/2009-11-03-InsertExtractValue.ll | 11
-rw-r--r--  test/Feature/terminators.ll | 43
-rw-r--r--  test/FrontendC++/2009-10-27-crash.cpp | 43
-rw-r--r--  test/FrontendC++/integration-O2.cpp | 19
-rw-r--r--  test/LLVMC/OptionPreprocessor.td | 42
-rw-r--r--  test/Makefile | 8
-rw-r--r--  test/Other/2003-02-19-LoopInfoNestingBug.ll | 2
-rwxr-xr-x  test/Scripts/macho-dump | 3
-rw-r--r--  test/Transforms/ArgumentPromotion/aggregate-promote.ll | 1
-rw-r--r--  test/Transforms/ArgumentPromotion/basictest.ll | 1
-rw-r--r--  test/Transforms/ArgumentPromotion/byval.ll | 1
-rw-r--r--  test/Transforms/ArgumentPromotion/chained.ll | 1
-rw-r--r--  test/Transforms/ArgumentPromotion/control-flow2.ll | 1
-rw-r--r--  test/Transforms/ConstProp/float-to-ptr-cast.ll | 7
-rw-r--r--  test/Transforms/ConstProp/loads.ll | 62
-rw-r--r--  test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll | 1
-rw-r--r--  test/Transforms/DeadStoreElimination/PartialStore.ll | 1
-rw-r--r--  test/Transforms/DeadStoreElimination/context-sensitive.ll | 1
-rw-r--r--  test/Transforms/DeadStoreElimination/lifetime-simple.ll | 18
-rw-r--r--  test/Transforms/DeadStoreElimination/simple.ll | 1
-rw-r--r--  test/Transforms/GVN/invariant-simple.ll | 36
-rw-r--r--  test/Transforms/GVN/lifetime-simple.ll | 20
-rw-r--r--  test/Transforms/GlobalOpt/globalsra-partial.ll | 1
-rw-r--r--  test/Transforms/GlobalOpt/globalsra.ll | 1
-rw-r--r--  test/Transforms/GlobalOpt/heap-sra-3.ll | 41
-rw-r--r--  test/Transforms/GlobalOpt/heap-sra-4.ll | 41
-rw-r--r--  test/Transforms/GlobalOpt/malloc-promote-1.ll | 1
-rw-r--r--  test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll | 1
-rw-r--r--  test/Transforms/IndVarSimplify/preserve-gep-remainder.ll | 1
-rw-r--r--  test/Transforms/Inline/basictest.ll | 45
-rw-r--r--  test/Transforms/Inline/callgraph-update.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll | 1
-rw-r--r--  test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll | 1
-rw-r--r--  test/Transforms/InstCombine/add-shrink.ll | 2
-rw-r--r--  test/Transforms/InstCombine/add-sitofp.ll | 2
-rw-r--r--  test/Transforms/InstCombine/align-2d-gep.ll | 1
-rw-r--r--  test/Transforms/InstCombine/align-addr.ll | 1
-rw-r--r--  test/Transforms/InstCombine/align-inc.ll | 1
-rw-r--r--  test/Transforms/InstCombine/alloca.ll | 1
-rw-r--r--  test/Transforms/InstCombine/call.ll | 1
-rw-r--r--  test/Transforms/InstCombine/cast-load-gep.ll | 1
-rw-r--r--  test/Transforms/InstCombine/cast.ll | 1
-rw-r--r--  test/Transforms/InstCombine/cast2.ll | 1
-rw-r--r--  test/Transforms/InstCombine/constant-fold-gep.ll | 1
-rw-r--r--  test/Transforms/InstCombine/fold-bin-operand.ll | 1
-rw-r--r--  test/Transforms/InstCombine/fp-ret-bitcast.ll | 1
-rw-r--r--  test/Transforms/InstCombine/loadstore-alignment.ll | 1
-rw-r--r--  test/Transforms/InstCombine/malloc-free-delete.ll | 6
-rw-r--r--  test/Transforms/InstCombine/or.ll | 200
-rw-r--r--  test/Transforms/InstCombine/phi.ll | 149
-rw-r--r--  test/Transforms/InstCombine/preserve-sminmax.ll | 12
-rw-r--r--  test/Transforms/InstCombine/ptr-int-cast.ll | 1
-rw-r--r--  test/Transforms/InstCombine/store.ll | 71
-rw-r--r--  test/Transforms/InstCombine/sub.ll | 216
-rw-r--r--  test/Transforms/JumpThreading/no-irreducible-loops.ll | 2
-rw-r--r--  test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll | 18
-rw-r--r--  test/Transforms/LICM/Preserve-LCSSA.ll | 2
-rw-r--r--  test/Transforms/LoopDeletion/multiple-exit-conditions.ll | 27
-rw-r--r--  test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll | 2
-rw-r--r--  test/Transforms/LoopRotate/LRCrash-1.ll | 2
-rw-r--r--  test/Transforms/LoopRotate/LRCrash-2.ll | 2
-rw-r--r--  test/Transforms/LoopRotate/LRCrash-3.ll | 2
-rw-r--r--  test/Transforms/LoopRotate/LRCrash-4.ll | 2
-rw-r--r--  test/Transforms/LoopRotate/LRCrash-5.ll | 2
-rw-r--r--  test/Transforms/LoopRotate/PhiRename-1.ll | 2
-rw-r--r--  test/Transforms/LoopRotate/PhiSelfRefernce-1.ll | 2
-rw-r--r--  test/Transforms/LoopRotate/pr2639.ll | 2
-rw-r--r--  test/Transforms/LoopRotate/preserve-scev.ll | 2
-rw-r--r--  test/Transforms/LoopSimplify/merge-exits.ll | 2
-rw-r--r--  test/Transforms/Mem2Reg/crash.ll | 23
-rw-r--r--  test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll | 1
-rw-r--r--  test/Transforms/MemCpyOpt/align.ll | 1
-rw-r--r--  test/Transforms/SCCP/crash.ll | 24
-rw-r--r--  test/Transforms/SCCP/ipsccp-basic.ll | 72
-rw-r--r--  test/Transforms/SCCP/loadtest.ll | 11
-rw-r--r--  test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/DifferingTypes.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/arraytest.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/basictest.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/bitfield-sroa.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/copy-aggregate.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/debuginfo.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/load-store-aggregate.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/memcpy-from-global.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/not-a-vector.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/union-fp-int.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/union-packed.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/vector_memcpy.ll | 1
-rw-r--r--  test/Transforms/ScalarRepl/vector_promote.ll | 1
-rw-r--r--  test/Transforms/SimplifyCFG/basictest.ll | 9
-rw-r--r--  test/Transforms/SimplifyCFG/duplicate-phis.ll | 21
-rw-r--r--  test/lit.cfg | 1
-rw-r--r--  tools/gold/gold-plugin.cpp | 64
-rw-r--r--  tools/lli/lli.cpp | 3
-rw-r--r--  tools/llvm-mc/AsmLexer.h | 2
-rw-r--r--  tools/llvm-mc/llvm-mc.cpp | 6
-rw-r--r--  tools/llvmc/doc/LLVMC-Reference.rst | 74
-rw-r--r--  tools/opt/opt.cpp | 6
-rw-r--r--  unittests/ADT/StringMapTest.cpp | 2
-rw-r--r--  unittests/ADT/ValueMapTest.cpp | 8
-rw-r--r--  unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp | 11
-rw-r--r--  unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp | 1
-rw-r--r--  unittests/ExecutionEngine/JIT/JITTest.cpp | 198
-rw-r--r--  unittests/ExecutionEngine/JIT/Makefile | 2
-rw-r--r--  unittests/Makefile.unittest | 2
-rw-r--r--  unittests/Support/ValueHandleTest.cpp | 1
-rw-r--r--  unittests/Transforms/Utils/Cloning.cpp | 57
-rw-r--r--  unittests/VMCore/MetadataTest.cpp | 1
-rw-r--r--  utils/TableGen/CodeGenDAGPatterns.cpp | 1
-rw-r--r--  utils/TableGen/CodeGenInstruction.cpp | 2
-rw-r--r--  utils/TableGen/CodeGenInstruction.h | 2
-rw-r--r--  utils/TableGen/DAGISelEmitter.cpp | 61
-rw-r--r--  utils/TableGen/InstrInfoEmitter.cpp | 3
-rw-r--r--  utils/TableGen/LLVMCConfigurationEmitter.cpp | 18
-rw-r--r--  utils/TableGen/Record.cpp | 2
-rw-r--r--  utils/TableGen/Record.h | 2
-rw-r--r--  utils/TableGen/TGLexer.h | 2
-rwxr-xr-x  utils/UpdateCMakeLists.pl | 2
-rwxr-xr-x  utils/buildit/build_llvm | 86
-rwxr-xr-x  utils/findoptdiff | 2
-rw-r--r--  utils/lit/TestRunner.py | 68
-rw-r--r--  utils/lit/Util.py | 2
-rwxr-xr-x  utils/lit/lit.py | 27
-rw-r--r--  utils/unittest/UnitTestMain/Makefile | 2
-rw-r--r--  utils/unittest/googletest/Makefile | 2
-rw-r--r--  utils/vim/llvm.vim | 3
659 files changed, 17742 insertions, 8555 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f712658..5b30309 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -319,16 +319,26 @@ if(LLVM_BUILD_EXAMPLES)
add_subdirectory(examples)
endif ()
-install(DIRECTORY include
- DESTINATION .
+install(DIRECTORY include/
+ DESTINATION include
+ FILES_MATCHING
+ PATTERN "*.def"
+ PATTERN "*.h"
+ PATTERN "*.td"
+ PATTERN "*.inc"
PATTERN ".svn" EXCLUDE
- PATTERN "*.cmake" EXCLUDE
- PATTERN "*.in" EXCLUDE
- PATTERN "*.tmp" EXCLUDE
)
-install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include
- DESTINATION .
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/
+ DESTINATION include
+ FILES_MATCHING
+ PATTERN "*.def"
+ PATTERN "*.h"
+ PATTERN "*.gen"
+ PATTERN "*.inc"
+ # Exclude include/llvm/CMakeFiles/intrinsics_gen.dir, matched by "*.def"
+ PATTERN "CMakeFiles" EXCLUDE
+ PATTERN ".svn" EXCLUDE
)
# TODO: make and install documentation.
diff --git a/Makefile b/Makefile
index d2000b4..671f92c 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,8 @@ LEVEL := .
# "llvmCore", then this is an "Apple-style" build; search for
# "Apple-style" in the comments for more info. Anything else is a
# normal build.
-ifneq ($(RC_ProjectName),llvmCore) # Normal build (not "Apple-style").
+ifneq ($(findstring llvmCore, $(RC_ProjectName)),llvmCore) # Normal build (not "Apple-style").
+
ifeq ($(BUILD_DIRS_ONLY),1)
DIRS := lib/System lib/Support utils
OPTIONAL_DIRS :=
@@ -94,6 +95,8 @@ cross-compile-build-tools:
$(Verb) if [ ! -f BuildTools/Makefile ]; then \
$(MKDIR) BuildTools; \
cd BuildTools ; \
+ unset CFLAGS ; \
+ unset CXXFLAGS ; \
$(PROJ_SRC_DIR)/configure --build=$(BUILD_TRIPLE) \
--host=$(BUILD_TRIPLE) --target=$(BUILD_TRIPLE); \
cd .. ; \
@@ -133,8 +136,7 @@ dist-hook::
$(Echo) Eliminating files constructed by configure
$(Verb) $(RM) -f \
$(TopDistDir)/include/llvm/Config/config.h \
- $(TopDistDir)/include/llvm/Support/DataTypes.h \
- $(TopDistDir)/include/llvm/Support/ThreadSupport.h
+ $(TopDistDir)/include/llvm/System/DataTypes.h
clang-only: all
tools-only: all
@@ -150,7 +152,7 @@ FilesToConfig := \
include/llvm/Config/config.h \
include/llvm/Config/Targets.def \
include/llvm/Config/AsmPrinters.def \
- include/llvm/Support/DataTypes.h \
+ include/llvm/System/DataTypes.h \
tools/llvmc/plugins/Base/Base.td
FilesToConfigPATH := $(addprefix $(LLVM_OBJ_ROOT)/,$(FilesToConfig))
diff --git a/Makefile.config.in b/Makefile.config.in
index fc84c0b..2b9bbfd 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -250,6 +250,9 @@ RDYNAMIC := @RDYNAMIC@
#DEBUG_SYMBOLS = 1
@DEBUG_SYMBOLS@
+# The compiler flags to use for optimized builds.
+OPTIMIZE_OPTION := @OPTIMIZE_OPTION@
+
# When ENABLE_PROFILING is enabled, the llvm source base is built with profile
# information to allow gprof to be used to get execution frequencies.
#ENABLE_PROFILING = 1
@@ -320,3 +323,9 @@ ENABLE_LLVMC_DYNAMIC = 0
# support (via the -load option).
ENABLE_LLVMC_DYNAMIC_PLUGINS = 1
#@ENABLE_LLVMC_DYNAMIC_PLUGINS@
+
+# Optional flags supported by the compiler
+# -Wno-missing-field-initializers
+NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@
+# -Wno-variadic-macros
+NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@
diff --git a/Makefile.rules b/Makefile.rules
index 4e0bd5c..b4a5a37 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -312,16 +312,6 @@ endif
#--------------------------------------------------------------------
CPP.Defines :=
-# OPTIMIZE_OPTION - The optimization level option we want to build LLVM with
-# this can be overridden on the make command line.
-ifndef OPTIMIZE_OPTION
- ifneq ($(HOST_OS),MingW)
- OPTIMIZE_OPTION := -O3
- else
- OPTIMIZE_OPTION := -O2
- endif
-endif
-
ifeq ($(ENABLE_OPTIMIZED),1)
BuildMode := Release
# Don't use -fomit-frame-pointer on Darwin or FreeBSD.
@@ -566,6 +556,8 @@ endif
ifeq ($(TARGET_OS),Darwin)
ifneq ($(ARCH),ARM)
TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION)
+ else
+ TargetCommonOpts += -marm
endif
endif
@@ -1238,7 +1230,7 @@ endif
endif
ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux NetBSD FreeBSD))
-LD.Flags += -Wl,--version-script=$(LLVM_SRC_ROOT)/autoconf/ExportMap.map
+ LD.Flags += -Wl,--version-script=$(LLVM_SRC_ROOT)/autoconf/ExportMap.map
endif
endif
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index f1b060e..7d0a9bb 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -607,6 +607,23 @@ if test -n "$LLVMGXX" && test -z "$LLVMGCC"; then
AC_MSG_ERROR([Invalid llvm-gcc. Use --with-llvmgcc when --with-llvmgxx is used]);
fi
+dnl Override the option to use for optimized builds.
+AC_ARG_WITH(optimize-option,
+ AS_HELP_STRING([--with-optimize-option],
+ [Select the compiler options to use for optimized builds]),,
+ withval=default)
+AC_MSG_CHECKING([optimization flags])
+case "$withval" in
+ default)
+ case "$llvm_cv_os_type" in
+ MingW) optimize_option=-O3 ;;
+ *) optimize_option=-O2 ;;
+ esac ;;
+ *) optimize_option="$withval" ;;
+esac
+AC_SUBST(OPTIMIZE_OPTION,$optimize_option)
+AC_MSG_RESULT([$optimize_option])
+
dnl Specify extra build options
AC_ARG_WITH(extra-options,
AS_HELP_STRING([--with-extra-options],
@@ -943,6 +960,12 @@ fi
dnl Tool compatibility is okay if we make it here.
AC_MSG_RESULT([ok])
+dnl Check optional compiler flags.
+AC_MSG_CHECKING([optional compiler flags])
+CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])
+CXX_FLAG_CHECK(NO_MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers])
+AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS])
+
dnl===-----------------------------------------------------------------------===
dnl===
dnl=== SECTION 5: Check for libraries
@@ -1349,7 +1372,7 @@ AC_CONFIG_HEADERS([include/llvm/Config/config.h])
AC_CONFIG_FILES([include/llvm/Config/Targets.def])
AC_CONFIG_FILES([include/llvm/Config/AsmPrinters.def])
AC_CONFIG_FILES([include/llvm/Config/AsmParsers.def])
-AC_CONFIG_HEADERS([include/llvm/Support/DataTypes.h])
+AC_CONFIG_HEADERS([include/llvm/System/DataTypes.h])
dnl Configure the makefile's configuration data
AC_CONFIG_FILES([Makefile.config])
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 320335c..acf3a2e 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -83,9 +83,9 @@ check_symbol_exists(floorf math.h HAVE_FLOORF)
check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO)
check_symbol_exists(malloc_zone_statistics malloc/malloc.h
HAVE_MALLOC_ZONE_STATISTICS)
-check_symbol_exists(mkdtemp unistd.h HAVE_MKDTEMP)
-check_symbol_exists(mkstemp unistd.h HAVE_MKSTEMP)
-check_symbol_exists(mktemp unistd.h HAVE_MKTEMP)
+check_symbol_exists(mkdtemp "stdlib.h;unistd.h" HAVE_MKDTEMP)
+check_symbol_exists(mkstemp "stdlib.h;unistd.h" HAVE_MKSTEMP)
+check_symbol_exists(mktemp "stdlib.h;unistd.h" HAVE_MKTEMP)
check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK)
check_symbol_exists(sbrk unistd.h HAVE_SBRK)
check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL)
@@ -229,7 +229,7 @@ configure_file(
)
configure_file(
- ${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake
- ${LLVM_BINARY_DIR}/include/llvm/Support/DataTypes.h
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/System/DataTypes.h.cmake
+ ${LLVM_BINARY_DIR}/include/llvm/System/DataTypes.h
)
diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake
index 1a4aa34..655e23e 100644
--- a/cmake/modules/LLVMLibDeps.cmake
+++ b/cmake/modules/LLVMLibDeps.cmake
@@ -23,7 +23,6 @@ set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSu
set(MSVC_LIB_DEPS_LLVMCore LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMCppBackendInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMDebugger LLVMAnalysis LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMHello LLVMCore LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTransformUtils)
diff --git a/cmake/modules/LLVMProcessSources.cmake b/cmake/modules/LLVMProcessSources.cmake
index 12a8968..b753735 100644
--- a/cmake/modules/LLVMProcessSources.cmake
+++ b/cmake/modules/LLVMProcessSources.cmake
@@ -22,6 +22,7 @@ endmacro(add_header_files)
function(llvm_process_sources OUT_VAR)
set( sources ${ARGN} )
+ llvm_check_source_file_list( ${sources} )
# Create file dependencies on the tablegenned files, if any. Seems
# that this is not strictly needed, as dependencies of the .cpp
# sources on the tablegenned .inc files are detected and handled,
@@ -37,3 +38,17 @@ function(llvm_process_sources OUT_VAR)
endif()
set( ${OUT_VAR} ${sources} PARENT_SCOPE )
endfunction(llvm_process_sources)
+
+
+function(llvm_check_source_file_list)
+ set(listed ${ARGN})
+ file(GLOB globbed *.cpp)
+ foreach(g ${globbed})
+ get_filename_component(fn ${g} NAME)
+ list(FIND listed ${fn} idx)
+ if( idx LESS 0 )
+ message(SEND_ERROR "Found unknown source file ${g}
+Please update ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt\n")
+ endif()
+ endforeach()
+endfunction(llvm_check_source_file_list)
diff --git a/configure b/configure
index 102d3f0..e671329 100755
--- a/configure
+++ b/configure
@@ -848,6 +848,7 @@ LLVM_ENUM_TARGETS
LLVM_ENUM_ASM_PRINTERS
LLVM_ENUM_ASM_PARSERS
ENABLE_CBE_PRINTF_A
+OPTIMIZE_OPTION
EXTRA_OPTIONS
BINUTILS_INCDIR
ENABLE_LLVMC_DYNAMIC
@@ -913,6 +914,8 @@ LLVMGCCCOMMAND
LLVMGXXCOMMAND
LLVMGCC
LLVMGXX
+NO_VARIADIC_MACROS
+NO_MISSING_FIELD_INITIALIZERS
USE_UDIS86
USE_OPROFILE
HAVE_PTHREAD
@@ -1595,6 +1598,8 @@ Optional Packages:
searches PATH)
--with-llvmgxx Specify location of llvm-g++ driver (default
searches PATH)
+ --with-optimize-option Select the compiler options to use for optimized
+ builds
--with-extra-options Specify additional options to compile LLVM with
--with-ocaml-libdir Specify install location for ocaml bindings (default
is stdlib)
@@ -5190,6 +5195,29 @@ echo "$as_me: error: Invalid llvm-gcc. Use --with-llvmgcc when --with-llvmgxx is
fi
+# Check whether --with-optimize-option was given.
+if test "${with_optimize_option+set}" = set; then
+ withval=$with_optimize_option;
+else
+ withval=default
+fi
+
+{ echo "$as_me:$LINENO: checking optimization flags" >&5
+echo $ECHO_N "checking optimization flags... $ECHO_C" >&6; }
+case "$withval" in
+ default)
+ case "$llvm_cv_os_type" in
+ MingW) optimize_option=-O3 ;;
+ *) optimize_option=-O2 ;;
+ esac ;;
+ *) optimize_option="$withval" ;;
+esac
+OPTIMIZE_OPTION=$optimize_option
+
+{ echo "$as_me:$LINENO: result: $optimize_option" >&5
+echo "${ECHO_T}$optimize_option" >&6; }
+
+
# Check whether --with-extra-options was given.
if test "${with_extra_options+set}" = set; then
withval=$with_extra_options;
@@ -11008,7 +11036,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 11011 "configure"
+#line 11039 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -13152,7 +13180,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
- echo '#line 13155 "configure"' > conftest.$ac_ext
+ echo '#line 13183 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -14870,11 +14898,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14873: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14901: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:14877: \$? = $ac_status" >&5
+ echo "$as_me:14905: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -15138,11 +15166,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:15141: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:15169: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:15145: \$? = $ac_status" >&5
+ echo "$as_me:15173: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -15242,11 +15270,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:15245: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:15273: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:15249: \$? = $ac_status" >&5
+ echo "$as_me:15277: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -17694,7 +17722,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 17697 "configure"
+#line 17725 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17794,7 +17822,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 17797 "configure"
+#line 17825 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -20162,11 +20190,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:20165: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:20193: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:20169: \$? = $ac_status" >&5
+ echo "$as_me:20197: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -20266,11 +20294,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:20269: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:20297: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:20273: \$? = $ac_status" >&5
+ echo "$as_me:20301: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -21836,11 +21864,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:21839: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:21867: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:21843: \$? = $ac_status" >&5
+ echo "$as_me:21871: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -21940,11 +21968,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:21943: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:21971: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:21947: \$? = $ac_status" >&5
+ echo "$as_me:21975: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -24175,11 +24203,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24178: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24206: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:24182: \$? = $ac_status" >&5
+ echo "$as_me:24210: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -24443,11 +24471,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24446: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24474: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:24450: \$? = $ac_status" >&5
+ echo "$as_me:24478: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -24547,11 +24575,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24550: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24578: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:24554: \$? = $ac_status" >&5
+ echo "$as_me:24582: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -27459,6 +27487,15 @@ fi
{ echo "$as_me:$LINENO: result: ok" >&5
echo "${ECHO_T}ok" >&6; }
+{ echo "$as_me:$LINENO: checking optional compiler flags" >&5
+echo $ECHO_N "checking optional compiler flags... $ECHO_C" >&6; }
+NO_VARIADIC_MACROS=`$CXX -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros`
+
+NO_MISSING_FIELD_INITIALIZERS=`$CXX -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers`
+
+{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS" >&5
+echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS" >&6; }
+
{ echo "$as_me:$LINENO: checking for sin in -lm" >&5
@@ -35260,7 +35297,7 @@ ac_config_files="$ac_config_files include/llvm/Config/AsmPrinters.def"
ac_config_files="$ac_config_files include/llvm/Config/AsmParsers.def"
-ac_config_headers="$ac_config_headers include/llvm/Support/DataTypes.h"
+ac_config_headers="$ac_config_headers include/llvm/System/DataTypes.h"
ac_config_files="$ac_config_files Makefile.config"
@@ -35887,7 +35924,7 @@ do
"include/llvm/Config/Targets.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/Targets.def" ;;
"include/llvm/Config/AsmPrinters.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmPrinters.def" ;;
"include/llvm/Config/AsmParsers.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmParsers.def" ;;
- "include/llvm/Support/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Support/DataTypes.h" ;;
+ "include/llvm/System/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/System/DataTypes.h" ;;
"Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;;
"llvm.spec") CONFIG_FILES="$CONFIG_FILES llvm.spec" ;;
"docs/doxygen.cfg") CONFIG_FILES="$CONFIG_FILES docs/doxygen.cfg" ;;
@@ -36061,11 +36098,11 @@ LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim
LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim
LLVM_ENUM_ASM_PARSERS!$LLVM_ENUM_ASM_PARSERS$ac_delim
ENABLE_CBE_PRINTF_A!$ENABLE_CBE_PRINTF_A$ac_delim
+OPTIMIZE_OPTION!$OPTIMIZE_OPTION$ac_delim
EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim
BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim
ENABLE_LLVMC_DYNAMIC!$ENABLE_LLVMC_DYNAMIC$ac_delim
ENABLE_LLVMC_DYNAMIC_PLUGINS!$ENABLE_LLVMC_DYNAMIC_PLUGINS$ac_delim
-CXX!$CXX$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -36107,6 +36144,7 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+CXX!$CXX$ac_delim
CXXFLAGS!$CXXFLAGS$ac_delim
ac_ct_CXX!$ac_ct_CXX$ac_delim
NM!$NM$ac_delim
@@ -36167,6 +36205,8 @@ LLVMGCCCOMMAND!$LLVMGCCCOMMAND$ac_delim
LLVMGXXCOMMAND!$LLVMGXXCOMMAND$ac_delim
LLVMGCC!$LLVMGCC$ac_delim
LLVMGXX!$LLVMGXX$ac_delim
+NO_VARIADIC_MACROS!$NO_VARIADIC_MACROS$ac_delim
+NO_MISSING_FIELD_INITIALIZERS!$NO_MISSING_FIELD_INITIALIZERS$ac_delim
USE_UDIS86!$USE_UDIS86$ac_delim
USE_OPROFILE!$USE_OPROFILE$ac_delim
HAVE_PTHREAD!$HAVE_PTHREAD$ac_delim
@@ -36201,7 +36241,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 92; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html
index e75887b..7a46d90 100644
--- a/docs/BitCodeFormat.html
+++ b/docs/BitCodeFormat.html
@@ -27,6 +27,15 @@
<li><a href="#llvmir">LLVM IR Encoding</a>
<ol>
<li><a href="#basics">Basics</a></li>
+ <li><a href="#MODULE_BLOCK">MODULE_BLOCK Contents</a></li>
+ <li><a href="#PARAMATTR_BLOCK">PARAMATTR_BLOCK Contents</a></li>
+ <li><a href="#TYPE_BLOCK">TYPE_BLOCK Contents</a></li>
+ <li><a href="#CONSTANTS_BLOCK">CONSTANTS_BLOCK Contents</a></li>
+ <li><a href="#FUNCTION_BLOCK">FUNCTION_BLOCK Contents</a></li>
+ <li><a href="#TYPE_SYMTAB_BLOCK">TYPE_SYMTAB_BLOCK Contents</a></li>
+ <li><a href="#VALUE_SYMTAB_BLOCK">VALUE_SYMTAB_BLOCK Contents</a></li>
+ <li><a href="#METADATA_BLOCK">METADATA_BLOCK Contents</a></li>
+ <li><a href="#METADATA_ATTACHMENT">METADATA_ATTACHMENT Contents</a></li>
</ol>
</li>
</ol>
@@ -220,7 +229,7 @@ A bitstream is a sequential series of <a href="#blocks">Blocks</a> and
abbreviation ID encoded as a fixed-bitwidth field. The width is specified by
the current block, as described below. The value of the abbreviation ID
specifies either a builtin ID (which have special meanings, defined below) or
-one of the abbreviation IDs defined by the stream itself.
+one of the abbreviation IDs defined for the current block by the stream itself.
</p>
<p>
@@ -258,7 +267,7 @@ application specific. Nested blocks capture the hierarchical structure of the da
encoded in it, and various properties are associated with blocks as the file is
parsed. Block definitions allow the reader to efficiently skip blocks
in constant time if the reader wants a summary of blocks, or if it wants to
-efficiently skip data they do not understand. The LLVM IR reader uses this
+efficiently skip data it does not understand. The LLVM IR reader uses this
mechanism to skip function bodies, lazily reading them on demand.
</p>
@@ -268,7 +277,8 @@ block. In particular, each block maintains:
</p>
<ol>
-<li>A current abbrev id width. This value starts at 2, and is set every time a
+<li>A current abbrev id width. This value starts at 2 at the beginning of
+ the stream, and is set every time a
block record is entered. The block entry specifies the abbrev id width for
the body of the block.</li>
@@ -335,13 +345,14 @@ an even multiple of 32-bits.
<div class="doc_text">
<p>
-Data records consist of a record code and a number of (up to) 64-bit integer
-values. The interpretation of the code and values is application specific and
-there are multiple different ways to encode a record (with an unabbrev record or
-with an abbreviation). In the LLVM IR format, for example, there is a record
+Data records consist of a record code and a number of (up to) 64-bit
+integer values. The interpretation of the code and values is
+application specific and may vary between different block types.
+Records can be encoded either using an unabbrev record, or with an
+abbreviation. In the LLVM IR format, for example, there is a record
which encodes the target triple of a module. The code is
-<tt>MODULE_CODE_TRIPLE</tt>, and the values of the record are the ASCII codes
-for the characters in the string.
+<tt>MODULE_CODE_TRIPLE</tt>, and the values of the record are the
+ASCII codes for the characters in the string.
</p>
</div>
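Concretely, a reader can model any data record with a pair like the following. The struct is illustrative, not an LLVM type; the MODULE_CODE_TRIPLE value of 2 is the one given later in this document:

    #include <cstdint>
    #include <string>
    #include <vector>

    // A data record: an application-defined code plus integer operands.
    struct BitcodeRecord {
      unsigned Code;
      std::vector<uint64_t> Values;
    };

    // The module's target triple as a record: code MODULE_CODE_TRIPLE (2),
    // operands holding the ASCII code of each character in the string.
    BitcodeRecord makeTripleRecord(const std::string &Triple) {
      BitcodeRecord R;
      R.Code = 2; // MODULE_CODE_TRIPLE
      for (char Ch : Triple)
        R.Values.push_back(static_cast<uint64_t>(Ch));
      return R;
    }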
@@ -358,7 +369,7 @@ Encoding</a></div>
<p>
An <tt>UNABBREV_RECORD</tt> provides a default fallback encoding, which is both
completely general and extremely inefficient. It can describe an arbitrary
-record by emitting the code and operands as vbrs.
+record by emitting the code and operands as VBRs.
</p>
<p>
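To make "emitted as VBRs" concrete: a VBR-N field carries N-1 data bits per chunk, with the chunk's high bit flagging that more chunks follow. A minimal encoder sketch (illustrative, not the LLVM BitstreamWriter API):

    #include <cstdint>
    #include <vector>

    // Split a value into VBR-N chunks, lowest bits first: each chunk
    // holds N-1 data bits; its high bit is set when more chunks follow.
    std::vector<uint32_t> encodeVBR(uint64_t Value, unsigned N) {
      const uint64_t MoreBit  = 1ULL << (N - 1); // continuation flag
      const uint64_t DataMask = MoreBit - 1;     // low N-1 data bits
      std::vector<uint32_t> Chunks;
      do {
        uint64_t Chunk = Value & DataMask;
        Value >>= (N - 1);
        if (Value != 0)
          Chunk |= MoreBit;
        Chunks.push_back(static_cast<uint32_t>(Chunk));
      } while (Value != 0);
      return Chunks;
    }

An unabbreviated record is then simply the code as a vbr6, an operand count as a vbr6, and each operand as a vbr6 in turn.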
@@ -391,6 +402,11 @@ allows the files to be completely self describing. The actual encoding of
abbreviations is defined below.
</p>
+<p>The record code, which is the first field of an abbreviated record,
+may be encoded in the abbreviation definition (as a literal
+operand) or supplied in the abbreviated record (as a Fixed or VBR
+operand value).</p>
+
</div>
<!-- ======================================================================= -->
@@ -409,8 +425,9 @@ emitted.
<p>
Abbreviations can be determined dynamically per client, per file. Because the
abbreviations are stored in the bitstream itself, different streams of the same
-format can contain different sets of abbreviations if the specific stream does
-not need it. As a concrete example, LLVM IR files usually emit an abbreviation
+format can contain different sets of abbreviations according to the needs
+of the specific stream.
+As a concrete example, LLVM IR files usually emit an abbreviation
for binary operators. If a specific LLVM module contains few or no binary
operators, the abbreviation does not need to be emitted.
</p>
@@ -431,7 +448,8 @@ defined abbreviations in the scope of this block. This definition only exists
inside this immediate block &mdash; it is not visible in subblocks or enclosing
blocks. Abbreviations are implicitly assigned IDs sequentially starting from 4
(the first application-defined abbreviation ID). Any abbreviations defined in a
-<tt>BLOCKINFO</tt> record receive IDs first, in order, followed by any
+<tt>BLOCKINFO</tt> record for the particular block type
+receive IDs first, in order, followed by any
abbreviations defined within the block itself. Abbreviated data records
reference this ID to indicate what abbreviation they are invoking.
</p>
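+
+<p>For example, if the <tt>BLOCKINFO</tt> block defines two
+abbreviations for a given block type, they receive abbreviation IDs 4
+and 5, and the first abbreviation defined inside a block of that type
+itself receives ID 6 (a hypothetical illustration of the numbering
+rule above, not an example taken from a real stream).</p>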
@@ -461,31 +479,32 @@ emitted as their code, followed by the extra data.
<p>The possible operand encodings are:</p>
-<ol>
-<li>Fixed: The field should be emitted as
+<ul>
+<li>Fixed (code 1): The field should be emitted as
a <a href="#fixedwidth">fixed-width value</a>, whose width is specified by
the operand's extra data.</li>
-<li>VBR: The field should be emitted as
+<li>VBR (code 2): The field should be emitted as
a <a href="#variablewidth">variable-width value</a>, whose width is
specified by the operand's extra data.</li>
-<li>Array: This field is an array of values. The array operand
- has no extra data, but expects another operand to follow it which indicates
+<li>Array (code 3): This field is an array of values. The array operand
+ has no extra data, but expects another operand to follow it, indicating
the element type of the array. When reading an array in an abbreviated
record, the first integer is a vbr6 that indicates the array length,
followed by the encoded elements of the array. An array may only occur as
the last operand of an abbreviation (except for the one final operand that
gives the array's type).</li>
-<li>Char6: This field should be emitted as
+<li>Char6 (code 4): This field should be emitted as
a <a href="#char6">char6-encoded value</a>. This operand type takes no
- extra data.</li>
-<li>Blob: This field is emitted as a vbr6, followed by padding to a
+ extra data. Char6 encoding is normally used as an array element type.
+ </li>
+<li>Blob (code 5): This field is emitted as a vbr6, followed by padding to a
32-bit boundary (for alignment) and an array of 8-bit objects. The array of
bytes is further followed by tail padding to ensure that its total length is
a multiple of 4 bytes. This makes it very efficient for the reader to
decode the data without having to make a copy of it: it can use a pointer to
    the data in the mapped-in file and poke directly at it. A blob may only
occur as the last operand of an abbreviation.</li>
-</ol>
+</ul>
<p>
For example, target triples in LLVM modules are encoded as a record of the
@@ -517,7 +536,7 @@ as:
<ol>
<li>The first value, 4, is the abbreviation ID for this abbreviation.</li>
-<li>The second value, 2, is the code for <tt>TRIPLE</tt> in LLVM IR files.</li>
+<li>The second value, 2, is the record code for <tt>TRIPLE</tt> records within LLVM IR file <tt>MODULE_BLOCK</tt> blocks.</li>
<li>The third value, 4, is the length of the array.</li>
<li>The rest of the values are the char6 encoded values
for <tt>"abcd"</tt>.</li>
@@ -541,7 +560,7 @@ used for any other string value.
<p>
In addition to the basic block structure and record encodings, the bitstream
-also defines specific builtin block types. These block types specify how the
+also defines specific built-in block types. These block types specify how the
stream is to be decoded or other metadata. In the future, new standard blocks
may be added. Block IDs 0-7 are reserved for standard blocks.
</p>
@@ -569,7 +588,7 @@ blocks. The currently specified records are:
</div>
<p>
-The <tt>SETBID</tt> record indicates which block ID is being
+The <tt>SETBID</tt> record (code 1) indicates which block ID is being
described. <tt>SETBID</tt> records can occur multiple times throughout the
block to change which block ID is being described. There must be
a <tt>SETBID</tt> record prior to any other records.
@@ -584,13 +603,13 @@ in <tt>BLOCKINFO</tt> blocks receive abbreviation IDs as described
in <tt><a href="#DEFINE_ABBREV">DEFINE_ABBREV</a></tt>.
</p>
-<p>The <tt>BLOCKNAME</tt> can optionally occur in this block. The elements of
-the record are the bytes for the string name of the block. llvm-bcanalyzer uses
+<p>The <tt>BLOCKNAME</tt> record (code 2) can optionally occur in this block. The elements of
+the record are the bytes of the string name of the block. llvm-bcanalyzer can use
this to dump out bitcode files symbolically.</p>
-<p>The <tt>SETRECORDNAME</tt> record can optionally occur in this block. The
-first entry is a record ID number and the rest of the elements of the record are
-the bytes for the string name of the record. llvm-bcanalyzer uses
+<p>The <tt>SETRECORDNAME</tt> record (code 3) can also optionally occur in this block. The
+first operand value is a record ID number, and the rest of the elements of the record are
+the bytes for the string name of the record. llvm-bcanalyzer can use
this to dump out bitcode files symbolically.</p>
<p>
@@ -626,7 +645,7 @@ Each of the fields are 32-bit fields stored in little endian form (as with
the rest of the bitcode file fields). The Magic number is always
<tt>0x0B17C0DE</tt> and the version is currently always <tt>0</tt>. The Offset
field is the offset in bytes to the start of the bitcode stream in the file, and
-the Size field is a size in bytes of the stream. CPUType is a target-specific
+the Size field is the size in bytes of the stream. CPUType is a target-specific
value that can be used to encode the CPU of the target.
</p>
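+
+<p>As a rough illustration, the wrapper header can be pictured as the
+following C structure (a sketch based on the field descriptions above;
+the struct and field names are ours, and a reader must still account
+for the little-endian layout on big-endian hosts):</p>
+
+<div class="doc_code">
+<pre>
+#include &lt;stdint.h&gt;
+
+/* All fields are 32-bit values stored in little-endian form. */
+struct BitcodeWrapperHeader {
+  uint32_t Magic;    /* always 0x0B17C0DE */
+  uint32_t Version;  /* currently always 0 */
+  uint32_t Offset;   /* byte offset of the bitcode stream in the file */
+  uint32_t Size;     /* size in bytes of the bitcode stream */
+  uint32_t CPUType;  /* target-specific encoding of the target CPU */
+};
+</pre>
+</div>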
@@ -681,26 +700,28 @@ When combined with the bitcode magic number and viewed as bytes, this is
<div class="doc_text">
<p>
-<a href="#variablewidth">Variable Width Integers</a> are an efficient way to
-encode arbitrary sized unsigned values, but is an extremely inefficient way to
-encode signed values (as signed values are otherwise treated as maximally large
-unsigned values).
+<a href="#variablewidth">Variable Width Integer</a> encoding is an efficient way to
+encode arbitrarily sized unsigned values, but is extremely inefficient for
+encoding signed values, as signed values are otherwise treated as maximally large
+unsigned values.
</p>
<p>
-As such, signed vbr values of a specific width are emitted as follows:
+As such, signed VBR values of a specific width are emitted as follows:
</p>
<ul>
-<li>Positive values are emitted as vbrs of the specified width, but with their
+<li>Positive values are emitted as VBRs of the specified width, but with their
value shifted left by one.</li>
-<li>Negative values are emitted as vbrs of the specified width, but the negated
+<li>Negative values are emitted as VBRs of the specified width, but the negated
value is shifted left by one, and the low bit is set.</li>
</ul>
<p>
-With this encoding, small positive and small negative values can both be emitted
-efficiently.
+With this encoding, small positive and small negative values can both
+be emitted efficiently. Signed VBR encoding is used in
+<tt>CST_CODE_INTEGER</tt> and <tt>CST_CODE_WIDE_INTEGER</tt> records
+within <tt>CONSTANTS_BLOCK</tt> blocks.
</p>
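+
+<p>As an illustration, the following C sketch (our own, not part of the
+format definition) shows the value mapping described above; emitting
+the resulting value as a VBR field of the chosen width is omitted:</p>
+
+<div class="doc_code">
+<pre>
+#include &lt;stdint.h&gt;
+
+/* Map a signed value to the unsigned value carried by a signed VBR.
+   Positives are shifted left by one; negatives are negated (in
+   unsigned arithmetic, to avoid overflow), shifted left by one, and
+   have the low bit set. */
+uint64_t MapSignedVBR(int64_t V) {
+  if (V &gt;= 0)
+    return (uint64_t)V &lt;&lt; 1;
+  return ((~(uint64_t)V + 1) &lt;&lt; 1) | 1;
+}
+</pre>
+</div>
+
+<p>Under this mapping, small positive and small negative values both
+map to small unsigned values, which is what makes the encoding
+compact.</p>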
</div>
@@ -716,21 +737,23 @@ LLVM IR is defined with the following blocks:
</p>
<ul>
-<li>8 &mdash; <tt>MODULE_BLOCK</tt> &mdash; This is the top-level block that
+<li>8 &mdash; <a href="#MODULE_BLOCK"><tt>MODULE_BLOCK</tt></a> &mdash; This is the top-level block that
contains the entire module, and describes a variety of per-module
information.</li>
-<li>9 &mdash; <tt>PARAMATTR_BLOCK</tt> &mdash; This enumerates the parameter
+<li>9 &mdash; <a href="#PARAMATTR_BLOCK"><tt>PARAMATTR_BLOCK</tt></a> &mdash; This enumerates the parameter
attributes.</li>
-<li>10 &mdash; <tt>TYPE_BLOCK</tt> &mdash; This describes all of the types in
+<li>10 &mdash; <a href="#TYPE_BLOCK"><tt>TYPE_BLOCK</tt></a> &mdash; This describes all of the types in
the module.</li>
-<li>11 &mdash; <tt>CONSTANTS_BLOCK</tt> &mdash; This describes constants for a
+<li>11 &mdash; <a href="#CONSTANTS_BLOCK"><tt>CONSTANTS_BLOCK</tt></a> &mdash; This describes constants for a
module or function.</li>
-<li>12 &mdash; <tt>FUNCTION_BLOCK</tt> &mdash; This describes a function
+<li>12 &mdash; <a href="#FUNCTION_BLOCK"><tt>FUNCTION_BLOCK</tt></a> &mdash; This describes a function
body.</li>
-<li>13 &mdash; <tt>TYPE_SYMTAB_BLOCK</tt> &mdash; This describes the type symbol
+<li>13 &mdash; <a href="#TYPE_SYMTAB_BLOCK"><tt>TYPE_SYMTAB_BLOCK</tt></a> &mdash; This describes the type symbol
table.</li>
-<li>14 &mdash; <tt>VALUE_SYMTAB_BLOCK</tt> &mdash; This describes a value symbol
+<li>14 &mdash; <a href="#VALUE_SYMTAB_BLOCK"><tt>VALUE_SYMTAB_BLOCK</tt></a> &mdash; This describes a value symbol
table.</li>
+<li>15 &mdash; <a href="#METADATA_BLOCK"><tt>METADATA_BLOCK</tt></a> &mdash; This describes metadata items.</li>
+<li>16 &mdash; <a href="#METADATA_ATTACHMENT"><tt>METADATA_ATTACHMENT</tt></a> &mdash; This contains records associating metadata with function instruction values.</li>
</ul>
</div>
@@ -741,7 +764,387 @@ LLVM IR is defined with the following blocks:
<div class="doc_text">
-<p>
+<p>The <tt>MODULE_BLOCK</tt> block (id 8) is the top-level block for LLVM
+bitcode files, and each bitcode file must contain exactly one. In
+addition to records (described below) containing information
+about the module, a <tt>MODULE_BLOCK</tt> block may contain the
+following sub-blocks:
+</p>
+
+<ul>
+<li><a href="#BLOCKINFO"><tt>BLOCKINFO</tt></a></li>
+<li><a href="#PARAMATTR_BLOCK"><tt>PARAMATTR_BLOCK</tt></a></li>
+<li><a href="#TYPE_BLOCK"><tt>TYPE_BLOCK</tt></a></li>
+<li><a href="#TYPE_SYMTAB_BLOCK"><tt>TYPE_SYMTAB_BLOCK</tt></a></li>
+<li><a href="#VALUE_SYMTAB_BLOCK"><tt>VALUE_SYMTAB_BLOCK</tt></a></li>
+<li><a href="#CONSTANTS_BLOCK"><tt>CONSTANTS_BLOCK</tt></a></li>
+<li><a href="#FUNCTION_BLOCK"><tt>FUNCTION_BLOCK</tt></a></li>
+<li><a href="#METADATA_BLOCK"><tt>METADATA_BLOCK</tt></a></li>
+</ul>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_VERSION">MODULE_CODE_VERSION Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[VERSION, version#]</tt></p>
+
+<p>The <tt>VERSION</tt> record (code 1) contains a single value
+indicating the format version. Only version 0 is supported at this
+time.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_TRIPLE">MODULE_CODE_TRIPLE Record</a>
+</div>
+
+<div class="doc_text">
+<p><tt>[TRIPLE, ...string...]</tt></p>
+
+<p>The <tt>TRIPLE</tt> record (code 2) contains a variable number of
+values representing the bytes of the <tt>target triple</tt>
+specification string.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_DATALAYOUT">MODULE_CODE_DATALAYOUT Record</a>
+</div>
+
+<div class="doc_text">
+<p><tt>[DATALAYOUT, ...string...]</tt></p>
+
+<p>The <tt>DATALAYOUT</tt> record (code 3) contains a variable number of
+values representing the bytes of the <tt>target datalayout</tt>
+specification string.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_ASM">MODULE_CODE_ASM Record</a>
+</div>
+
+<div class="doc_text">
+<p><tt>[ASM, ...string...]</tt></p>
+
+<p>The <tt>ASM</tt> record (code 4) contains a variable number of
+values representing the bytes of <tt>module asm</tt> strings, with
+individual assembly blocks separated by newline (ASCII 10) characters.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_SECTIONNAME">MODULE_CODE_SECTIONNAME Record</a>
+</div>
+
+<div class="doc_text">
+<p><tt>[SECTIONNAME, ...string...]</tt></p>
+
+<p>The <tt>SECTIONNAME</tt> record (code 5) contains a variable number
+of values representing the bytes of a single section name
+string. There should be one <tt>SECTIONNAME</tt> record for each
+section name referenced (e.g., in global variable or function
+<tt>section</tt> attributes) within the module. These records can be
+referenced by the 1-based index in the <i>section</i> fields of
+<tt>GLOBALVAR</tt> or <tt>FUNCTION</tt> records.</p>
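+
+<p>For example, a <tt>GLOBALVAR</tt> record whose <i>section</i> field
+is 1 refers to the first <tt>SECTIONNAME</tt> record emitted in the
+module, while a value of 0 means that no section was specified (an
+illustration of the 1-based indexing described above).</p>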
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_DEPLIB">MODULE_CODE_DEPLIB Record</a>
+</div>
+
+<div class="doc_text">
+<p><tt>[DEPLIB, ...string...]</tt></p>
+
+<p>The <tt>DEPLIB</tt> record (code 6) contains a variable number of
+values representing the bytes of a single dependent library name
+string, one of the libraries mentioned in a <tt>deplibs</tt>
+declaration. There should be one <tt>DEPLIB</tt> record for each
+library name referenced.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_GLOBALVAR">MODULE_CODE_GLOBALVAR Record</a>
+</div>
+
+<div class="doc_text">
+<p><tt>[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal]</tt></p>
+
+<p>The <tt>GLOBALVAR</tt> record (code 7) marks the declaration or
+definition of a global variable. The operand fields are:</p>
+
+<ul>
+<li><i>pointer type</i>: The type index of the pointer type used to point to
+this global variable</li>
+
+<li><i>isconst</i>: Non-zero if the variable is treated as constant within
+the module, or zero if it is not</li>
+
+<li><i>initid</i>: If non-zero, the value index of the initializer for this
+variable, plus 1.</li>
+
+<li><a name="linkage"><i>linkage</i></a>: An encoding of the linkage
+type for this variable:
+ <ul>
+ <li><tt>external</tt>: code 0</li>
+ <li><tt>weak</tt>: code 1</li>
+ <li><tt>appending</tt>: code 2</li>
+ <li><tt>internal</tt>: code 3</li>
+ <li><tt>linkonce</tt>: code 4</li>
+ <li><tt>dllimport</tt>: code 5</li>
+ <li><tt>dllexport</tt>: code 6</li>
+ <li><tt>extern_weak</tt>: code 7</li>
+ <li><tt>common</tt>: code 8</li>
+ <li><tt>private</tt>: code 9</li>
+ <li><tt>weak_odr</tt>: code 10</li>
+ <li><tt>linkonce_odr</tt>: code 11</li>
+ <li><tt>available_externally</tt>: code 12</li>
+ <li><tt>linker_private</tt>: code 13</li>
+ </ul>
+</li>
+
+<li><i>alignment</i>: The logarithm base 2 of the variable's requested
+alignment, plus 1 (see the example following this list)</li>
+
+<li><i>section</i>: If non-zero, the 1-based section index in the
+table of <a href="#MODULE_CODE_SECTIONNAME">MODULE_CODE_SECTIONNAME</a>
+entries.</li>
+
+<li><a name="visibility"><i>visibility</i></a>: If present, an
+encoding of the visibility of this variable:
+ <ul>
+ <li><tt>default</tt>: code 0</li>
+ <li><tt>hidden</tt>: code 1</li>
+ <li><tt>protected</tt>: code 2</li>
+ </ul>
+</li>
+
+<li><i>threadlocal</i>: If present and non-zero, indicates that the variable
+is <tt>thread_local</tt></li>
+
+</ul>
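+
+<p>For example, under this encoding a variable with a requested
+alignment of 16 bytes would presumably carry an <i>alignment</i> field
+of log<sub>2</sub>(16)&nbsp;+&nbsp;1&nbsp;=&nbsp;5, with a field of 0
+indicating that no alignment was specified (a hypothetical reading of
+the rule above, not an example from a real module).</p>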
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_FUNCTION">MODULE_CODE_FUNCTION Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc]</tt></p>
+
+<p>The <tt>FUNCTION</tt> record (code 8) marks the declaration or
+definition of a function. The operand fields are:</p>
+
+<ul>
+<li><i>type</i>: The type index of the function type describing this function</li>
+
+<li><i>callingconv</i>: The calling convention number:
+ <ul>
+ <li><tt>ccc</tt>: code 0</li>
+ <li><tt>fastcc</tt>: code 8</li>
+ <li><tt>coldcc</tt>: code 9</li>
+ <li><tt>x86_stdcallcc</tt>: code 64</li>
+ <li><tt>x86_fastcallcc</tt>: code 65</li>
+ <li><tt>arm_apcscc</tt>: code 66</li>
+ <li><tt>arm_aapcscc</tt>: code 67</li>
+ <li><tt>arm_aapcs_vfpcc</tt>: code 68</li>
+ </ul>
+</li>
+
+<li><i>isproto</i>: Non-zero if this entry represents a declaration
+rather than a definition</li>
+
+<li><i>linkage</i>: An encoding of the <a href="#linkage">linkage type</a>
+for this function</li>
+
+<li><i>paramattr</i>: If nonzero, the 1-based parameter attribute index
+into the table of <a href="#PARAMATTR_CODE_ENTRY">PARAMATTR_CODE_ENTRY</a>
+entries.</li>
+
+<li><i>alignment</i>: The logarithm base 2 of the function's requested
+alignment, plus 1</li>
+
+<li><i>section</i>: If non-zero, the 1-based section index in the
+table of <a href="#MODULE_CODE_SECTIONNAME">MODULE_CODE_SECTIONNAME</a>
+entries.</li>
+
+<li><i>visibility</i>: An encoding of the <a href="#visibility">visibility</a>
+ of this function</li>
+
+<li><i>gc</i>: If present and nonzero, the 1-based garbage collector
+index in the table of
+<a href="#MODULE_CODE_GCNAME">MODULE_CODE_GCNAME</a> entries.</li>
+</ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_ALIAS">MODULE_CODE_ALIAS Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[ALIAS, alias type, aliasee val#, linkage, visibility]</tt></p>
+
+<p>The <tt>ALIAS</tt> record (code 9) marks the definition of an
+alias. The operand fields are:</p>
+
+<ul>
+<li><i>alias type</i>: The type index of the alias</li>
+
+<li><i>aliasee val#</i>: The value index of the aliased value</li>
+
+<li><i>linkage</i>: An encoding of the <a href="#linkage">linkage type</a>
+for this alias</li>
+
+<li><i>visibility</i>: If present, an encoding of the
+<a href="#visibility">visibility</a> of the alias</li>
+
+</ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_PURGEVALS">MODULE_CODE_PURGEVALS Record</a>
+</div>
+
+<div class="doc_text">
+<p><tt>[PURGEVALS, numvals]</tt></p>
+
+<p>The <tt>PURGEVALS</tt> record (code 10) resets the module-level
+value list to the size given by the single operand value. Module-level
+value list items are added by <tt>GLOBALVAR</tt>, <tt>FUNCTION</tt>,
+and <tt>ALIAS</tt> records. After a <tt>PURGEVALS</tt> record is seen,
+new value indices will start from the given <i>numvals</i> value.</p>
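+
+<p>For illustration: if a module had already defined three module-level
+values (indices 0 through 2) and then emitted <tt>[PURGEVALS, 1]</tt>,
+only index 0 would remain valid, and the next value defined would
+receive index 1 (a hypothetical walk-through of the rule above).</p>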
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="MODULE_CODE_GCNAME">MODULE_CODE_GCNAME Record</a>
+</div>
+
+<div class="doc_text">
+<p><tt>[GCNAME, ...string...]</tt></p>
+
+<p>The <tt>GCNAME</tt> record (code 11) contains a variable number of
+values representing the bytes of a single garbage collector name
+string. There should be one <tt>GCNAME</tt> record for each garbage
+collector name referenced in function <tt>gc</tt> attributes within
+the module. These records can be referenced by 1-based index in the <i>gc</i>
+fields of <tt>FUNCTION</tt> records.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="PARAMATTR_BLOCK">PARAMATTR_BLOCK Contents</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>PARAMATTR_BLOCK</tt> block (id 9) ...
+</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="PARAMATTR_CODE_ENTRY">PARAMATTR_CODE_ENTRY Record</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>[ENTRY, paramidx0, attr0, paramidx1, attr1...]</tt></p>
+
+<p>The <tt>ENTRY</tt> record (code 1) ...
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="TYPE_BLOCK">TYPE_BLOCK Contents</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>TYPE_BLOCK</tt> block (id 10) ...
+</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="CONSTANTS_BLOCK">CONSTANTS_BLOCK Contents</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>CONSTANTS_BLOCK</tt> block (id 11) ...
+</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="FUNCTION_BLOCK">FUNCTION_BLOCK Contents</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>FUNCTION_BLOCK</tt> block (id 12) ...
+</p>
+
+<p>In addition to the record types described below, a
+<tt>FUNCTION_BLOCK</tt> block may contain the following sub-blocks:
+</p>
+
+<ul>
+<li><a href="#CONSTANTS_BLOCK"><tt>CONSTANTS_BLOCK</tt></a></li>
+<li><a href="#VALUE_SYMTAB_BLOCK"><tt>VALUE_SYMTAB_BLOCK</tt></a></li>
+<li><a href="#METADATA_ATTACHMENT"><tt>METADATA_ATTACHMENT</tt></a></li>
+</ul>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="TYPE_SYMTAB_BLOCK">TYPE_SYMTAB_BLOCK Contents</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>TYPE_SYMTAB_BLOCK</tt> block (id 13) ...
+</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="VALUE_SYMTAB_BLOCK">VALUE_SYMTAB_BLOCK Contents</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>VALUE_SYMTAB_BLOCK</tt> block (id 14) ...
+</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="METADATA_BLOCK">METADATA_BLOCK Contents</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>METADATA_BLOCK</tt> block (id 15) ...
+</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="METADATA_ATTACHMENT">METADATA_ATTACHMENT Contents</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>METADATA_ATTACHMENT</tt> block (id 16) ...
</p>
</div>
@@ -755,7 +1158,7 @@ LLVM IR is defined with the following blocks:
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
+Last modified: $Date: 2009-10-29 05:25:46 +0100 (Thu, 29 Oct 2009) $
</address>
</body>
</html>
diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod
index a818302..929aed7 100644
--- a/docs/CommandGuide/lit.pod
+++ b/docs/CommandGuide/lit.pod
@@ -49,6 +49,11 @@ Show the B<lit> help message.
Run I<N> tests in parallel. By default, this is automatically chosen to match
the number of detected available CPUs.
+=item B<--config-prefix>=I<NAME>
+
+Search for I<NAME.cfg> and I<NAME.site.cfg> when searching for test suites,
+instead of I<lit.cfg> and I<lit.site.cfg>.
+
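+For example, invoking B<lit> as C<lit --config-prefix=proj tests/> would
+search for I<proj.cfg> and I<proj.site.cfg> instead (a hypothetical
+invocation illustrating this option).
+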
=back
=head1 OUTPUT OPTIONS
diff --git a/docs/CompilerDriver.html b/docs/CompilerDriver.html
index 7a40a4d..ded3cf4 100644
--- a/docs/CompilerDriver.html
+++ b/docs/CompilerDriver.html
@@ -33,11 +33,12 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->
</ul>
</li>
<li><a class="reference internal" href="#language-map" id="id15">Language map</a></li>
-<li><a class="reference internal" href="#more-advanced-topics" id="id16">More advanced topics</a><ul>
-<li><a class="reference internal" href="#hooks-and-environment-variables" id="id17">Hooks and environment variables</a></li>
-<li><a class="reference internal" href="#how-plugins-are-loaded" id="id18">How plugins are loaded</a></li>
-<li><a class="reference internal" href="#debugging" id="id19">Debugging</a></li>
-<li><a class="reference internal" href="#conditioning-on-the-executable-name" id="id20">Conditioning on the executable name</a></li>
+<li><a class="reference internal" href="#option-preprocessor" id="id16">Option preprocessor</a></li>
+<li><a class="reference internal" href="#more-advanced-topics" id="id17">More advanced topics</a><ul>
+<li><a class="reference internal" href="#hooks-and-environment-variables" id="id18">Hooks and environment variables</a></li>
+<li><a class="reference internal" href="#how-plugins-are-loaded" id="id19">How plugins are loaded</a></li>
+<li><a class="reference internal" href="#debugging" id="id20">Debugging</a></li>
+<li><a class="reference internal" href="#conditioning-on-the-executable-name" id="id21">Conditioning on the executable name</a></li>
</ul>
</li>
</ul>
@@ -307,13 +308,13 @@ separate option groups syntactically.</p>
<tt class="docutils literal"><span class="pre">-std=c99</span></tt>. It is also allowed to use spaces instead of the equality
sign: <tt class="docutils literal"><span class="pre">-std</span> <span class="pre">c99</span></tt>. At most one occurrence is allowed.</li>
<li><tt class="docutils literal"><span class="pre">parameter_list_option</span></tt> - same as the above, but more than one option
-occurrence is allowed.</li>
+occurrence is allowed.</li>
<li><tt class="docutils literal"><span class="pre">prefix_option</span></tt> - same as the parameter_option, but the option name and
argument do not have to be separated. Example: <tt class="docutils literal"><span class="pre">-ofile</span></tt>. This can be also
specified as <tt class="docutils literal"><span class="pre">-o</span> <span class="pre">file</span></tt>; however, <tt class="docutils literal"><span class="pre">-o=file</span></tt> will be parsed incorrectly
(<tt class="docutils literal"><span class="pre">=file</span></tt> will be interpreted as option value). At most one occurrence is
allowed.</li>
-<li><tt class="docutils literal"><span class="pre">prefix_list_option</span></tt> - same as the above, but more than one occurrence of
+<li><tt class="docutils literal"><span class="pre">prefix_list_option</span></tt> - same as the above, but more than one occurrence of
the option is allowed; example: <tt class="docutils literal"><span class="pre">-lm</span> <span class="pre">-lpthread</span></tt>.</li>
<li><tt class="docutils literal"><span class="pre">alias_option</span></tt> - a special option type for creating aliases. Unlike other
option types, aliases are not allowed to have any properties besides the
@@ -343,8 +344,9 @@ output).</li>
output.</li>
<li><tt class="docutils literal"><span class="pre">multi_val</span> <span class="pre">n</span></tt> - this option takes <em>n</em> arguments (can be useful in some
special cases). Usage example: <tt class="docutils literal"><span class="pre">(parameter_list_option</span> <span class="pre">&quot;foo&quot;,</span> <span class="pre">(multi_val</span>
-<span class="pre">3))</span></tt>. Only list options can have this attribute; you can, however, use
-the <tt class="docutils literal"><span class="pre">one_or_more</span></tt> and <tt class="docutils literal"><span class="pre">zero_or_one</span></tt> properties.</li>
+<span class="pre">3))</span></tt>; the command-line syntax is '-foo a b c'. Only list options can have
+this attribute; you can, however, use the <tt class="docutils literal"><span class="pre">one_or_more</span></tt>, <tt class="docutils literal"><span class="pre">zero_or_one</span></tt>
+and <tt class="docutils literal"><span class="pre">required</span></tt> properties.</li>
<li><tt class="docutils literal"><span class="pre">init</span></tt> - this option has a default value, either a string (if it is a
parameter), or a boolean (if it is a switch; boolean constants are called
<tt class="docutils literal"><span class="pre">true</span></tt> and <tt class="docutils literal"><span class="pre">false</span></tt>). List options can't have this attribute. Usage
@@ -417,8 +419,15 @@ readability. It is usually better to split tool descriptions and/or
use TableGen inheritance instead.</p>
<ul class="simple">
<li>Possible tests are:<ul>
-<li><tt class="docutils literal"><span class="pre">switch_on</span></tt> - Returns true if a given command-line switch is
-provided by the user. Example: <tt class="docutils literal"><span class="pre">(switch_on</span> <span class="pre">&quot;opt&quot;)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">switch_on</span></tt> - Returns true if a given command-line switch is provided by
+the user. Can be given a list as argument; in that case <tt class="docutils literal"><span class="pre">(switch_on</span> <span class="pre">[&quot;foo&quot;,</span>
+<span class="pre">&quot;bar&quot;,</span> <span class="pre">&quot;baz&quot;])</span></tt> is equivalent to <tt class="docutils literal"><span class="pre">(and</span> <span class="pre">(switch_on</span> <span class="pre">&quot;foo&quot;),</span> <span class="pre">(switch_on</span>
+<span class="pre">&quot;bar&quot;),</span> <span class="pre">(switch_on</span> <span class="pre">&quot;baz&quot;))</span></tt>.
+Example: <tt class="docutils literal"><span class="pre">(switch_on</span> <span class="pre">&quot;opt&quot;)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">any_switch_on</span></tt> - Given a list of switch options, returns true if any of
+the switches is turned on.
+Example: <tt class="docutils literal"><span class="pre">(any_switch_on</span> <span class="pre">[&quot;foo&quot;,</span> <span class="pre">&quot;bar&quot;,</span> <span class="pre">&quot;baz&quot;])</span></tt> is equivalent to <tt class="docutils literal"><span class="pre">(or</span>
+<span class="pre">(switch_on</span> <span class="pre">&quot;foo&quot;),</span> <span class="pre">(switch_on</span> <span class="pre">&quot;bar&quot;),</span> <span class="pre">(switch_on</span> <span class="pre">&quot;baz&quot;))</span></tt>.</li>
<li><tt class="docutils literal"><span class="pre">parameter_equals</span></tt> - Returns true if a command-line parameter equals
a given value.
Example: <tt class="docutils literal"><span class="pre">(parameter_equals</span> <span class="pre">&quot;W&quot;,</span> <span class="pre">&quot;all&quot;)</span></tt>.</li>
@@ -428,16 +437,24 @@ Example: <tt class="docutils literal"><span class="pre">(parameter_in_list</span
<li><tt class="docutils literal"><span class="pre">input_languages_contain</span></tt> - Returns true if a given language
belongs to the current input language set.
Example: <tt class="docutils literal"><span class="pre">(input_languages_contain</span> <span class="pre">&quot;c++&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">in_language</span></tt> - Evaluates to true if the input file language
-equals to the argument. At the moment works only with <tt class="docutils literal"><span class="pre">cmd_line</span></tt>
-and <tt class="docutils literal"><span class="pre">actions</span></tt> (on non-join nodes).
+<li><tt class="docutils literal"><span class="pre">in_language</span></tt> - Evaluates to true if the input file language is equal to
+the argument. At the moment works only with <tt class="docutils literal"><span class="pre">cmd_line</span></tt> and <tt class="docutils literal"><span class="pre">actions</span></tt> (on
+non-join nodes).
Example: <tt class="docutils literal"><span class="pre">(in_language</span> <span class="pre">&quot;c++&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">not_empty</span></tt> - Returns true if a given option (which should be
-either a parameter or a parameter list) is set by the
-user.
+<li><tt class="docutils literal"><span class="pre">not_empty</span></tt> - Returns true if a given option (which should be either a
+parameter or a parameter list) is set by the user. Like <tt class="docutils literal"><span class="pre">switch_on</span></tt>, it can
+also be given a list as argument.
Example: <tt class="docutils literal"><span class="pre">(not_empty</span> <span class="pre">&quot;o&quot;)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">any_not_empty</span></tt> - Returns true if <tt class="docutils literal"><span class="pre">not_empty</span></tt> returns true for any of
+the options in the list.
+Example: <tt class="docutils literal"><span class="pre">(any_not_empty</span> <span class="pre">[&quot;foo&quot;,</span> <span class="pre">&quot;bar&quot;,</span> <span class="pre">&quot;baz&quot;])</span></tt> is equivalent to <tt class="docutils literal"><span class="pre">(or</span>
+<span class="pre">(not_empty</span> <span class="pre">&quot;foo&quot;),</span> <span class="pre">(not_empty</span> <span class="pre">&quot;bar&quot;),</span> <span class="pre">(not_empty</span> <span class="pre">&quot;baz&quot;))</span></tt>.</li>
<li><tt class="docutils literal"><span class="pre">empty</span></tt> - The opposite of <tt class="docutils literal"><span class="pre">not_empty</span></tt>. Equivalent to <tt class="docutils literal"><span class="pre">(not</span> <span class="pre">(not_empty</span>
-<span class="pre">X))</span></tt>. Provided for convenience.</li>
+<span class="pre">X))</span></tt>. Provided for convenience. Can be given a list as argument.</li>
+<li><tt class="docutils literal"><span class="pre">any_empty</span></tt> - Returns true if <tt class="docutils literal"><span class="pre">empty</span></tt> returns true for any of
+the options in the list.
+Example: <tt class="docutils literal"><span class="pre">(any_empty</span> <span class="pre">[&quot;foo&quot;,</span> <span class="pre">&quot;bar&quot;,</span> <span class="pre">&quot;baz&quot;])</span></tt> is equivalent to <tt class="docutils literal"><span class="pre">(not</span> <span class="pre">(and</span>
+<span class="pre">(not_empty</span> <span class="pre">&quot;foo&quot;),</span> <span class="pre">(not_empty</span> <span class="pre">&quot;bar&quot;),</span> <span class="pre">(not_empty</span> <span class="pre">&quot;baz&quot;)))</span></tt>.</li>
<li><tt class="docutils literal"><span class="pre">single_input_file</span></tt> - Returns true if there was only one input file
provided on the command-line. Used without arguments:
<tt class="docutils literal"><span class="pre">(single_input_file)</span></tt>.</li>
@@ -481,8 +498,8 @@ options that aren't mentioned in the option list.</p>
<li>Possible tool properties:<ul>
<li><tt class="docutils literal"><span class="pre">in_language</span></tt> - input language name. Can be either a string or a
list, in case the tool supports multiple input languages.</li>
-<li><tt class="docutils literal"><span class="pre">out_language</span></tt> - output language name. Tools are not allowed to
-have multiple output languages.</li>
+<li><tt class="docutils literal"><span class="pre">out_language</span></tt> - output language name. Multiple output languages are not
+allowed.</li>
<li><tt class="docutils literal"><span class="pre">output_suffix</span></tt> - output file suffix. Can also be changed
dynamically, see documentation on actions.</li>
<li><tt class="docutils literal"><span class="pre">cmd_line</span></tt> - the actual command used to run the tool. You can
@@ -537,10 +554,11 @@ like a linker.</p>
command.
Example: <tt class="docutils literal"><span class="pre">(case</span> <span class="pre">(switch_on</span> <span class="pre">&quot;pthread&quot;),</span> <span class="pre">(append_cmd</span>
<span class="pre">&quot;-lpthread&quot;))</span></tt></li>
-<li><tt class="docutils literal"><span class="pre">error`</span> <span class="pre">-</span> <span class="pre">exit</span> <span class="pre">with</span> <span class="pre">error.</span>
-<span class="pre">Example:</span> <span class="pre">``(error</span> <span class="pre">&quot;Mixing</span> <span class="pre">-c</span> <span class="pre">and</span> <span class="pre">-S</span> <span class="pre">is</span> <span class="pre">not</span> <span class="pre">allowed!&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">forward</span></tt> - forward an option unchanged.
-Example: <tt class="docutils literal"><span class="pre">(forward</span> <span class="pre">&quot;Wall&quot;)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">error</span></tt> - exit with error.
+Example: <tt class="docutils literal"><span class="pre">(error</span> <span class="pre">&quot;Mixing</span> <span class="pre">-c</span> <span class="pre">and</span> <span class="pre">-S</span> <span class="pre">is</span> <span class="pre">not</span> <span class="pre">allowed!&quot;)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">warning</span></tt> - print a warning.
+Example: <tt class="docutils literal"><span class="pre">(warning</span> <span class="pre">&quot;Specifying</span> <span class="pre">both</span> <span class="pre">-O1</span> <span class="pre">and</span> <span class="pre">-O2</span> <span class="pre">is</span> <span class="pre">meaningless!&quot;)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">forward</span></tt> - forward an option unchanged. Example: <tt class="docutils literal"><span class="pre">(forward</span> <span class="pre">&quot;Wall&quot;)</span></tt>.</li>
<li><tt class="docutils literal"><span class="pre">forward_as</span></tt> - Change the name of an option, but forward the
argument unchanged.
Example: <tt class="docutils literal"><span class="pre">(forward_as</span> <span class="pre">&quot;O0&quot;,</span> <span class="pre">&quot;--disable-optimization&quot;)</span></tt>.</li>
@@ -583,10 +601,37 @@ linked with the root node. Since tools are not allowed to have
multiple output languages, for nodes &quot;inside&quot; the graph the input and
output languages should match. This is enforced at compile-time.</p>
</div>
+<div class="section" id="option-preprocessor">
+<h1><a class="toc-backref" href="#id16">Option preprocessor</a></h1>
+<p>It is sometimes useful to run error-checking code before processing the
+compilation graph. For example, if optimization options &quot;-O1&quot; and &quot;-O2&quot; are
+implemented as switches, we might want to output a warning if the user invokes
+the driver with both of these options enabled.</p>
+<p>The <tt class="docutils literal"><span class="pre">OptionPreprocessor</span></tt> feature is reserved specially for these
+occasions. Example (adapted from the built-in Base plugin):</p>
+<pre class="literal-block">
+def Preprocess : OptionPreprocessor&lt;
+(case (and (switch_on &quot;O3&quot;), (any_switch_on [&quot;O0&quot;, &quot;O1&quot;, &quot;O2&quot;])),
+ [(unset_option [&quot;O0&quot;, &quot;O1&quot;, &quot;O2&quot;]),
+ (warning &quot;Multiple -O options specified, defaulted to -O3.&quot;)],
+ (and (switch_on &quot;O2&quot;), (any_switch_on [&quot;O0&quot;, &quot;O1&quot;])),
+ (unset_option [&quot;O0&quot;, &quot;O1&quot;]),
+ (and (switch_on &quot;O1&quot;), (switch_on &quot;O0&quot;)),
+ (unset_option &quot;O0&quot;))
+&gt;;
+</pre>
+<p>Here, <tt class="docutils literal"><span class="pre">OptionPreprocessor</span></tt> is used to unset all spurious optimization options
+(so that they are not forwarded to the compiler).</p>
+<p><tt class="docutils literal"><span class="pre">OptionPreprocessor</span></tt> is basically a single big <tt class="docutils literal"><span class="pre">case</span></tt> expression, which is
+evaluated only once right after the plugin is loaded. The only allowed actions
+in <tt class="docutils literal"><span class="pre">OptionPreprocessor</span></tt> are <tt class="docutils literal"><span class="pre">error</span></tt>, <tt class="docutils literal"><span class="pre">warning</span></tt> and a special action
+<tt class="docutils literal"><span class="pre">unset_option</span></tt>, which, as the name suggests, unsets a given option. For
+convenience, <tt class="docutils literal"><span class="pre">unset_option</span></tt> also works on lists.</p>
+</div>
<div class="section" id="more-advanced-topics">
-<h1><a class="toc-backref" href="#id16">More advanced topics</a></h1>
+<h1><a class="toc-backref" href="#id17">More advanced topics</a></h1>
<div class="section" id="hooks-and-environment-variables">
-<span id="hooks"></span><h2><a class="toc-backref" href="#id17">Hooks and environment variables</a></h2>
+<span id="hooks"></span><h2><a class="toc-backref" href="#id18">Hooks and environment variables</a></h2>
<p>Normally, LLVMC executes programs from the system <tt class="docutils literal"><span class="pre">PATH</span></tt>. Sometimes,
this is not sufficient: for example, we may want to specify tool paths
or names in the configuration file. This can be easily achieved via
@@ -619,7 +664,7 @@ the <tt class="docutils literal"><span class="pre">case</span></tt> expression (
</pre>
</div>
<div class="section" id="how-plugins-are-loaded">
-<span id="priorities"></span><h2><a class="toc-backref" href="#id18">How plugins are loaded</a></h2>
+<span id="priorities"></span><h2><a class="toc-backref" href="#id19">How plugins are loaded</a></h2>
<p>It is possible for LLVMC plugins to depend on each other. For example,
one can create edges between nodes defined in some other plugin. To
make this work, however, that plugin should be loaded first. To
@@ -635,7 +680,7 @@ with 0. Therefore, the plugin with the highest priority value will be
loaded last.</p>
</div>
<div class="section" id="debugging">
-<h2><a class="toc-backref" href="#id19">Debugging</a></h2>
+<h2><a class="toc-backref" href="#id20">Debugging</a></h2>
<p>When writing LLVMC plugins, it can be useful to get a visual view of
the resulting compilation graph. This can be achieved via the command
line option <tt class="docutils literal"><span class="pre">--view-graph</span></tt>. This command assumes that <a class="reference external" href="http://www.graphviz.org/">Graphviz</a> and
@@ -651,7 +696,7 @@ perform any compilation tasks and returns the number of encountered
errors as its status code.</p>
</div>
<div class="section" id="conditioning-on-the-executable-name">
-<h2><a class="toc-backref" href="#id20">Conditioning on the executable name</a></h2>
+<h2><a class="toc-backref" href="#id21">Conditioning on the executable name</a></h2>
<p>For now, the executable name (the value passed to the driver in <tt class="docutils literal"><span class="pre">argv[0]</span></tt>) is
accessible only in the C++ code (i.e. hooks). Use the following code:</p>
<pre class="literal-block">
@@ -682,7 +727,7 @@ the <tt class="docutils literal"><span class="pre">Base</span></tt> plugin behav
<a href="mailto:foldr@codedgers.com">Mikhail Glushenkov</a><br />
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br />
-Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
+Last modified: $Date: 2009-10-26 02:35:46 +0100 (Mon, 26 Oct 2009) $
</address></div>
</div>
</div>
diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html
index e3ceec6..ca6563b 100644
--- a/docs/GettingStarted.html
+++ b/docs/GettingStarted.html
@@ -1154,7 +1154,7 @@ first command may not be required if you are already using the module):</p>
<div class="doc_code">
<pre>
$ mount -t binfmt_misc none /proc/sys/fs/binfmt_misc
-$ echo ':llvm:M::llvm::/path/to/lli:' &gt; /proc/sys/fs/binfmt_misc/register
+$ echo ':llvm:M::BC::/path/to/lli:' &gt; /proc/sys/fs/binfmt_misc/register
$ chmod u+x hello.bc (if needed)
$ ./hello.bc
</pre>
@@ -1636,7 +1636,7 @@ out:</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.x10sys.com/rspencer/">Reid Spencer</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-10-23 08:20:06 +0200 (Fri, 23 Oct 2009) $
+ Last modified: $Date: 2009-11-04 07:15:28 +0100 (Wed, 04 Nov 2009) $
</address>
</body>
</html>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 9741fdb..c06a88b 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -83,6 +83,7 @@
<li><a href="#complexconstants">Complex Constants</a></li>
<li><a href="#globalconstants">Global Variable and Function Addresses</a></li>
<li><a href="#undefvalues">Undefined Values</a></li>
+ <li><a href="#blockaddress">Addresses of Basic Blocks</a></li>
<li><a href="#constantexprs">Constant Expressions</a></li>
<li><a href="#metadata">Embedded Metadata</a></li>
</ol>
@@ -110,6 +111,7 @@
<li><a href="#i_ret">'<tt>ret</tt>' Instruction</a></li>
<li><a href="#i_br">'<tt>br</tt>' Instruction</a></li>
<li><a href="#i_switch">'<tt>switch</tt>' Instruction</a></li>
+ <li><a href="#i_indirectbr">'<tt>indirectbr</tt>' Instruction</a></li>
<li><a href="#i_invoke">'<tt>invoke</tt>' Instruction</a></li>
<li><a href="#i_unwind">'<tt>unwind</tt>' Instruction</a></li>
<li><a href="#i_unreachable">'<tt>unreachable</tt>' Instruction</a></li>
@@ -156,8 +158,6 @@
</li>
<li><a href="#memoryops">Memory Access and Addressing Operations</a>
<ol>
- <li><a href="#i_malloc">'<tt>malloc</tt>' Instruction</a></li>
- <li><a href="#i_free">'<tt>free</tt>' Instruction</a></li>
<li><a href="#i_alloca">'<tt>alloca</tt>' Instruction</a></li>
<li><a href="#i_load">'<tt>load</tt>' Instruction</a></li>
<li><a href="#i_store">'<tt>store</tt>' Instruction</a></li>
@@ -338,7 +338,7 @@
IR's", allowing many source languages to be mapped to them). By providing
type information, LLVM can be used as the target of optimizations: for
example, through pointer analysis, it can be proven that a C automatic
- variable is never accessed outside of the current function... allowing it to
+ variable is never accessed outside of the current function, allowing it to
be promoted to a simple SSA value instead of a memory location.</p>
</div>
@@ -359,12 +359,12 @@
</pre>
</div>
-<p>...because the definition of <tt>%x</tt> does not dominate all of its
- uses. The LLVM infrastructure provides a verification pass that may be used
- to verify that an LLVM module is well formed. This pass is automatically run
- by the parser after parsing input assembly and by the optimizer before it
- outputs bitcode. The violations pointed out by the verifier pass indicate
- bugs in transformation passes or input to the parser.</p>
+<p>because the definition of <tt>%x</tt> does not dominate all of its uses. The
+ LLVM infrastructure provides a verification pass that may be used to verify
+ that an LLVM module is well formed. This pass is automatically run by the
+ parser after parsing input assembly and by the optimizer before it outputs
+ bitcode. The violations pointed out by the verifier pass indicate bugs in
+ transformation passes or input to the parser.</p>
</div>
@@ -438,8 +438,8 @@
<div class="doc_code">
<pre>
-<a href="#i_add">add</a> i32 %X, %X <i>; yields {i32}:%0</i>
-<a href="#i_add">add</a> i32 %0, %0 <i>; yields {i32}:%1</i>
+%0 = <a href="#i_add">add</a> i32 %X, %X <i>; yields {i32}:%0</i>
+%1 = <a href="#i_add">add</a> i32 %0, %0 <i>; yields {i32}:%1</i>
%result = <a href="#i_add">add</a> i32 %1, %1
</pre>
</div>
@@ -457,7 +457,7 @@
<li>Unnamed temporaries are numbered sequentially</li>
</ol>
-<p>...and it also shows a convention that we follow in this document. When
+<p>It also shows a convention that we follow in this document. When
demonstrating instructions, we will follow an instruction with a comment that
defines the type and name of value produced. Comments are shown in italic
text.</p>
@@ -482,24 +482,21 @@
the "hello world" module:</p>
<div class="doc_code">
-<pre><i>; Declare the string constant as a global constant...</i>
-<a href="#identifiers">@.LC0</a> = <a href="#linkage_internal">internal</a> <a
- href="#globalvars">constant</a> <a href="#t_array">[13 x i8]</a> c"hello world\0A\00" <i>; [13 x i8]*</i>
+<pre>
+<i>; Declare the string constant as a global constant.</i>
+<a href="#identifiers">@.LC0</a> = <a href="#linkage_internal">internal</a> <a href="#globalvars">constant</a> <a href="#t_array">[13 x i8]</a> c"hello world\0A\00" <i>; [13 x i8]*</i>
<i>; External declaration of the puts function</i>
-<a href="#functionstructure">declare</a> i32 @puts(i8 *) <i>; i32(i8 *)* </i>
+<a href="#functionstructure">declare</a> i32 @puts(i8 *) <i>; i32(i8 *)* </i>
<i>; Definition of main function</i>
-define i32 @main() { <i>; i32()* </i>
- <i>; Convert [13 x i8]* to i8 *...</i>
- %cast210 = <a
- href="#i_getelementptr">getelementptr</a> [13 x i8]* @.LC0, i64 0, i64 0 <i>; i8 *</i>
+define i32 @main() { <i>; i32()* </i>
+ <i>; Convert [13 x i8]* to i8 *...</i>
+ %cast210 = <a href="#i_getelementptr">getelementptr</a> [13 x i8]* @.LC0, i64 0, i64 0 <i>; i8 *</i>
- <i>; Call puts function to write out the string to stdout...</i>
- <a
- href="#i_call">call</a> i32 @puts(i8 * %cast210) <i>; i32</i>
- <a
- href="#i_ret">ret</a> i32 0<br>}<br>
+ <i>; Call puts function to write out the string to stdout.</i>
+ <a href="#i_call">call</a> i32 @puts(i8 * %cast210) <i>; i32</i>
+ <a href="#i_ret">ret</a> i32 0<br>}<br>
</pre>
</div>
@@ -527,7 +524,7 @@ define i32 @main() { <i>; i32()* </
linkage:</p>
<dl>
- <dt><tt><b><a name="linkage_private">private</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_private">private</a></b></tt></dt>
<dd>Global values with private linkage are only directly accessible by objects
in the current module. In particular, linking code into a module with an
private global value may cause the private to be renamed as necessary to
@@ -535,7 +532,7 @@ define i32 @main() { <i>; i32()* </
references can be updated. This doesn't show up in any symbol table in the
object file.</dd>
- <dt><tt><b><a name="linkage_linker_private">linker_private</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_linker_private">linker_private</a></b></tt></dt>
<dd>Similar to private, but the symbol is passed through the assembler and
removed by the linker after evaluation. Note that (unlike private
symbols) linker_private symbols are subject to coalescing by the linker:
@@ -543,12 +540,12 @@ define i32 @main() { <i>; i32()* </
normal strong symbols, they are removed by the linker from the final
linked image (executable or dynamic library).</dd>
- <dt><tt><b><a name="linkage_internal">internal</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_internal">internal</a></b></tt></dt>
<dd>Similar to private, but the value shows as a local symbol
(<tt>STB_LOCAL</tt> in the case of ELF) in the object file. This
corresponds to the notion of the '<tt>static</tt>' keyword in C.</dd>
- <dt><tt><b><a name="linkage_available_externally">available_externally</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_available_externally">available_externally</a></b></tt></dt>
<dd>Globals with "<tt>available_externally</tt>" linkage are never emitted
into the object file corresponding to the LLVM module. They exist to
allow inlining and other optimizations to take place given knowledge of
@@ -557,20 +554,20 @@ define i32 @main() { <i>; i32()* </
be discarded at will, and are otherwise the same as <tt>linkonce_odr</tt>.
This linkage type is only allowed on definitions, not declarations.</dd>
- <dt><tt><b><a name="linkage_linkonce">linkonce</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_linkonce">linkonce</a></b></tt></dt>
<dd>Globals with "<tt>linkonce</tt>" linkage are merged with other globals of
the same name when linkage occurs. This is typically used to implement
inline functions, templates, or other code which must be generated in each
translation unit that uses it. Unreferenced <tt>linkonce</tt> globals are
allowed to be discarded.</dd>
- <dt><tt><b><a name="linkage_weak">weak</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_weak">weak</a></b></tt></dt>
<dd>"<tt>weak</tt>" linkage has the same merging semantics as
<tt>linkonce</tt> linkage, except that unreferenced globals with
<tt>weak</tt> linkage may not be discarded. This is used for globals that
are declared "weak" in C source code.</dd>
- <dt><tt><b><a name="linkage_common">common</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_common">common</a></b></tt></dt>
<dd>"<tt>common</tt>" linkage is most similar to "<tt>weak</tt>" linkage, but
they are used for tentative definitions in C, such as "<tt>int X;</tt>" at
global scope.
@@ -582,20 +579,20 @@ define i32 @main() { <i>; i32()* </
have common linkage.</dd>
- <dt><tt><b><a name="linkage_appending">appending</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_appending">appending</a></b></tt></dt>
<dd>"<tt>appending</tt>" linkage may only be applied to global variables of
pointer to array type. When two global variables with appending linkage
are linked together, the two global arrays are appended together. This is
the LLVM, typesafe, equivalent of having the system linker append together
"sections" with identical names when .o files are linked.</dd>
- <dt><tt><b><a name="linkage_externweak">extern_weak</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_externweak">extern_weak</a></b></tt></dt>
<dd>The semantics of this linkage follow the ELF object file model: the symbol
is weak until linked, if not linked, the symbol becomes null instead of
being an undefined reference.</dd>
- <dt><tt><b><a name="linkage_linkonce_odr">linkonce_odr</a></b></tt>: </dt>
- <dt><tt><b><a name="linkage_weak_odr">weak_odr</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_linkonce_odr">linkonce_odr</a></b></tt></dt>
+ <dt><tt><b><a name="linkage_weak_odr">weak_odr</a></b></tt></dt>
<dd>Some languages allow differing globals to be merged, such as two functions
with different semantics. Other languages, such as <tt>C++</tt>, ensure
that only equivalent globals are ever merged (the "one definition rule" -
@@ -615,14 +612,14 @@ define i32 @main() { <i>; i32()* </
DLLs (Dynamic Link Libraries).</p>
<dl>
- <dt><tt><b><a name="linkage_dllimport">dllimport</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_dllimport">dllimport</a></b></tt></dt>
<dd>"<tt>dllimport</tt>" linkage causes the compiler to reference a function
or variable via a global pointer to a pointer that is set up by the DLL
exporting the symbol. On Microsoft Windows targets, the pointer name is
formed by combining <code>__imp_</code> and the function or variable
name.</dd>
- <dt><tt><b><a name="linkage_dllexport">dllexport</a></b></tt>: </dt>
+ <dt><tt><b><a name="linkage_dllexport">dllexport</a></b></tt></dt>
<dd>"<tt>dllexport</tt>" linkage causes the compiler to provide a global
pointer to a pointer in a DLL, so that it can be referenced with the
<tt>dllimport</tt> attribute. On Microsoft Windows targets, the pointer
@@ -935,24 +932,24 @@ declare signext i8 @returns_signed_char()
<p>Currently, only the following parameter attributes are defined:</p>
<dl>
- <dt><tt>zeroext</tt></dt>
+ <dt><tt><b>zeroext</b></tt></dt>
<dd>This indicates to the code generator that the parameter or return value
should be zero-extended to a 32-bit value by the caller (for a parameter)
or the callee (for a return value).</dd>
- <dt><tt>signext</tt></dt>
+ <dt><tt><b>signext</b></tt></dt>
<dd>This indicates to the code generator that the parameter or return value
should be sign-extended to a 32-bit value by the caller (for a parameter)
or the callee (for a return value).</dd>
- <dt><tt>inreg</tt></dt>
+ <dt><tt><b>inreg</b></tt></dt>
<dd>This indicates that this parameter or return value should be treated in a
      special target-dependent fashion while emitting code for a function
call or return (usually, by putting it in a register as opposed to memory,
though some targets use it to distinguish between two different kinds of
registers). Use of this attribute is target-specific.</dd>
- <dt><tt><a name="byval">byval</a></tt></dt>
+ <dt><tt><b><a name="byval">byval</a></b></tt></dt>
<dd>This indicates that the pointer parameter should really be passed by value
to the function. The attribute implies that a hidden copy of the pointee
is made between the caller and the callee, so the callee is unable to
@@ -967,7 +964,7 @@ declare signext i8 @returns_signed_char()
generator that usually indicates a desired alignment for the synthesized
stack slot.</dd>
- <dt><tt>sret</tt></dt>
+ <dt><tt><b>sret</b></tt></dt>
<dd>This indicates that the pointer parameter specifies the address of a
structure that is the return value of the function in the source program.
This pointer must be guaranteed by the caller to be valid: loads and
@@ -975,7 +972,7 @@ declare signext i8 @returns_signed_char()
may only be applied to the first parameter. This is not a valid attribute
for return values. </dd>
- <dt><tt>noalias</tt></dt>
+ <dt><tt><b>noalias</b></tt></dt>
<dd>This indicates that the pointer does not alias any global or any other
parameter. The caller is responsible for ensuring that this is the
case. On a function return value, <tt>noalias</tt> additionally indicates
@@ -985,12 +982,12 @@ declare signext i8 @returns_signed_char()
<a href="http://llvm.org/docs/AliasAnalysis.html#MustMayNo">alias
analysis</a>.</dd>
- <dt><tt>nocapture</tt></dt>
+ <dt><tt><b>nocapture</b></tt></dt>
<dd>This indicates that the callee does not make any copies of the pointer
that outlive the callee itself. This is not a valid attribute for return
values.</dd>
- <dt><tt>nest</tt></dt>
+ <dt><tt><b>nest</b></tt></dt>
<dd>This indicates that the pointer parameter can be excised using the
<a href="#int_trampoline">trampoline intrinsics</a>. This is not a valid
attribute for return values.</dd>
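
As a rough companion to the list above (not part of this patch), the same parameter attributes can be attached from C++. A minimal sketch, assuming a function F whose first two parameters are pointers; the helper name is invented, and parameter attribute indices are 1-based, with index 0 denoting the return value:

  #include "llvm/Function.h"
  #include "llvm/Attributes.h"

  using namespace llvm;

  // Mark F's first parameter 'byval' and its second 'noalias'.
  void addParamAttrs(Function *F) {
    F->addAttribute(1, Attribute::ByVal);    // parameter indices start at 1
    F->addAttribute(2, Attribute::NoAlias);  // index 0 is the return value
  }
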
@@ -1010,7 +1007,7 @@ declare signext i8 @returns_signed_char()
<div class="doc_code">
<pre>
-define void @f() gc "name" { ...
+define void @f() gc "name" { ... }
</pre>
</div>
@@ -1040,42 +1037,42 @@ define void @f() gc "name" { ...
define void @f() noinline { ... }
define void @f() alwaysinline { ... }
define void @f() alwaysinline optsize { ... }
-define void @f() optsize
+define void @f() optsize { ... }
</pre>
</div>
<dl>
- <dt><tt>alwaysinline</tt></dt>
+ <dt><tt><b>alwaysinline</b></tt></dt>
<dd>This attribute indicates that the inliner should attempt to inline this
function into callers whenever possible, ignoring any active inlining size
threshold for this caller.</dd>
- <dt><tt>inlinehint</tt></dt>
+ <dt><tt><b>inlinehint</b></tt></dt>
<dd>This attribute indicates that the source code contained a hint that inlining
this function is desirable (such as the "inline" keyword in C/C++). It
is just a hint; it imposes no requirements on the inliner.</dd>
- <dt><tt>noinline</tt></dt>
+ <dt><tt><b>noinline</b></tt></dt>
<dd>This attribute indicates that the inliner should never inline this
function in any situation. This attribute may not be used together with
the <tt>alwaysinline</tt> attribute.</dd>
- <dt><tt>optsize</tt></dt>
+ <dt><tt><b>optsize</b></tt></dt>
<dd>This attribute suggests that optimization passes and code generator passes
make choices that keep the code size of this function low, and otherwise
do optimizations specifically to reduce code size.</dd>
- <dt><tt>noreturn</tt></dt>
+ <dt><tt><b>noreturn</b></tt></dt>
<dd>This function attribute indicates that the function never returns
normally. This produces undefined behavior at runtime if the function
ever does dynamically return.</dd>
- <dt><tt>nounwind</tt></dt>
+ <dt><tt><b>nounwind</b></tt></dt>
<dd>This function attribute indicates that the function never returns with an
unwind or exceptional control flow. If the function does unwind, its
runtime behavior is undefined.</dd>
- <dt><tt>readnone</tt></dt>
+ <dt><tt><b>readnone</b></tt></dt>
<dd>This attribute indicates that the function computes its result (or decides
to unwind an exception) based strictly on its arguments, without
dereferencing any pointer arguments or otherwise accessing any mutable
@@ -1086,7 +1083,7 @@ define void @f() optsize
exceptions by calling the <tt>C++</tt> exception throwing methods, but
could use the <tt>unwind</tt> instruction.</dd>
- <dt><tt><a name="readonly">readonly</a></tt></dt>
+ <dt><tt><b><a name="readonly">readonly</a></b></tt></dt>
<dd>This attribute indicates that the function does not write through any
pointer arguments (including <tt><a href="#byval">byval</a></tt>
arguments) or otherwise modify any state (e.g. memory, control registers,
@@ -1097,7 +1094,7 @@ define void @f() optsize
exception by calling the <tt>C++</tt> exception throwing methods, but may
use the <tt>unwind</tt> instruction.</dd>
- <dt><tt><a name="ssp">ssp</a></tt></dt>
+ <dt><tt><b><a name="ssp">ssp</a></b></tt></dt>
<dd>This attribute indicates that the function should emit a stack smashing
protector. It is in the form of a "canary"&mdash;a random value placed on
the stack before the local variables that's checked upon return from the
@@ -1108,7 +1105,7 @@ define void @f() optsize
function that doesn't have an <tt>ssp</tt> attribute, then the resulting
function will have an <tt>ssp</tt> attribute.</dd>
- <dt><tt>sspreq</tt></dt>
+ <dt><tt><b>sspreq</b></tt></dt>
<dd>This attribute indicates that the function should <em>always</em> emit a
stack smashing protector. This overrides
the <tt><a href="#ssp">ssp</a></tt> function attribute.<br>
@@ -1118,14 +1115,14 @@ define void @f() optsize
an <tt>ssp</tt> attribute, then the resulting function will have
an <tt>sspreq</tt> attribute.</dd>
- <dt><tt>noredzone</tt></dt>
+ <dt><tt><b>noredzone</b></tt></dt>
<dd>This attribute indicates that the code generator should not use a red
zone, even if the target-specific ABI normally permits it.</dd>
- <dt><tt>noimplicitfloat</tt></dt>
+ <dt><tt><b>noimplicitfloat</b></tt></dt>
   <dd>This attribute disables implicit floating point instructions.</dd>
- <dt><tt>naked</tt></dt>
+ <dt><tt><b>naked</b></tt></dt>
<dd>This attribute disables prologue / epilogue emission for the function.
This can have very system-specific consequences.</dd>
</dl>
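
For completeness, a hedged C++ sketch of setting function attributes through the Function API; the helper name is invented, and OptimizeForSize corresponds to 'optsize' above:

  #include "llvm/Function.h"
  #include "llvm/Attributes.h"

  using namespace llvm;

  // Roughly the C++ equivalent of "define void @f() noinline optsize { ... }".
  void markSmallAndUninlinable(Function *F) {
    F->addFnAttr(Attribute::NoInline);         // never inline this function
    F->addFnAttr(Attribute::OptimizeForSize);  // prefer smaller code
  }
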
@@ -2172,6 +2169,34 @@ has undefined behavior.</p>
</div>
<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="blockaddress">Addresses of Basic
+ Blocks</a></div>
+<div class="doc_text">
+
+<p><b><tt>blockaddress(@function, %block)</tt></b></p>
+
+<p>The '<tt>blockaddress</tt>' constant computes the address of the specified
+ basic block in the specified function, and always has an i8* type. Taking
+ the address of the entry block is illegal.</p>
+
+<p>This value only has defined behavior when used as an operand to the
+ '<a href="#i_indirectbr"><tt>indirectbr</tt></a>' instruction or for comparisons
+   against null. Pointer equality tests between label addresses are undefined
+   behavior, though comparison against null is fine, and no label is equal to
+   the null pointer. This may also be passed around as an opaque pointer-sized
+   value as long as the bits are not inspected. This allows
+ <tt>ptrtoint</tt> and arithmetic to be performed on these values so long as
+ the original value is reconstituted before the <tt>indirectbr</tt>.</p>
+
+<p>Finally, some targets may provide defined semantics when using the value as
+   an operand of inline assembly, but that is target-specific.</p>
+
+</div>
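
A short C++ sketch of materializing this constant through the BlockAddress API added alongside this documentation; the helper name is invented:

  #include "llvm/Constants.h"    // BlockAddress
  #include "llvm/Function.h"
  #include "llvm/BasicBlock.h"

  using namespace llvm;

  // Build an i8* constant holding BB's address. BB must belong to F and,
  // per the rule above, must not be F's entry block.
  Constant *takeBlockAddress(Function *F, BasicBlock *BB) {
    return BlockAddress::get(F, BB);
  }
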
+
+
+<!-- ======================================================================= -->
<div class="doc_subsection"><a name="constantexprs">Constant Expressions</a>
</div>
@@ -2513,6 +2538,7 @@ Instructions</a> </div>
'<a href="#i_ret"><tt>ret</tt></a>' instruction, the
'<a href="#i_br"><tt>br</tt></a>' instruction, the
'<a href="#i_switch"><tt>switch</tt></a>' instruction, the
+   '<a href="#i_indirectbr"><tt>indirectbr</tt></a>' instruction, the
'<a href="#i_invoke"><tt>invoke</tt></a>' instruction, the
'<a href="#i_unwind"><tt>unwind</tt></a>' instruction, and the
'<a href="#i_unreachable"><tt>unreachable</tt></a>' instruction.</p>
@@ -2671,6 +2697,55 @@ IfUnequal:
</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_indirectbr">'<tt>indirectbr</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ indirectbr &lt;somety&gt;* &lt;address&gt;, [ label &lt;dest1&gt;, label &lt;dest2&gt;, ... ]
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>indirectbr</tt>' instruction implements an indirect branch to a label
+ within the current function, whose address is specified by
+   "<tt>address</tt>". The address must be derived from a
+   <a href="#blockaddress">blockaddress</a> constant.</p>
+
+<h5>Arguments:</h5>
+
+<p>The '<tt>address</tt>' argument is the address of the label to jump to. The
+ rest of the arguments indicate the full set of possible destinations that the
+ address may point to. Blocks are allowed to occur multiple times in the
+ destination list, though this isn't particularly useful.</p>
+
+<p>This destination list is required so that dataflow analysis has an accurate
+ understanding of the CFG.</p>
+
+<h5>Semantics:</h5>
+
+<p>Control transfers to the block specified in the address argument. All
+ possible destination blocks must be listed in the label list, otherwise this
+ instruction has undefined behavior. This implies that jumps to labels
+ defined in other functions have undefined behavior as well.</p>
+
+<h5>Implementation:</h5>
+
+<p>This is typically implemented with a jump through a register.</p>
+
+<h5>Example:</h5>
+<pre>
+ indirectbr i8* %Addr, [ label %bb1, label %bb2, label %bb3 ]
+</pre>
+
+</div>
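
A matching C++ sketch using the IndirectBrInst API that accompanies this instruction; the helper and block names are invented:

  #include "llvm/Instructions.h"  // IndirectBrInst

  using namespace llvm;

  // Append "indirectbr i8* %Addr, [ label %bb1, label %bb2 ]" to Cur.
  void emitIndirectBr(BasicBlock *Cur, Value *Addr,
                      BasicBlock *BB1, BasicBlock *BB2) {
    IndirectBrInst *IBI = IndirectBrInst::Create(Addr, 2, Cur);
    IBI->addDestination(BB1);  // every block Addr may name must be listed
    IBI->addDestination(BB2);
  }
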
+
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
<a name="i_invoke">'<tt>invoke</tt>' Instruction</a>
@@ -3650,7 +3725,7 @@ Instruction</a> </div>
<h5>Example:</h5>
<pre>
- %result = extractelement &lt;4 x i32&gt; %vec, i32 0 <i>; yields i32</i>
+ &lt;result&gt; = extractelement &lt;4 x i32&gt; %vec, i32 0 <i>; yields i32</i>
</pre>
</div>
@@ -3686,7 +3761,7 @@ Instruction</a> </div>
<h5>Example:</h5>
<pre>
- %result = insertelement &lt;4 x i32&gt; %vec, i32 1, i32 0 <i>; yields &lt;4 x i32&gt;</i>
+ &lt;result&gt; = insertelement &lt;4 x i32&gt; %vec, i32 1, i32 0 <i>; yields &lt;4 x i32&gt;</i>
</pre>
</div>
@@ -3727,13 +3802,13 @@ Instruction</a> </div>
<h5>Example:</h5>
<pre>
- %result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
+ &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
&lt;4 x i32&gt; &lt;i32 0, i32 4, i32 1, i32 5&gt; <i>; yields &lt;4 x i32&gt;</i>
- %result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; undef,
+ &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; undef,
&lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt; <i>; yields &lt;4 x i32&gt;</i> - Identity shuffle.
- %result = shufflevector &lt;8 x i32&gt; %v1, &lt;8 x i32&gt; undef,
+ &lt;result&gt; = shufflevector &lt;8 x i32&gt; %v1, &lt;8 x i32&gt; undef,
&lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt; <i>; yields &lt;4 x i32&gt;</i>
- %result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
+ &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
&lt;8 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 &gt; <i>; yields &lt;8 x i32&gt;</i>
</pre>
@@ -3779,7 +3854,7 @@ Instruction</a> </div>
<h5>Example:</h5>
<pre>
- %result = extractvalue {i32, float} %agg, 0 <i>; yields i32</i>
+ &lt;result&gt; = extractvalue {i32, float} %agg, 0 <i>; yields i32</i>
</pre>
</div>
@@ -3818,7 +3893,7 @@ Instruction</a> </div>
<h5>Example:</h5>
<pre>
- %result = insertvalue {i32, float} %agg, i32 1, 0 <i>; yields {i32, float}</i>
+ &lt;result&gt; = insertvalue {i32, float} %agg, i32 1, 0 <i>; yields {i32, float}</i>
</pre>
</div>
@@ -3833,95 +3908,13 @@ Instruction</a> </div>
<p>A key design point of an SSA-based representation is how it represents
memory. In LLVM, no memory locations are in SSA form, which makes things
- very simple. This section describes how to read, write, allocate, and free
+ very simple. This section describes how to read, write, and allocate
memory in LLVM.</p>
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
- <a name="i_malloc">'<tt>malloc</tt>' Instruction</a>
-</div>
-
-<div class="doc_text">
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = malloc &lt;type&gt;[, i32 &lt;NumElements&gt;][, align &lt;alignment&gt;] <i>; yields {type*}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>malloc</tt>' instruction allocates memory from the system heap and
- returns a pointer to it. The object is always allocated in the generic
- address space (address space zero).</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>malloc</tt>' instruction allocates
- <tt>sizeof(&lt;type&gt;)*NumElements</tt> bytes of memory from the operating
- system and returns a pointer of the appropriate type to the program. If
- "NumElements" is specified, it is the number of elements allocated, otherwise
- "NumElements" is defaulted to be one. If a constant alignment is specified,
- the value result of the allocation is guaranteed to be aligned to at least
- that boundary. If not specified, or if zero, the target can choose to align
- the allocation on any convenient boundary compatible with the type.</p>
-
-<p>'<tt>type</tt>' must be a sized type.</p>
-
-<h5>Semantics:</h5>
-<p>Memory is allocated using the system "<tt>malloc</tt>" function, and a
- pointer is returned. The result of a zero byte allocation is undefined. The
- result is null if there is insufficient memory available.</p>
-
-<h5>Example:</h5>
-<pre>
- %array = malloc [4 x i8] <i>; yields {[%4 x i8]*}:array</i>
-
- %size = <a href="#i_add">add</a> i32 2, 2 <i>; yields {i32}:size = i32 4</i>
- %array1 = malloc i8, i32 4 <i>; yields {i8*}:array1</i>
- %array2 = malloc [12 x i8], i32 %size <i>; yields {[12 x i8]*}:array2</i>
- %array3 = malloc i32, i32 4, align 1024 <i>; yields {i32*}:array3</i>
- %array4 = malloc i32, align 1024 <i>; yields {i32*}:array4</i>
-</pre>
-
-<p>Note that the code generator does not yet respect the alignment value.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
- <a name="i_free">'<tt>free</tt>' Instruction</a>
-</div>
-
-<div class="doc_text">
-
-<h5>Syntax:</h5>
-<pre>
- free &lt;type&gt; &lt;value&gt; <i>; yields {void}</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>free</tt>' instruction returns memory back to the unused memory heap
- to be reallocated in the future.</p>
-
-<h5>Arguments:</h5>
-<p>'<tt>value</tt>' shall be a pointer value that points to a value that was
- allocated with the '<tt><a href="#i_malloc">malloc</a></tt>' instruction.</p>
-
-<h5>Semantics:</h5>
-<p>Access to the memory pointed to by the pointer is no longer defined after
- this instruction executes. If the pointer is null, the operation is a
- noop.</p>
-
-<h5>Example:</h5>
-<pre>
- %array = <a href="#i_malloc">malloc</a> [4 x i8] <i>; yields {[4 x i8]*}:array</i>
- free [4 x i8]* %array
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
<a name="i_alloca">'<tt>alloca</tt>' Instruction</a>
</div>
@@ -4253,7 +4246,7 @@ entry:
<pre>
%X = trunc i32 257 to i8 <i>; yields i8:1</i>
%Y = trunc i32 123 to i1 <i>; yields i1:true</i>
- %Y = trunc i32 122 to i1 <i>; yields i1:false</i>
+ %Z = trunc i32 122 to i1 <i>; yields i1:false</i>
</pre>
</div>
@@ -4437,7 +4430,7 @@ entry:
<pre>
%X = fptoui double 123.0 to i32 <i>; yields i32:123</i>
%Y = fptoui float 1.0E+300 to i1 <i>; yields undefined:1</i>
- %X = fptoui float 1.04E+17 to i8 <i>; yields undefined:1</i>
+ %Z = fptoui float 1.04E+17 to i8 <i>; yields undefined:1</i>
</pre>
</div>
@@ -4475,7 +4468,7 @@ entry:
<pre>
%X = fptosi double -123.0 to i32 <i>; yields i32:-123</i>
%Y = fptosi float 1.0E-247 to i1 <i>; yields undefined:1</i>
- %X = fptosi float 1.04E+17 to i8 <i>; yields undefined:1</i>
+ %Z = fptosi float 1.04E+17 to i8 <i>; yields undefined:1</i>
</pre>
</div>
@@ -4619,8 +4612,8 @@ entry:
<h5>Example:</h5>
<pre>
%X = inttoptr i32 255 to i32* <i>; yields zero extension on 64-bit architecture</i>
- %X = inttoptr i32 255 to i32* <i>; yields no-op on 32-bit architecture</i>
- %Y = inttoptr i64 0 to i32* <i>; yields truncation on 32-bit architecture</i>
+ %Y = inttoptr i32 255 to i32* <i>; yields no-op on 32-bit architecture</i>
+ %Z = inttoptr i64 0 to i32* <i>; yields truncation on 32-bit architecture</i>
</pre>
</div>
@@ -6624,7 +6617,8 @@ LLVM</a>.</p>
<h5>Example:</h5>
<pre>
-%ptr = malloc i32
+%mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr = bitcast i8* %mallocP to i32*
 store i32 4, i32* %ptr
%result1 = load i32* %ptr <i>; yields {i32}:result1 = 4</i>
@@ -6675,7 +6669,8 @@ LLVM</a>.</p>
<h5>Examples:</h5>
<pre>
-%ptr = malloc i32
+%mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr = bitcast i8* %mallocP to i32*
 store i32 4, i32* %ptr
%val1 = add i32 4, 4
@@ -6730,7 +6725,8 @@ LLVM</a>.</p>
<h5>Examples:</h5>
<pre>
-%ptr = malloc i32
+%mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr = bitcast i8* %mallocP to i32*
 store i32 4, i32* %ptr
%val1 = add i32 4, 4
@@ -6785,8 +6781,9 @@ LLVM</a>.</p>
<h5>Examples:</h5>
<pre>
-%ptr = malloc i32
- store i32 4, %ptr
+%mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr = bitcast i8* %mallocP to i32*
+ store i32 4, i32* %ptr
%result1 = call i32 @llvm.atomic.load.add.i32.p0i32( i32* %ptr, i32 4 )
<i>; yields {i32}:result1 = 4</i>
%result2 = call i32 @llvm.atomic.load.add.i32.p0i32( i32* %ptr, i32 2 )
@@ -6836,8 +6833,9 @@ LLVM</a>.</p>
<h5>Examples:</h5>
<pre>
-%ptr = malloc i32
- store i32 8, %ptr
+%mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr = bitcast i8* %mallocP to i32*
+ store i32 8, i32* %ptr
%result1 = call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %ptr, i32 4 )
<i>; yields {i32}:result1 = 8</i>
%result2 = call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %ptr, i32 2 )
@@ -6913,8 +6911,9 @@ LLVM</a>.</p>
<h5>Examples:</h5>
<pre>
-%ptr = malloc i32
- store i32 0x0F0F, %ptr
+%mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr = bitcast i8* %mallocP to i32*
+ store i32 0x0F0F, i32* %ptr
%result0 = call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %ptr, i32 0xFF )
<i>; yields {i32}:result0 = 0x0F0F</i>
%result1 = call i32 @llvm.atomic.load.and.i32.p0i32( i32* %ptr, i32 0xFF )
@@ -6991,8 +6990,9 @@ LLVM</a>.</p>
<h5>Examples:</h5>
<pre>
-%ptr = malloc i32
- store i32 7, %ptr
+%mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr = bitcast i8* %mallocP to i32*
+ store i32 7, i32* %ptr
%result0 = call i32 @llvm.atomic.load.min.i32.p0i32( i32* %ptr, i32 -2 )
<i>; yields {i32}:result0 = 7</i>
%result1 = call i32 @llvm.atomic.load.max.i32.p0i32( i32* %ptr, i32 8 )
@@ -7043,8 +7043,8 @@ LLVM</a>.</p>
<h5>Semantics:</h5>
<p>This intrinsic indicates that before this point in the code, the value of the
memory pointed to by <tt>ptr</tt> is dead. This means that it is known to
- never be used and has an undefined value. A load from the pointer that is
- preceded by this intrinsic can be replaced with
+ never be used and has an undefined value. A load from the pointer that
+ precedes this intrinsic can be replaced with
<tt>'<a href="#undefvalues">undef</a>'</tt>.</p>
</div>
@@ -7278,7 +7278,7 @@ LLVM</a>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-10-22 01:28:00 +0200 (Thu, 22 Oct 2009) $
+ Last modified: $Date: 2009-11-02 01:25:26 +0100 (Mon, 02 Nov 2009) $
</address>
</body>
diff --git a/docs/Passes.html b/docs/Passes.html
index 48f5adf..2107e13 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -158,7 +158,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#memcpyopt">-memcpyopt</a></td><td>Optimize use of memcpy and friends</td></tr>
<tr><td><a href="#mergereturn">-mergereturn</a></td><td>Unify function exit nodes</td></tr>
<tr><td><a href="#prune-eh">-prune-eh</a></td><td>Remove unused exception handling info</td></tr>
-<tr><td><a href="#raiseallocs">-raiseallocs</a></td><td>Raise allocations from calls to instructions</td></tr>
<tr><td><a href="#reassociate">-reassociate</a></td><td>Reassociate expressions</td></tr>
<tr><td><a href="#reg2mem">-reg2mem</a></td><td>Demote all values to stack slots</td></tr>
<tr><td><a href="#scalarrepl">-scalarrepl</a></td><td>Scalar Replacement of Aggregates</td></tr>
@@ -1504,17 +1503,6 @@ if (X &lt; 3) {</pre>
<!-------------------------------------------------------------------------- -->
<div class="doc_subsection">
- <a name="raiseallocs">Raise allocations from calls to instructions</a>
-</div>
-<div class="doc_text">
- <p>
- Converts <tt>@malloc</tt> and <tt>@free</tt> calls to <tt>malloc</tt> and
- <tt>free</tt> instructions.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
<a name="reassociate">Reassociate expressions</a>
</div>
<div class="doc_text">
@@ -1799,8 +1787,8 @@ if (X &lt; 3) {</pre>
integrals f.e.</li>
<li>All of the constants in a switch statement are of the correct type.</li>
<li>The code is in valid SSA form.</li>
- <li>It should be illegal to put a label into any other type (like a
- structure) or to return one. [except constant arrays!]</li>
+ <li>It is illegal to put a label into any other type (like a structure) or
+ to return one.</li>
<li>Only phi nodes can be self referential: <tt>%x = add i32 %x, %x</tt> is
invalid.</li>
<li>PHI nodes must have an entry for each predecessor, with no extras.</li>
@@ -1860,7 +1848,7 @@ if (X &lt; 3) {</pre>
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
+ Last modified: $Date: 2009-10-28 05:47:06 +0100 (Wed, 28 Oct 2009) $
</address>
</body>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 2b93b1f..5a0936f 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -1058,7 +1058,7 @@ there isn't already one.</p>
<li>LLVM will not correctly compile on Solaris and/or OpenSolaris
using the stock GCC 3.x.x series 'out the box',
-See: <a href="#brokengcc">Broken versions of GCC and other tools</a>.
+See: <a href="GettingStarted.html#brokengcc">Broken versions of GCC and other tools</a>.
However, A <a href="http://pkg.auroraux.org/GCC">Modern GCC Build</a>
for x86/x86-64 has been made available from the third party AuroraUX Project
that has been meticulously tested for bootstrapping LLVM &amp; Clang.</li>
@@ -1348,7 +1348,7 @@ lists</a>.</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-10-16 18:30:58 +0200 (Fri, 16 Oct 2009) $
+ Last modified: $Date: 2009-11-03 22:50:09 +0100 (Tue, 03 Nov 2009) $
</address>
</body>
diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
index 7ae1ca4..4ae6718 100644
--- a/docs/TableGenFundamentals.html
+++ b/docs/TableGenFundamentals.html
@@ -151,7 +151,7 @@ file prints this (at the time of this writing):</p>
<b>bit</b> isReMaterializable = 0;
<b>bit</b> isPredicable = 0;
<b>bit</b> hasDelaySlot = 0;
- <b>bit</b> usesCustomDAGSchedInserter = 0;
+ <b>bit</b> usesCustomInserter = 0;
<b>bit</b> hasCtrlDep = 0;
<b>bit</b> isNotDuplicable = 0;
<b>bit</b> hasSideEffects = 0;
@@ -794,7 +794,7 @@ This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-10-05 04:51:06 +0200 (Mon, 05 Oct 2009) $
+ Last modified: $Date: 2009-10-29 19:10:34 +0100 (Thu, 29 Oct 2009) $
</address>
</body>
diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html
index 3188135..728d518 100644
--- a/docs/tutorial/LangImpl4.html
+++ b/docs/tutorial/LangImpl4.html
@@ -388,24 +388,19 @@ entry:
</pre>
</div>
-<p>This illustrates that we can now call user code, but there is something a bit subtle
-going on here. Note that we only invoke the JIT on the anonymous functions
-that <em>call testfunc</em>, but we never invoked it on <em>testfunc
-</em>itself.</p>
-
-<p>What actually happened here is that the anonymous function was
-JIT'd when requested. When the Kaleidoscope app calls through the function
-pointer that is returned, the anonymous function starts executing. It ends up
-making the call to the "testfunc" function, and ends up in a stub that invokes
-the JIT, lazily, on testfunc. Once the JIT finishes lazily compiling testfunc,
-it returns and the code re-executes the call.</p>
-
-<p>In summary, the JIT will lazily JIT code, on the fly, as it is needed. The
-JIT provides a number of other more advanced interfaces for things like freeing
-allocated machine code, rejit'ing functions to update them, etc. However, even
-with this simple code, we get some surprisingly powerful capabilities - check
-this out (I removed the dump of the anonymous functions, you should get the idea
-by now :) :</p>
+<p>This illustrates that we can now call user code, but there is something a bit
+subtle going on here. Note that we only invoke the JIT on the anonymous
+functions that <em>call testfunc</em>, but we never invoked it
+on <em>testfunc</em> itself. What actually happened here is that the JIT
+scanned for all non-JIT'd functions transitively called from the anonymous
+function and compiled all of them before returning
+from <tt>getPointerToFunction()</tt>.</p>
+
+<p>The JIT provides a number of other more advanced interfaces for things like
+freeing allocated machine code, rejit'ing functions to update them, etc.
+However, even with this simple code, we get some surprisingly powerful
+capabilities - check this out (I removed the dump of the anonymous functions,
+you should get the idea by now :) :</p>
<div class="doc_code">
<pre>
@@ -453,8 +448,8 @@ directly.</p>
resolved. It allows you to establish explicit mappings between IR objects and
addresses (useful for LLVM global variables that you want to map to static
tables, for example), allows you to dynamically decide on the fly based on the
-function name, and even allows you to have the JIT abort itself if any lazy
-compilation is attempted.</p>
+function name, and even allows you to have the JIT compile functions lazily the
+first time they're called.</p>
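
A hedged C++ sketch of the two mechanisms just described, using ExecutionEngine entry points; mysin and the mapped global are invented for illustration:

  #include "llvm/ExecutionEngine/ExecutionEngine.h"
  #include "llvm/GlobalValue.h"
  #include <string>

  using namespace llvm;

  extern "C" double mysin(double x);  // hypothetical host-side helper

  // Called for any symbol the JIT cannot resolve by itself.
  static void *LazyResolver(const std::string &Name) {
    if (Name == "sin") return (void*)&mysin;
    return 0;  // fall through to the default lookup
  }

  void configureJIT(ExecutionEngine *EE, GlobalValue *GV) {
    EE->addGlobalMapping(GV, (void*)&mysin);      // explicit IR-to-address mapping
    EE->InstallLazyFunctionCreator(LazyResolver); // decide by name, on the fly
  }
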
<p>One interesting application of this is that we can now extend the language
by writing arbitrary C++ code to implement operations. For example, if we add:
diff --git a/docs/tutorial/OCamlLangImpl4.html b/docs/tutorial/OCamlLangImpl4.html
index 26f2532..543e12f 100644
--- a/docs/tutorial/OCamlLangImpl4.html
+++ b/docs/tutorial/OCamlLangImpl4.html
@@ -406,22 +406,17 @@ entry:
<p>This illustrates that we can now call user code, but there is something a bit
subtle going on here. Note that we only invoke the JIT on the anonymous
-functions that <em>call testfunc</em>, but we never invoked it on <em>testfunc
-</em>itself.</p>
-
-<p>What actually happened here is that the anonymous function was JIT'd when
-requested. When the Kaleidoscope app calls through the function pointer that is
-returned, the anonymous function starts executing. It ends up making the call
-to the "testfunc" function, and ends up in a stub that invokes the JIT, lazily,
-on testfunc. Once the JIT finishes lazily compiling testfunc,
-it returns and the code re-executes the call.</p>
-
-<p>In summary, the JIT will lazily JIT code, on the fly, as it is needed. The
-JIT provides a number of other more advanced interfaces for things like freeing
-allocated machine code, rejit'ing functions to update them, etc. However, even
-with this simple code, we get some surprisingly powerful capabilities - check
-this out (I removed the dump of the anonymous functions, you should get the idea
-by now :) :</p>
+functions that <em>call testfunc</em>, but we never invoked it
+on <em>testfunc</em> itself. What actually happened here is that the JIT
+scanned for all non-JIT'd functions transitively called from the anonymous
+function and compiled all of them before returning
+from <tt>run_function</tt>.</p>
+
+<p>The JIT provides a number of other more advanced interfaces for things like
+freeing allocated machine code, rejit'ing functions to update them, etc.
+However, even with this simple code, we get some surprisingly powerful
+capabilities - check this out (I removed the dump of the anonymous functions,
+you should get the idea by now :) :</p>
<div class="doc_code">
<pre>
@@ -467,8 +462,8 @@ calls in the module to call the libm version of <tt>sin</tt> directly.</p>
get resolved. It allows you to establish explicit mappings between IR objects
and addresses (useful for LLVM global variables that you want to map to static
tables, for example), allows you to dynamically decide on the fly based on the
-function name, and even allows you to have the JIT abort itself if any lazy
-compilation is attempted.</p>
+function name, and even allows you to have the JIT compile functions lazily the
+first time they're called.</p>
<p>One interesting application of this is that we can now extend the language
by writing arbitrary C code to implement operations. For example, if we add:
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp
index c64b87f..f17a950 100644
--- a/examples/BrainF/BrainF.cpp
+++ b/examples/BrainF/BrainF.cpp
@@ -117,8 +117,8 @@ void BrainF::header(LLVMContext& C) {
//brainf.end:
endbb = BasicBlock::Create(C, label, brainf_func);
- //free i8 *%arr
- new FreeInst(ptr_arr, endbb);
+ //call free(i8 *%arr)
+ endbb->getInstList().push_back(CallInst::CreateFree(ptr_arr, endbb));
//ret void
ReturnInst::Create(C, endbb);
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 353cab2..7cb5bc3 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -33,7 +33,7 @@
#ifndef LLVM_C_CORE_H
#define LLVM_C_CORE_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#ifdef __cplusplus
@@ -455,8 +455,7 @@ void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle);
macro(UnreachableInst) \
macro(UnwindInst) \
macro(UnaryInstruction) \
- macro(AllocationInst) \
- macro(AllocaInst) \
+ macro(AllocaInst) \
macro(CastInst) \
macro(BitCastInst) \
macro(FPExtInst) \
@@ -471,7 +470,6 @@ void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle);
macro(UIToFPInst) \
macro(ZExtInst) \
macro(ExtractValueInst) \
- macro(FreeInst) \
macro(LoadInst) \
macro(VAArgInst)
diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h
index 9bc947f..0a94315 100644
--- a/include/llvm-c/Transforms/IPO.h
+++ b/include/llvm-c/Transforms/IPO.h
@@ -54,7 +54,7 @@ void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM);
/** See llvm::createPruneEHPass function. */
void LLVMAddPruneEHPass(LLVMPassManagerRef PM);
-/** See llvm::createRaiseAllocationsPass function. */
+// FIXME: Remove in LLVM 3.0.
void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM);
/** See llvm::createStripDeadPrototypesPass function. */
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 0ed2d5a..52354b7 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -454,12 +454,12 @@ public:
return Ptr != RHS.Ptr;
}
- inline DenseMapIterator& operator++() { // Preincrement
+ inline DenseMapIterator& operator++() { // Preincrement
++Ptr;
AdvancePastEmptyBuckets();
return *this;
}
- DenseMapIterator operator++(int) { // Postincrement
+ DenseMapIterator operator++(int) { // Postincrement
DenseMapIterator tmp = *this; ++*this; return tmp;
}
diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h
index ac9dd4d..f5f3d49 100644
--- a/include/llvm/ADT/EquivalenceClasses.h
+++ b/include/llvm/ADT/EquivalenceClasses.h
@@ -15,7 +15,7 @@
#ifndef LLVM_ADT_EQUIVALENCECLASSES_H
#define LLVM_ADT_EQUIVALENCECLASSES_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <set>
namespace llvm {
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index 26090ce..81dc469 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -16,7 +16,7 @@
#ifndef LLVM_ADT_FOLDINGSET_H
#define LLVM_ADT_FOLDINGSET_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h
index a7f5819..5f8cb57 100644
--- a/include/llvm/ADT/ImmutableList.h
+++ b/include/llvm/ADT/ImmutableList.h
@@ -16,7 +16,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cassert>
namespace llvm {
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 8627909..16b4403 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -16,7 +16,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cassert>
#include <functional>
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index 7d00e9a..c29fc9f 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -18,7 +18,7 @@
#include <cassert>
#include <cstring>
#include <iterator>
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
namespace llvm {
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index b7a6873..6c813ec 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -17,7 +17,7 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index 899823d..85936c0 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -14,7 +14,7 @@
#ifndef LLVM_ADT_STRINGEXTRAS_H
#define LLVM_ADT_STRINGEXTRAS_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringRef.h"
#include <cctype>
diff --git a/include/llvm/ADT/StringSwitch.h b/include/llvm/ADT/StringSwitch.h
new file mode 100644
index 0000000..48a52de
--- /dev/null
+++ b/include/llvm/ADT/StringSwitch.h
@@ -0,0 +1,83 @@
+//===--- StringSwitch.h - Switch-on-literal-string Construct --------------===/
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===/
+//
+// This file implements the StringSwitch template, which mimics a switch()
+// statement whose cases are string literals.
+//
+//===----------------------------------------------------------------------===/
+#ifndef LLVM_ADT_STRINGSWITCH_H
+#define LLVM_ADT_STRINGSWITCH_H
+
+#include "llvm/ADT/StringRef.h"
+#include <cassert>
+#include <cstring>
+
+namespace llvm {
+
+/// \brief A switch()-like statement whose cases are string literals.
+///
+/// The StringSwitch class is a simple form of a switch() statement that
+/// determines whether the given string matches one of the given string
+/// literals. The template type parameter \p T is the type of the value that
+/// will be returned from the string-switch expression. For example,
+/// the following code switches on the name of a color in \c argv[i]:
+///
+/// \code
+/// Color color = StringSwitch<Color>(argv[i])
+/// .Case("red", Red)
+/// .Case("orange", Orange)
+/// .Case("yellow", Yellow)
+/// .Case("green", Green)
+/// .Case("blue", Blue)
+/// .Case("indigo", Indigo)
+/// .Case("violet", Violet)
+/// .Default(UnknownColor);
+/// \endcode
+template<typename T>
+class StringSwitch {
+ /// \brief The string we are matching.
+ StringRef Str;
+
+ /// \brief The result of this switch statement, once known.
+ T Result;
+
+ /// \brief Set true when the result of this switch is already known; in this
+ /// case, Result is valid.
+ bool ResultKnown;
+
+public:
+ explicit StringSwitch(StringRef Str)
+ : Str(Str), ResultKnown(false) { }
+
+ template<unsigned N>
+ StringSwitch& Case(const char (&S)[N], const T& Value) {
+ if (!ResultKnown && N-1 == Str.size() &&
+ (std::memcmp(S, Str.data(), N-1) == 0)) {
+ Result = Value;
+ ResultKnown = true;
+ }
+
+ return *this;
+ }
+
+ T Default(const T& Value) {
+ if (ResultKnown)
+ return Result;
+
+ return Value;
+ }
+
+ operator T() {
+ assert(ResultKnown && "Fell off the end of a string-switch");
+ return Result;
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_STRINGSWITCH_H
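
A small usage sketch beyond the doxygen example above; the enum, option strings, and function are invented:

  #include "llvm/ADT/StringSwitch.h"
  #include "llvm/ADT/StringRef.h"

  using namespace llvm;

  enum OptLevel { O0, O1, O2, UnknownOpt };

  OptLevel parseOptLevel(StringRef Arg) {
    // The first matching .Case wins; .Default covers everything else.
    return StringSwitch<OptLevel>(Arg)
        .Case("-O0", O0)
        .Case("-O1", O1)
        .Case("-O2", O2)
        .Default(UnknownOpt);
  }
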
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index 88fde0a..f4722db 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -11,7 +11,7 @@
#define LLVM_ADT_TWINE_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cassert>
#include <string>
diff --git a/include/llvm/ADT/ValueMap.h b/include/llvm/ADT/ValueMap.h
index 14f2100..b043c38 100644
--- a/include/llvm/ADT/ValueMap.h
+++ b/include/llvm/ADT/ValueMap.h
@@ -7,7 +7,19 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the ValueMap class.
+// This file defines the ValueMap class. ValueMap maps Value* or any subclass
+// to an arbitrary other type. It provides the DenseMap interface but updates
+// itself to remain safe when keys are RAUWed or deleted. By default, when a
+// key is RAUWed from V1 to V2, the old mapping V1->target is removed, and a new
+// mapping V2->target is added. If V2 already existed, its old target is
+// overwritten. When a key is deleted, its mapping is removed.
+//
+// You can override a ValueMap's Config parameter to control exactly what
+// happens on RAUW and destruction and to get called back on each event. It's
+// legal to call back into the ValueMap from a Config's callbacks. Config
+// parameters should inherit from ValueMapConfig<KeyT> to get default
+// implementations of all the methods ValueMap uses. See ValueMapConfig for
+// documentation of the functions you can override.
//
//===----------------------------------------------------------------------===//
@@ -31,6 +43,9 @@ class ValueMapIterator;
template<typename DenseMapT, typename KeyT>
class ValueMapConstIterator;
+/// This class defines the default behavior for configurable aspects of
+/// ValueMap<>. User Configs should inherit from this class to be as compatible
+/// as possible with future versions of ValueMap.
template<typename KeyT>
struct ValueMapConfig {
/// If FollowRAUW is true, the ValueMap will update mappings on RAUW. If it's
@@ -46,27 +61,17 @@ struct ValueMapConfig {
template<typename ExtraDataT>
static void onRAUW(const ExtraDataT &Data, KeyT Old, KeyT New) {}
template<typename ExtraDataT>
- static void onDeleted(const ExtraDataT &Data, KeyT Old) {}
+ static void onDelete(const ExtraDataT &Data, KeyT Old) {}
/// Returns a mutex that should be acquired around any changes to the map.
/// This is only acquired from the CallbackVH (and held around calls to onRAUW
- /// and onDeleted) and not inside other ValueMap methods. NULL means that no
+ /// and onDelete) and not inside other ValueMap methods. NULL means that no
/// mutex is necessary.
template<typename ExtraDataT>
static sys::Mutex *getMutex(const ExtraDataT &Data) { return NULL; }
};
-/// ValueMap maps Value* or any subclass to an arbitrary other
-/// type. It provides the DenseMap interface. When the key values are
-/// deleted or RAUWed, ValueMap relies on the Config to decide what to
-/// do. Config parameters should inherit from ValueMapConfig<KeyT> to
-/// get default implementations of all the methods ValueMap uses.
-///
-/// By default, when a key is RAUWed from V1 to V2, the old mapping
-/// V1->target is removed, and a new mapping V2->target is added. If
-/// V2 already existed, its old target is overwritten. When a key is
-/// deleted, its mapping is removed. You can override Config to get
-/// called back on each event.
+/// See the file comment.
template<typename KeyT, typename ValueT, typename Config = ValueMapConfig<KeyT>,
typename ValueInfoT = DenseMapInfo<ValueT> >
class ValueMap {
@@ -177,6 +182,9 @@ public:
}
private:
+ // Takes a key being looked up in the map and wraps it into a
+ // ValueMapCallbackVH, the actual key type of the map. We use a helper
+ // function because ValueMapCVH is constructed with a second parameter.
ValueMapCVH Wrap(KeyT key) const {
// The only way the resulting CallbackVH could try to modify *this (making
// the const_cast incorrect) is if it gets inserted into the map. But then
@@ -186,10 +194,12 @@ private:
}
};
+// This CallbackVH updates its ValueMap when the contained Value changes,
+// according to the user's preferences expressed through the Config object.
template<typename KeyT, typename ValueT, typename Config, typename ValueInfoT>
class ValueMapCallbackVH : public CallbackVH {
friend class ValueMap<KeyT, ValueT, Config, ValueInfoT>;
- friend class DenseMapInfo<ValueMapCallbackVH>;
+ friend struct DenseMapInfo<ValueMapCallbackVH>;
typedef ValueMap<KeyT, ValueT, Config, ValueInfoT> ValueMapT;
typedef typename llvm::remove_pointer<KeyT>::type KeySansPointerT;
@@ -208,7 +218,7 @@ public:
sys::Mutex *M = Config::getMutex(Copy.Map->Data);
if (M)
M->acquire();
- Config::onDeleted(Copy.Map->Data, Copy.Unwrap()); // May destroy *this.
+ Config::onDelete(Copy.Map->Data, Copy.Unwrap()); // May destroy *this.
Copy.Map->Map.erase(Copy); // Definitely destroys *this.
if (M)
M->release();
@@ -279,7 +289,7 @@ public:
struct ValueTypeProxy {
const KeyT first;
ValueT& second;
- ValueTypeProxy *operator->() { return this; }
+ ValueTypeProxy *operator->() { return this; }
operator std::pair<KeyT, ValueT>() const {
return std::make_pair(first, second);
}
@@ -301,11 +311,11 @@ public:
return I != RHS.I;
}
- inline ValueMapIterator& operator++() { // Preincrement
+ inline ValueMapIterator& operator++() { // Preincrement
++I;
return *this;
}
- ValueMapIterator operator++(int) { // Postincrement
+ ValueMapIterator operator++(int) { // Postincrement
ValueMapIterator tmp = *this; ++*this; return tmp;
}
};
@@ -329,7 +339,7 @@ public:
struct ValueTypeProxy {
const KeyT first;
const ValueT& second;
- ValueTypeProxy *operator->() { return this; }
+ ValueTypeProxy *operator->() { return this; }
operator std::pair<KeyT, ValueT>() const {
return std::make_pair(first, second);
}
@@ -351,11 +361,11 @@ public:
return I != RHS.I;
}
- inline ValueMapConstIterator& operator++() { // Preincrement
+ inline ValueMapConstIterator& operator++() { // Preincrement
++I;
return *this;
}
- ValueMapConstIterator operator++(int) { // Postincrement
+ ValueMapConstIterator operator++(int) { // Postincrement
ValueMapConstIterator tmp = *this; ++*this; return tmp;
}
};
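
A sketch of the RAUW behavior the new file comment promises, under the default Config; the values are placeholders and must share a type:

  #include "llvm/ADT/ValueMap.h"
  #include "llvm/Value.h"

  using namespace llvm;

  void rauwDemo(Value *V1, Value *V2) {  // V1 and V2 have the same type
    ValueMap<Value*, unsigned> VM;
    VM[V1] = 42;
    V1->replaceAllUsesWith(V2);  // the default Config moves the mapping to V2
    // Now VM.count(V1) == 0 and VM[V2] == 42; deleting a key erases its entry.
  }
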
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 239f30f..42a377e 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -29,7 +29,6 @@ namespace llvm {
class AliasAnalysis;
class LoadInst;
class StoreInst;
-class FreeInst;
class VAArgInst;
class AliasSetTracker;
class AliasSet;
@@ -298,7 +297,6 @@ public:
bool add(Value *Ptr, unsigned Size); // Add a location
bool add(LoadInst *LI);
bool add(StoreInst *SI);
- bool add(FreeInst *FI);
bool add(VAArgInst *VAAI);
bool add(CallSite CS); // Call/Invoke instructions
bool add(CallInst *CI) { return add(CallSite(CI)); }
@@ -313,7 +311,6 @@ public:
bool remove(Value *Ptr, unsigned Size); // Remove a location
bool remove(LoadInst *LI);
bool remove(StoreInst *SI);
- bool remove(FreeInst *FI);
bool remove(VAArgInst *VAAI);
bool remove(CallSite CS);
bool remove(CallInst *CI) { return remove(CallSite(CI)); }
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index f76aa46..cfe3632 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -514,6 +514,13 @@ namespace llvm {
uint64_t OffsetInBits, unsigned Flags,
unsigned Encoding);
+    /// CreateBasicTypeEx - Create a basic type like int, float, etc.
+ DIBasicType CreateBasicTypeEx(DIDescriptor Context, StringRef Name,
+ DICompileUnit CompileUnit, unsigned LineNumber,
+ Constant *SizeInBits, Constant *AlignInBits,
+ Constant *OffsetInBits, unsigned Flags,
+ unsigned Encoding);
+
/// CreateDerivedType - Create a derived type like const qualified type,
/// pointer, typedef, etc.
DIDerivedType CreateDerivedType(unsigned Tag, DIDescriptor Context,
@@ -524,6 +531,16 @@ namespace llvm {
uint64_t OffsetInBits, unsigned Flags,
DIType DerivedFrom);
+    /// CreateDerivedTypeEx - Create a derived type like const qualified type,
+ /// pointer, typedef, etc.
+ DIDerivedType CreateDerivedTypeEx(unsigned Tag, DIDescriptor Context,
+ StringRef Name,
+ DICompileUnit CompileUnit,
+ unsigned LineNumber,
+ Constant *SizeInBits, Constant *AlignInBits,
+ Constant *OffsetInBits, unsigned Flags,
+ DIType DerivedFrom);
+
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType CreateCompositeType(unsigned Tag, DIDescriptor Context,
StringRef Name,
@@ -536,6 +553,18 @@ namespace llvm {
DIArray Elements,
unsigned RunTimeLang = 0);
+    /// CreateCompositeTypeEx - Create a composite type like array, struct, etc.
+ DICompositeType CreateCompositeTypeEx(unsigned Tag, DIDescriptor Context,
+ StringRef Name,
+ DICompileUnit CompileUnit,
+ unsigned LineNumber,
+ Constant *SizeInBits,
+ Constant *AlignInBits,
+ Constant *OffsetInBits, unsigned Flags,
+ DIType DerivedFrom,
+ DIArray Elements,
+ unsigned RunTimeLang = 0);
+
/// CreateSubprogram - Create a new descriptor for the specified subprogram.
/// See comments in DISubprogram for descriptions of these fields.
DISubprogram CreateSubprogram(DIDescriptor Context, StringRef Name,
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 59ce6e7..17aaf95 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -905,9 +905,9 @@ public:
iterator find(BasicBlock *B) { return Frontiers.find(B); }
const_iterator find(BasicBlock *B) const { return Frontiers.find(B); }
- void addBasicBlock(BasicBlock *BB, const DomSetType &frontier) {
+ iterator addBasicBlock(BasicBlock *BB, const DomSetType &frontier) {
assert(find(BB) == end() && "Block already in DominanceFrontier!");
- Frontiers.insert(std::make_pair(BB, frontier));
+ return Frontiers.insert(std::make_pair(BB, frontier)).first;
}
/// removeBlock - Remove basic block BB's frontier.
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 7631110..bc87adb 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -114,10 +114,10 @@ public:
block_iterator block_begin() const { return Blocks.begin(); }
block_iterator block_end() const { return Blocks.end(); }
- /// isLoopExit - True if terminator in the block can branch to another block
+  /// isLoopExiting - True if the terminator in the block can branch to another block
/// that is outside of the current loop.
///
- bool isLoopExit(const BlockT *BB) const {
+ bool isLoopExiting(const BlockT *BB) const {
typedef GraphTraits<BlockT*> BlockTraits;
for (typename BlockTraits::ChildIteratorType SI =
BlockTraits::child_begin(const_cast<BlockT*>(BB)),
@@ -465,7 +465,7 @@ public:
WriteAsOperand(OS, BB, false);
if (BB == getHeader()) OS << "<header>";
if (BB == getLoopLatch()) OS << "<latch>";
- if (isLoopExit(BB)) OS << "<exit>";
+ if (isLoopExiting(BB)) OS << "<exiting>";
}
OS << "\n";
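
A small sketch of the renamed query; the counting helper is invented:

  #include "llvm/Analysis/LoopInfo.h"

  using namespace llvm;

  // Count the blocks of L whose terminator can branch out of the loop.
  unsigned countExitingBlocks(const Loop *L) {
    unsigned N = 0;
    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
         I != E; ++I)
      if (L->isLoopExiting(*I))
        ++N;
    return N;
  }
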
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
new file mode 100644
index 0000000..5fd0bb0
--- /dev/null
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -0,0 +1,79 @@
+//===- llvm/Analysis/MemoryBuiltins.h- Calls to memory builtins -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to builtin functions that allocate
+// or free memory.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MEMORYBUILTINS_H
+#define LLVM_ANALYSIS_MEMORYBUILTINS_H
+
+namespace llvm {
+class CallInst;
+class LLVMContext;
+class PointerType;
+class TargetData;
+class Type;
+class Value;
+
+//===----------------------------------------------------------------------===//
+// malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a bitcast of
+/// the result of a malloc call.
+bool isMalloc(const Value* I);
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst* extractMallocCall(const Value* I);
+CallInst* extractMallocCall(Value* I);
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+const CallInst* extractMallocCallFromBitCast(const Value* I);
+CallInst* extractMallocCallFromBitCast(Value* I);
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// is a call to malloc whose array size can be determined and the array size
+/// is not constant 1. Otherwise, return NULL.
+CallInst* isArrayMalloc(Value* I, LLVMContext &Context, const TargetData* TD);
+const CallInst* isArrayMalloc(const Value* I, LLVMContext &Context,
+ const TargetData* TD);
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// This PointerType is the result type of the call's only bitcast use.
+/// If there is no unique bitcast use, then return NULL.
+const PointerType* getMallocType(const CallInst* CI);
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call. This
+/// Type is the result type of the call's only bitcast use. If there is no
+/// unique bitcast use, then return NULL.
+const Type* getMallocAllocatedType(const CallInst* CI);
+
+/// getMallocArraySize - Returns the array size of a malloc call. If the
+/// argument passed to malloc is a multiple of the size of the malloced type,
+/// then return that multiple. For non-array mallocs, the multiple is
+/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
+/// determined.
+Value* getMallocArraySize(CallInst* CI, LLVMContext &Context,
+ const TargetData* TD);
+
+//===----------------------------------------------------------------------===//
+// free Call Utility Functions.
+//
+
+/// isFreeCall - Returns true if the value is a call to the builtin free().
+bool isFreeCall(const Value* I);
+
+} // End llvm namespace
+
+#endif
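
A hedged sketch of combining these helpers; the function is invented:

  #include "llvm/Analysis/MemoryBuiltins.h"
  #include "llvm/Instructions.h"
  #include "llvm/Type.h"

  using namespace llvm;

  // Recover the type a malloc call allocates, when it is unambiguous.
  const Type *typeOfMalloc(Value *V) {
    if (!isMalloc(V))                     // a malloc call or bitcast of one?
      return 0;
    if (CallInst *CI = extractMallocCall(V))
      return getMallocAllocatedType(CI);  // NULL without a unique bitcast use
    return 0;
  }
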
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index ed5d18e..4aa3dfa 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -24,7 +24,7 @@
#include "llvm/Pass.h"
#include "llvm/Instructions.h"
#include "llvm/Function.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ConstantRange.h"
@@ -563,11 +563,10 @@ namespace llvm {
/// has an analyzable loop-invariant backedge-taken count.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
- /// forgetLoopBackedgeTakenCount - This method should be called by the
- /// client when it has changed a loop in a way that may effect
- /// ScalarEvolution's ability to compute a trip count, or if the loop
- /// is deleted.
- void forgetLoopBackedgeTakenCount(const Loop *L);
+ /// forgetLoop - This method should be called by the client when it has
+  /// changed a loop in a way that may affect ScalarEvolution's ability to
+ /// compute a trip count, or if the loop is deleted.
+ void forgetLoop(const Loop *L);
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S
/// is guaranteed to end in (at every loop iteration). It is, at the same
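
A sketch of the renamed API's contract; the transform itself is left abstract and the helper name is invented:

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/LoopInfo.h"

  using namespace llvm;

  void afterRewritingExitCondition(ScalarEvolution &SE, Loop *L) {
    SE.forgetLoop(L);  // drop any cached trip-count info for L
    // Later queries recompute from the updated IR:
    if (SE.hasLoopInvariantBackedgeTakenCount(L))
      (void)SE.getBackedgeTakenCount(L);
  }
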
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 212b5d1..f233608 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -15,7 +15,7 @@
#ifndef LLVM_ANALYSIS_VALUETRACKING_H
#define LLVM_ANALYSIS_VALUETRACKING_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <string>
namespace llvm {
diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h
index b497827..ba4caeb 100644
--- a/include/llvm/BasicBlock.h
+++ b/include/llvm/BasicBlock.h
@@ -17,12 +17,13 @@
#include "llvm/Instruction.h"
#include "llvm/SymbolTableListTraits.h"
#include "llvm/ADT/ilist.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class TerminatorInst;
class LLVMContext;
+class BlockAddress;
template<> struct ilist_traits<Instruction>
: public SymbolTableListTraits<Instruction, BasicBlock> {
@@ -66,7 +67,7 @@ private:
/// @brief LLVM Basic Block Representation
class BasicBlock : public Value, // Basic blocks are data objects also
public ilist_node<BasicBlock> {
-
+ friend class BlockAddress;
public:
typedef iplist<Instruction> InstListType;
private:
@@ -108,10 +109,10 @@ public:
Function *getParent() { return Parent; }
  /// use_back - Specialize the methods defined in Value, as we know that a
- /// BasicBlock can only be used by Instructions (specifically PHI nodes and
- /// terminators).
- Instruction *use_back() { return cast<Instruction>(*use_begin());}
- const Instruction *use_back() const { return cast<Instruction>(*use_begin());}
+ /// BasicBlock can only be used by Users (specifically PHI nodes, terminators,
+ /// and BlockAddress's).
+ User *use_back() { return cast<User>(*use_begin());}
+ const User *use_back() const { return cast<User>(*use_begin());}
/// getTerminator() - If this is a well formed basic block, then this returns
/// a pointer to the terminator instruction. If it is not, then you get a
@@ -235,6 +236,19 @@ public:
/// keeping loop information consistent, use the SplitBlock utility function.
///
BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = "");
+
+ /// hasAddressTaken - returns true if there are any uses of this basic block
+ /// other than direct branches, switches, etc. to it.
+ bool hasAddressTaken() const { return SubclassData != 0; }
+
+private:
+ /// AdjustBlockAddressRefCount - BasicBlock stores the number of BlockAddress
+ /// objects using it. This is almost always 0, sometimes one, possibly but
+ /// almost never 2, and inconceivably 3 or more.
+ void AdjustBlockAddressRefCount(int Amt) {
+ SubclassData += Amt;
+ assert((int)(char)SubclassData >= 0 && "Refcount wrap-around");
+ }
};
} // End llvm namespace
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
index 449dc35..ada2e65 100644
--- a/include/llvm/Bitcode/BitCodes.h
+++ b/include/llvm/Bitcode/BitCodes.h
@@ -19,7 +19,7 @@
#define LLVM_BITCODE_BITCODES_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cassert>
namespace llvm {
diff --git a/include/llvm/Bitcode/Deserialize.h b/include/llvm/Bitcode/Deserialize.h
index 3e90227..90a5141 100644
--- a/include/llvm/Bitcode/Deserialize.h
+++ b/include/llvm/Bitcode/Deserialize.h
@@ -20,7 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <vector>
namespace llvm {
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index dccd8e0..c037399 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -138,7 +138,8 @@ namespace bitc {
CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred]
CST_CODE_INLINEASM = 18, // INLINEASM: [sideeffect,asmstr,conststr]
CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval]
- CST_CODE_CE_INBOUNDS_GEP = 20 // INBOUNDS_GEP: [n x operands]
+ CST_CODE_CE_INBOUNDS_GEP = 20,// INBOUNDS_GEP: [n x operands]
+ CST_CODE_BLOCKADDRESS = 21 // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#]
};
/// CastOpcodes - These are values used in the bitcode files to encode which
@@ -209,7 +210,7 @@ namespace bitc {
FUNC_CODE_INST_RET = 10, // RET: [opty,opval<both optional>]
FUNC_CODE_INST_BR = 11, // BR: [bb#, bb#, cond] or [bb#]
- FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, opval, n, n x ops]
+ FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, op0, op1, ...]
FUNC_CODE_INST_INVOKE = 13, // INVOKE: [attr, fnty, op0,op1, ...]
FUNC_CODE_INST_UNWIND = 14, // UNWIND
FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE
@@ -236,7 +237,8 @@ namespace bitc {
FUNC_CODE_INST_CMP2 = 28, // CMP2: [opty, opval, opval, pred]
// new select on i1 or [N x i1]
FUNC_CODE_INST_VSELECT = 29, // VSELECT: [ty,opval,opval,predty,pred]
- FUNC_CODE_INST_INBOUNDS_GEP = 30 // INBOUNDS_GEP: [n x operands]
+ FUNC_CODE_INST_INBOUNDS_GEP= 30, // INBOUNDS_GEP: [n x operands]
+ FUNC_CODE_INST_INDIRECTBR = 31 // INDIRECTBR: [opty, op0, op1, ...]
};
} // End bitc namespace
} // End llvm namespace
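
For orientation only (not part of the patch), a sketch of dispatching on the two new record codes; the record layouts are taken from the comments above, the function itself is illustrative:

#include "llvm/Bitcode/LLVMBitCodes.h"

// CST_CODE_BLOCKADDRESS carries [fnty, fnval, bb#]; FUNC_CODE_INST_INDIRECTBR
// carries [opty, op0, op1, ...].
static const char *newCodeName(unsigned Code, bool InConstantsBlock) {
  if (InConstantsBlock && Code == llvm::bitc::CST_CODE_BLOCKADDRESS)
    return "blockaddress";
  if (!InConstantsBlock && Code == llvm::bitc::FUNC_CODE_INST_INDIRECTBR)
    return "indirectbr";
  return "other";
}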
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 62d0679..a0bd330 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/DenseMap.h"
namespace llvm {
+ class BlockAddress;
class GCStrategy;
class Constant;
class ConstantArray;
@@ -334,6 +335,12 @@ namespace llvm {
/// block label.
MCSymbol *GetMBBSymbol(unsigned MBBID) const;
+ /// GetBlockAddressSymbol - Return the MCSymbol used to satisfy BlockAddress
+ /// uses of the specified basic block.
+ MCSymbol *GetBlockAddressSymbol(const BlockAddress *BA) const;
+ MCSymbol *GetBlockAddressSymbol(const Function *F,
+ const BasicBlock *BB) const;
+
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
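
A hypothetical fragment (not from the patch) showing where the new hook slots into a target printer; actual emission of the symbol is target-specific and elided:

#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineOperand.h"

// For a MO_BlockAddress operand, the base class hands back the MCSymbol that
// stands in for the target basic block.
static llvm::MCSymbol *symbolForOperand(const llvm::AsmPrinter &AP,
                                        const llvm::MachineOperand &MO) {
  if (MO.isBlockAddress())
    return AP.GetBlockAddressSymbol(MO.getBlockAddress());
  return 0;
}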
diff --git a/include/llvm/CodeGen/BinaryObject.h b/include/llvm/CodeGen/BinaryObject.h
index 2d4bd73..9e2ef18 100644
--- a/include/llvm/CodeGen/BinaryObject.h
+++ b/include/llvm/CodeGen/BinaryObject.h
@@ -15,7 +15,7 @@
#ifndef LLVM_CODEGEN_BINARYOBJECT_H
#define LLVM_CODEGEN_BINARYOBJECT_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <string>
#include <vector>
diff --git a/include/llvm/CodeGen/ELFRelocation.h b/include/llvm/CodeGen/ELFRelocation.h
index c3f88f1..e58b8df 100644
--- a/include/llvm/CodeGen/ELFRelocation.h
+++ b/include/llvm/CodeGen/ELFRelocation.h
@@ -14,7 +14,7 @@
#ifndef LLVM_CODEGEN_ELF_RELOCATION_H
#define LLVM_CODEGEN_ELF_RELOCATION_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index 180783a..792fb59 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -18,7 +18,7 @@
#define LLVM_CODEGEN_JITCODEEMITTER_H
#include <string>
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h
index 71fae2a..a7cebee 100644
--- a/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -39,12 +39,21 @@ namespace llvm {
/// predecessor for. This is used as a tie-breaker heuristic for better
/// mobility.
std::vector<unsigned> NumNodesSolelyBlocking;
-
+
+ /// IgnoreAntiDep - Ignore anti-dependencies
+ bool IgnoreAntiDep;
+
+ /// Queue - The queue.
PriorityQueue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
+
public:
- LatencyPriorityQueue() : Queue(latency_sort(this)) {
+ LatencyPriorityQueue() : IgnoreAntiDep(false), Queue(latency_sort(this)) {
}
-
+
+ void setIgnoreAntiDep(bool ignore) {
+ IgnoreAntiDep = ignore;
+ }
+
void initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
NumNodesSolelyBlocking.resize(SUnits->size(), 0);
@@ -63,7 +72,7 @@ public:
unsigned getLatency(unsigned NodeNum) const {
assert(NodeNum < (*SUnits).size());
- return (*SUnits)[NodeNum].getHeight();
+ return (*SUnits)[NodeNum].getHeight(IgnoreAntiDep);
}
unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
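
A minimal sketch of the new knob (illustrative function name, and it assumes a scheduler has already built the SUnits vector):

#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include <vector>

// With IgnoreAntiDep set, getLatency() reports getHeight(true), i.e. heights
// computed as if anti-dependence edges were absent.
static void primeQueue(std::vector<llvm::SUnit> &SUnits) {
  llvm::LatencyPriorityQueue PQ;
  PQ.setIgnoreAntiDep(true);
  PQ.initNodes(SUnits);
}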
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 05bd173..e31a7f0 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -21,221 +21,19 @@
#ifndef LLVM_CODEGEN_LIVEINTERVAL_H
#define LLVM_CODEGEN_LIVEINTERVAL_H
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/AlignOf.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include <cassert>
#include <climits>
namespace llvm {
+ class LiveIntervals;
class MachineInstr;
class MachineRegisterInfo;
class TargetRegisterInfo;
class raw_ostream;
-
- /// LiveIndex - An opaque wrapper around machine indexes.
- class LiveIndex {
- friend class VNInfo;
- friend class LiveInterval;
- friend class LiveIntervals;
- friend struct DenseMapInfo<LiveIndex>;
-
- public:
-
- enum Slot { LOAD, USE, DEF, STORE, NUM };
-
- private:
-
- unsigned index;
-
- static const unsigned PHI_BIT = 1 << 31;
-
- public:
-
- /// Construct a default LiveIndex pointing to a reserved index.
- LiveIndex() : index(0) {}
-
- /// Construct an index from the given index, pointing to the given slot.
- LiveIndex(LiveIndex m, Slot s)
- : index((m.index / NUM) * NUM + s) {}
-
- /// Print this index to the given raw_ostream.
- void print(raw_ostream &os) const;
-
- /// Compare two LiveIndex objects for equality.
- bool operator==(LiveIndex other) const {
- return ((index & ~PHI_BIT) == (other.index & ~PHI_BIT));
- }
- /// Compare two LiveIndex objects for inequality.
- bool operator!=(LiveIndex other) const {
- return ((index & ~PHI_BIT) != (other.index & ~PHI_BIT));
- }
-
- /// Compare two LiveIndex objects. Return true if the first index
- /// is strictly lower than the second.
- bool operator<(LiveIndex other) const {
- return ((index & ~PHI_BIT) < (other.index & ~PHI_BIT));
- }
- /// Compare two LiveIndex objects. Return true if the first index
- /// is lower than, or equal to, the second.
- bool operator<=(LiveIndex other) const {
- return ((index & ~PHI_BIT) <= (other.index & ~PHI_BIT));
- }
-
- /// Compare two LiveIndex objects. Return true if the first index
- /// is greater than the second.
- bool operator>(LiveIndex other) const {
- return ((index & ~PHI_BIT) > (other.index & ~PHI_BIT));
- }
-
- /// Compare two LiveIndex objects. Return true if the first index
- /// is greater than, or equal to, the second.
- bool operator>=(LiveIndex other) const {
- return ((index & ~PHI_BIT) >= (other.index & ~PHI_BIT));
- }
-
- /// Returns true if this index represents a load.
- bool isLoad() const {
- return ((index % NUM) == LOAD);
- }
-
- /// Returns true if this index represents a use.
- bool isUse() const {
- return ((index % NUM) == USE);
- }
-
- /// Returns true if this index represents a def.
- bool isDef() const {
- return ((index % NUM) == DEF);
- }
-
- /// Returns true if this index represents a store.
- bool isStore() const {
- return ((index % NUM) == STORE);
- }
-
- /// Returns the slot for this LiveIndex.
- Slot getSlot() const {
- return static_cast<Slot>(index % NUM);
- }
-
- /// Returns true if this index represents a non-PHI use/def.
- bool isNonPHIIndex() const {
- return ((index & PHI_BIT) == 0);
- }
-
- /// Returns true if this index represents a PHI use/def.
- bool isPHIIndex() const {
- return ((index & PHI_BIT) == PHI_BIT);
- }
-
- private:
-
- /// Construct an index from the given index, with its PHI kill marker set.
- LiveIndex(bool phi, LiveIndex o) : index(o.index) {
- if (phi)
- index |= PHI_BIT;
- else
- index &= ~PHI_BIT;
- }
-
- explicit LiveIndex(unsigned idx)
- : index(idx & ~PHI_BIT) {}
-
- LiveIndex(bool phi, unsigned idx)
- : index(idx & ~PHI_BIT) {
- if (phi)
- index |= PHI_BIT;
- }
-
- LiveIndex(bool phi, unsigned idx, Slot slot)
- : index(((idx / NUM) * NUM + slot) & ~PHI_BIT) {
- if (phi)
- index |= PHI_BIT;
- }
-
- LiveIndex nextSlot_() const {
- assert((index & PHI_BIT) == ((index + 1) & PHI_BIT) &&
- "Index out of bounds.");
- return LiveIndex(index + 1);
- }
-
- LiveIndex nextIndex_() const {
- assert((index & PHI_BIT) == ((index + NUM) & PHI_BIT) &&
- "Index out of bounds.");
- return LiveIndex(index + NUM);
- }
-
- LiveIndex prevSlot_() const {
- assert((index & PHI_BIT) == ((index - 1) & PHI_BIT) &&
- "Index out of bounds.");
- return LiveIndex(index - 1);
- }
-
- LiveIndex prevIndex_() const {
- assert((index & PHI_BIT) == ((index - NUM) & PHI_BIT) &&
- "Index out of bounds.");
- return LiveIndex(index - NUM);
- }
-
- int distance(LiveIndex other) const {
- return (other.index & ~PHI_BIT) - (index & ~PHI_BIT);
- }
-
- /// Returns an unsigned number suitable as an index into a
- /// vector over all instructions.
- unsigned getVecIndex() const {
- return (index & ~PHI_BIT) / NUM;
- }
-
- /// Scale this index by the given factor.
- LiveIndex scale(unsigned factor) const {
- unsigned i = (index & ~PHI_BIT) / NUM,
- o = (index % ~PHI_BIT) % NUM;
- assert(index <= (~0U & ~PHI_BIT) / (factor * NUM) &&
- "Rescaled interval would overflow");
- return LiveIndex(i * NUM * factor, o);
- }
-
- static LiveIndex emptyKey() {
- return LiveIndex(true, 0x7fffffff);
- }
-
- static LiveIndex tombstoneKey() {
- return LiveIndex(true, 0x7ffffffe);
- }
-
- static unsigned getHashValue(const LiveIndex &v) {
- return v.index * 37;
- }
-
- };
-
- inline raw_ostream& operator<<(raw_ostream &os, LiveIndex mi) {
- mi.print(os);
- return os;
- }
-
- /// Densemap specialization for LiveIndex.
- template <>
- struct DenseMapInfo<LiveIndex> {
- static inline LiveIndex getEmptyKey() {
- return LiveIndex::emptyKey();
- }
- static inline LiveIndex getTombstoneKey() {
- return LiveIndex::tombstoneKey();
- }
- static inline unsigned getHashValue(const LiveIndex &v) {
- return LiveIndex::getHashValue(v);
- }
- static inline bool isEqual(const LiveIndex &LHS,
- const LiveIndex &RHS) {
- return (LHS == RHS);
- }
- static inline bool isPod() { return true; }
- };
-
/// VNInfo - Value Number Information.
/// This class holds information about a machine level value, including
@@ -270,23 +68,25 @@ namespace llvm {
public:
- typedef SmallVector<LiveIndex, 4> KillSet;
+ typedef SmallVector<SlotIndex, 4> KillSet;
/// The ID number of this value.
unsigned id;
/// The index of the defining instruction (if isDefAccurate() returns true).
- LiveIndex def;
+ SlotIndex def;
KillSet kills;
- VNInfo()
- : flags(IS_UNUSED), id(~1U) { cr.copy = 0; }
+ /*
+ VNInfo(LiveIntervals &li_)
+ : defflags(IS_UNUSED), id(~1U) { cr.copy = 0; }
+ */
/// VNInfo constructor.
/// d is presumed to point to the actual defining instr. If it doesn't,
/// setIsDefAccurate(false) should be called after construction.
- VNInfo(unsigned i, LiveIndex d, MachineInstr *c)
+ VNInfo(unsigned i, SlotIndex d, MachineInstr *c)
: flags(IS_DEF_ACCURATE), id(i), def(d) { cr.copy = c; }
/// VNInfo constructor, copies values from orig, except for the value number.
@@ -377,7 +177,7 @@ namespace llvm {
}
/// Returns true if the given index is a kill of this value.
- bool isKill(LiveIndex k) const {
+ bool isKill(SlotIndex k) const {
KillSet::const_iterator
i = std::lower_bound(kills.begin(), kills.end(), k);
return (i != kills.end() && *i == k);
@@ -385,7 +185,7 @@ namespace llvm {
/// addKill - Add a kill instruction index to the specified value
/// number.
- void addKill(LiveIndex k) {
+ void addKill(SlotIndex k) {
if (kills.empty()) {
kills.push_back(k);
} else {
@@ -397,7 +197,7 @@ namespace llvm {
/// Remove the specified kill index from this value's kills list.
/// Returns true if the value was present, otherwise returns false.
- bool removeKill(LiveIndex k) {
+ bool removeKill(SlotIndex k) {
KillSet::iterator i = std::lower_bound(kills.begin(), kills.end(), k);
if (i != kills.end() && *i == k) {
kills.erase(i);
@@ -407,7 +207,7 @@ namespace llvm {
}
/// Remove all kills in the range [s, e).
- void removeKills(LiveIndex s, LiveIndex e) {
+ void removeKills(SlotIndex s, SlotIndex e) {
KillSet::iterator
si = std::lower_bound(kills.begin(), kills.end(), s),
se = std::upper_bound(kills.begin(), kills.end(), e);
@@ -421,11 +221,11 @@ namespace llvm {
/// program, with an inclusive start point and an exclusive end point.
/// These ranges are rendered as [start,end).
struct LiveRange {
- LiveIndex start; // Start point of the interval (inclusive)
- LiveIndex end; // End point of the interval (exclusive)
+ SlotIndex start; // Start point of the interval (inclusive)
+ SlotIndex end; // End point of the interval (exclusive)
VNInfo *valno; // identifier for the value contained in this interval.
- LiveRange(LiveIndex S, LiveIndex E, VNInfo *V)
+ LiveRange(SlotIndex S, SlotIndex E, VNInfo *V)
: start(S), end(E), valno(V) {
assert(S < E && "Cannot create empty or backwards range");
@@ -433,13 +233,13 @@ namespace llvm {
/// contains - Return true if the index is covered by this range.
///
- bool contains(LiveIndex I) const {
+ bool contains(SlotIndex I) const {
return start <= I && I < end;
}
/// containsRange - Return true if the given range, [S, E), is covered by
/// this range.
- bool containsRange(LiveIndex S, LiveIndex E) const {
+ bool containsRange(SlotIndex S, SlotIndex E) const {
assert((S < E) && "Backwards interval?");
return (start <= S && S < end) && (start < E && E <= end);
}
@@ -461,11 +261,11 @@ namespace llvm {
raw_ostream& operator<<(raw_ostream& os, const LiveRange &LR);
- inline bool operator<(LiveIndex V, const LiveRange &LR) {
+ inline bool operator<(SlotIndex V, const LiveRange &LR) {
return V < LR.start;
}
- inline bool operator<(const LiveRange &LR, LiveIndex V) {
+ inline bool operator<(const LiveRange &LR, SlotIndex V) {
return LR.start < V;
}
@@ -522,7 +322,7 @@ namespace llvm {
/// end of the interval. If no LiveRange contains this position, but the
/// position is in a hole, this method returns an iterator pointing to the
/// LiveRange immediately after the hole.
- iterator advanceTo(iterator I, LiveIndex Pos) {
+ iterator advanceTo(iterator I, SlotIndex Pos) {
if (Pos >= endIndex())
return end();
while (I->end <= Pos) ++I;
@@ -569,7 +369,7 @@ namespace llvm {
/// getNextValue - Create a new value number and return it. MIIdx specifies
/// the instruction that defines the value number.
- VNInfo *getNextValue(LiveIndex def, MachineInstr *CopyMI,
+ VNInfo *getNextValue(SlotIndex def, MachineInstr *CopyMI,
bool isDefAccurate, BumpPtrAllocator &VNInfoAllocator){
VNInfo *VNI =
static_cast<VNInfo*>(VNInfoAllocator.Allocate((unsigned)sizeof(VNInfo),
@@ -625,13 +425,15 @@ namespace llvm {
/// current interval, but are defined in the Clobbers interval, mark them
/// used with an unknown definition value. Caller must pass in reference to
/// VNInfoAllocator since it will create a new val#.
- void MergeInClobberRanges(const LiveInterval &Clobbers,
+ void MergeInClobberRanges(LiveIntervals &li_,
+ const LiveInterval &Clobbers,
BumpPtrAllocator &VNInfoAllocator);
/// MergeInClobberRange - Same as MergeInClobberRanges except it merge in a
/// single LiveRange only.
- void MergeInClobberRange(LiveIndex Start,
- LiveIndex End,
+ void MergeInClobberRange(LiveIntervals &li_,
+ SlotIndex Start,
+ SlotIndex End,
BumpPtrAllocator &VNInfoAllocator);
/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
@@ -657,56 +459,54 @@ namespace llvm {
bool empty() const { return ranges.empty(); }
/// beginIndex - Return the lowest numbered slot covered by interval.
- LiveIndex beginIndex() const {
- if (empty())
- return LiveIndex();
+ SlotIndex beginIndex() const {
+ assert(!empty() && "Call to beginIndex() on empty interval.");
return ranges.front().start;
}
/// endIndex - Return the maximum point of the whole interval,
/// exclusive.
- LiveIndex endIndex() const {
- if (empty())
- return LiveIndex();
+ SlotIndex endIndex() const {
+ assert(!empty() && "Call to endIndex() on empty interval.");
return ranges.back().end;
}
- bool expiredAt(LiveIndex index) const {
+ bool expiredAt(SlotIndex index) const {
return index >= endIndex();
}
- bool liveAt(LiveIndex index) const;
+ bool liveAt(SlotIndex index) const;
// liveBeforeAndAt - Check if the interval is live at the index and the
// index just before it. If index is liveAt, check if it starts a new live
// range. If it does, then check if the previous live range ends at index-1.
- bool liveBeforeAndAt(LiveIndex index) const;
+ bool liveBeforeAndAt(SlotIndex index) const;
/// getLiveRangeContaining - Return the live range that contains the
/// specified index, or null if there is none.
- const LiveRange *getLiveRangeContaining(LiveIndex Idx) const {
+ const LiveRange *getLiveRangeContaining(SlotIndex Idx) const {
const_iterator I = FindLiveRangeContaining(Idx);
return I == end() ? 0 : &*I;
}
/// getLiveRangeContaining - Return the live range that contains the
/// specified index, or null if there is none.
- LiveRange *getLiveRangeContaining(LiveIndex Idx) {
+ LiveRange *getLiveRangeContaining(SlotIndex Idx) {
iterator I = FindLiveRangeContaining(Idx);
return I == end() ? 0 : &*I;
}
/// FindLiveRangeContaining - Return an iterator to the live range that
/// contains the specified index, or end() if there is none.
- const_iterator FindLiveRangeContaining(LiveIndex Idx) const;
+ const_iterator FindLiveRangeContaining(SlotIndex Idx) const;
/// FindLiveRangeContaining - Return an iterator to the live range that
/// contains the specified index, or end() if there is none.
- iterator FindLiveRangeContaining(LiveIndex Idx);
+ iterator FindLiveRangeContaining(SlotIndex Idx);
/// findDefinedVNInfo - Find the VNInfo defined by the specified
/// index (register interval only).
- VNInfo *findDefinedVNInfoForRegInt(LiveIndex Idx) const;
+ VNInfo *findDefinedVNInfoForRegInt(SlotIndex Idx) const;
/// findDefinedVNInfo - Find the VNInfo that's defined by the specified
/// register (stack interval only).
@@ -721,7 +521,7 @@ namespace llvm {
/// overlaps - Return true if the live interval overlaps a range specified
/// by [Start, End).
- bool overlaps(LiveIndex Start, LiveIndex End) const;
+ bool overlaps(SlotIndex Start, SlotIndex End) const;
/// overlapsFrom - Return true if the intersection of the two live intervals
/// is not empty. The specified iterator is a hint that we can begin
@@ -738,18 +538,19 @@ namespace llvm {
/// join - Join two live intervals (this, and other) together. This applies
/// mappings to the value numbers in the LHS/RHS intervals as specified. If
/// the intervals are not joinable, this aborts.
- void join(LiveInterval &Other, const int *ValNoAssignments,
+ void join(LiveInterval &Other,
+ const int *ValNoAssignments,
const int *RHSValNoAssignments,
SmallVector<VNInfo*, 16> &NewVNInfo,
MachineRegisterInfo *MRI);
/// isInOneLiveRange - Return true if the range specified is entirely in the
/// a single LiveRange of the live interval.
- bool isInOneLiveRange(LiveIndex Start, LiveIndex End);
+ bool isInOneLiveRange(SlotIndex Start, SlotIndex End);
/// removeRange - Remove the specified range from this interval. Note that
/// the range must be a single LiveRange in its entirety.
- void removeRange(LiveIndex Start, LiveIndex End,
+ void removeRange(SlotIndex Start, SlotIndex End,
bool RemoveDeadValNo = false);
void removeRange(LiveRange LR, bool RemoveDeadValNo = false) {
@@ -773,8 +574,8 @@ namespace llvm {
void ComputeJoinedWeight(const LiveInterval &Other);
bool operator<(const LiveInterval& other) const {
- const LiveIndex &thisIndex = beginIndex();
- const LiveIndex &otherIndex = other.beginIndex();
+ const SlotIndex &thisIndex = beginIndex();
+ const SlotIndex &otherIndex = other.beginIndex();
return (thisIndex < otherIndex ||
(thisIndex == otherIndex && reg < other.reg));
}
@@ -785,8 +586,9 @@ namespace llvm {
private:
Ranges::iterator addRangeFrom(LiveRange LR, Ranges::iterator From);
- void extendIntervalEndTo(Ranges::iterator I, LiveIndex NewEnd);
- Ranges::iterator extendIntervalStartTo(Ranges::iterator I, LiveIndex NewStr);
+ void extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd);
+ Ranges::iterator extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStr);
+
LiveInterval& operator=(const LiveInterval& rhs); // DO NOT IMPLEMENT
};
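
To make the rename concrete, a sketch (not part of the patch; definedAt is an invented name): client code changes only in the index type it spells:

#include "llvm/CodeGen/LiveInterval.h"
using namespace llvm;

// Queries read exactly as before, with SlotIndex in place of LiveIndex.
static bool definedAt(const LiveInterval &LI, SlotIndex Idx) {
  if (!LI.liveAt(Idx))
    return false;
  const LiveRange *LR = LI.getLiveRangeContaining(Idx);
  return LR && LR->valno && LR->valno->def == Idx;
}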
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 511db6d..efb4a03 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -23,12 +23,14 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include <cmath>
+#include <iterator>
namespace llvm {
@@ -40,21 +42,6 @@ namespace llvm {
class TargetInstrInfo;
class TargetRegisterClass;
class VirtRegMap;
- typedef std::pair<LiveIndex, MachineBasicBlock*> IdxMBBPair;
-
- inline bool operator<(LiveIndex V, const IdxMBBPair &IM) {
- return V < IM.first;
- }
-
- inline bool operator<(const IdxMBBPair &IM, LiveIndex V) {
- return IM.first < V;
- }
-
- struct Idx2MBBCompare {
- bool operator()(const IdxMBBPair &LHS, const IdxMBBPair &RHS) const {
- return LHS.first < RHS.first;
- }
- };
class LiveIntervals : public MachineFunctionPass {
MachineFunction* mf_;
@@ -64,33 +51,15 @@ namespace llvm {
const TargetInstrInfo* tii_;
AliasAnalysis *aa_;
LiveVariables* lv_;
+ SlotIndexes* indexes_;
/// Special pool allocator for VNInfo's (LiveInterval val#).
///
BumpPtrAllocator VNInfoAllocator;
- /// MBB2IdxMap - The indexes of the first and last instructions in the
- /// specified basic block.
- std::vector<std::pair<LiveIndex, LiveIndex> > MBB2IdxMap;
-
- /// Idx2MBBMap - Sorted list of pairs of index of first instruction
- /// and MBB id.
- std::vector<IdxMBBPair> Idx2MBBMap;
-
- /// FunctionSize - The number of instructions present in the function
- uint64_t FunctionSize;
-
- typedef DenseMap<const MachineInstr*, LiveIndex> Mi2IndexMap;
- Mi2IndexMap mi2iMap_;
-
- typedef std::vector<MachineInstr*> Index2MiMap;
- Index2MiMap i2miMap_;
-
typedef DenseMap<unsigned, LiveInterval*> Reg2IntervalMap;
Reg2IntervalMap r2iMap_;
- DenseMap<MachineBasicBlock*, LiveIndex> terminatorGaps;
-
/// phiJoinCopies - Copy instructions which are PHI joins.
SmallVector<MachineInstr*, 16> phiJoinCopies;
@@ -100,48 +69,10 @@ namespace llvm {
/// CloneMIs - A list of clones as result of re-materialization.
std::vector<MachineInstr*> CloneMIs;
- typedef LiveInterval::InstrSlots InstrSlots;
-
public:
static char ID; // Pass identification, replacement for typeid
LiveIntervals() : MachineFunctionPass(&ID) {}
- LiveIndex getBaseIndex(LiveIndex index) {
- return LiveIndex(index, LiveIndex::LOAD);
- }
- LiveIndex getBoundaryIndex(LiveIndex index) {
- return LiveIndex(index,
- (LiveIndex::Slot)(LiveIndex::NUM - 1));
- }
- LiveIndex getLoadIndex(LiveIndex index) {
- return LiveIndex(index, LiveIndex::LOAD);
- }
- LiveIndex getUseIndex(LiveIndex index) {
- return LiveIndex(index, LiveIndex::USE);
- }
- LiveIndex getDefIndex(LiveIndex index) {
- return LiveIndex(index, LiveIndex::DEF);
- }
- LiveIndex getStoreIndex(LiveIndex index) {
- return LiveIndex(index, LiveIndex::STORE);
- }
-
- LiveIndex getNextSlot(LiveIndex m) const {
- return m.nextSlot_();
- }
-
- LiveIndex getNextIndex(LiveIndex m) const {
- return m.nextIndex_();
- }
-
- LiveIndex getPrevSlot(LiveIndex m) const {
- return m.prevSlot_();
- }
-
- LiveIndex getPrevIndex(LiveIndex m) const {
- return m.prevIndex_();
- }
-
static float getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
return (isDef + isUse) * powf(10.0F, (float)loopDepth);
}
@@ -170,111 +101,18 @@ namespace llvm {
return r2iMap_.count(reg);
}
- /// getMBBStartIdx - Return the base index of the first instruction in the
- /// specified MachineBasicBlock.
- LiveIndex getMBBStartIdx(MachineBasicBlock *MBB) const {
- return getMBBStartIdx(MBB->getNumber());
- }
- LiveIndex getMBBStartIdx(unsigned MBBNo) const {
- assert(MBBNo < MBB2IdxMap.size() && "Invalid MBB number!");
- return MBB2IdxMap[MBBNo].first;
- }
-
- /// getMBBEndIdx - Return the store index of the last instruction in the
- /// specified MachineBasicBlock.
- LiveIndex getMBBEndIdx(MachineBasicBlock *MBB) const {
- return getMBBEndIdx(MBB->getNumber());
- }
- LiveIndex getMBBEndIdx(unsigned MBBNo) const {
- assert(MBBNo < MBB2IdxMap.size() && "Invalid MBB number!");
- return MBB2IdxMap[MBBNo].second;
- }
-
/// getScaledIntervalSize - get the size of an interval in "units,"
/// where every function is composed of one thousand units. This
/// measure scales properly with empty index slots in the function.
double getScaledIntervalSize(LiveInterval& I) {
- return (1000.0 / InstrSlots::NUM * I.getSize()) / i2miMap_.size();
+ return (1000.0 * I.getSize()) / indexes_->getIndexesLength();
}
/// getApproximateInstructionCount - computes an estimate of the number
/// of instructions in a given LiveInterval.
unsigned getApproximateInstructionCount(LiveInterval& I) {
double IntervalPercentage = getScaledIntervalSize(I) / 1000.0;
- return (unsigned)(IntervalPercentage * FunctionSize);
- }
-
- /// getMBBFromIndex - given an index in any instruction of an
- /// MBB return a pointer the MBB
- MachineBasicBlock* getMBBFromIndex(LiveIndex index) const {
- std::vector<IdxMBBPair>::const_iterator I =
- std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), index);
- // Take the pair containing the index
- std::vector<IdxMBBPair>::const_iterator J =
- ((I != Idx2MBBMap.end() && I->first > index) ||
- (I == Idx2MBBMap.end() && Idx2MBBMap.size()>0)) ? (I-1): I;
-
- assert(J != Idx2MBBMap.end() && J->first <= index &&
- index <= getMBBEndIdx(J->second) &&
- "index does not correspond to an MBB");
- return J->second;
- }
-
- /// getInstructionIndex - returns the base index of instr
- LiveIndex getInstructionIndex(const MachineInstr* instr) const {
- Mi2IndexMap::const_iterator it = mi2iMap_.find(instr);
- assert(it != mi2iMap_.end() && "Invalid instruction!");
- return it->second;
- }
-
- /// getInstructionFromIndex - given an index in any slot of an
- /// instruction return a pointer the instruction
- MachineInstr* getInstructionFromIndex(LiveIndex index) const {
- // convert index to vector index
- unsigned i = index.getVecIndex();
- assert(i < i2miMap_.size() &&
- "index does not correspond to an instruction");
- return i2miMap_[i];
- }
-
- /// hasGapBeforeInstr - Return true if the previous instruction slot,
- /// i.e. Index - InstrSlots::NUM, is not occupied.
- bool hasGapBeforeInstr(LiveIndex Index) {
- Index = getBaseIndex(getPrevIndex(Index));
- return getInstructionFromIndex(Index) == 0;
- }
-
- /// hasGapAfterInstr - Return true if the successive instruction slot,
- /// i.e. Index + InstrSlots::Num, is not occupied.
- bool hasGapAfterInstr(LiveIndex Index) {
- Index = getBaseIndex(getNextIndex(Index));
- return getInstructionFromIndex(Index) == 0;
- }
-
- /// findGapBeforeInstr - Find an empty instruction slot before the
- /// specified index. If "Furthest" is true, find one that's furthest
- /// away from the index (but before any index that's occupied).
- LiveIndex findGapBeforeInstr(LiveIndex Index, bool Furthest = false) {
- Index = getBaseIndex(getPrevIndex(Index));
- if (getInstructionFromIndex(Index))
- return LiveIndex(); // No gap!
- if (!Furthest)
- return Index;
- LiveIndex PrevIndex = getBaseIndex(getPrevIndex(Index));
- while (getInstructionFromIndex(Index)) {
- Index = PrevIndex;
- PrevIndex = getBaseIndex(getPrevIndex(Index));
- }
- return Index;
- }
-
- /// InsertMachineInstrInMaps - Insert the specified machine instruction
- /// into the instruction index map at the given index.
- void InsertMachineInstrInMaps(MachineInstr *MI, LiveIndex Index) {
- i2miMap_[Index.getVecIndex()] = MI;
- Mi2IndexMap::iterator it = mi2iMap_.find(MI);
- assert(it == mi2iMap_.end() && "Already in map!");
- mi2iMap_[MI] = Index;
+ return (unsigned)(IntervalPercentage * indexes_->getFunctionSize());
}
/// conflictsWithPhysRegDef - Returns true if the specified register
@@ -288,19 +126,7 @@ namespace llvm {
bool CheckUse,
SmallPtrSet<MachineInstr*,32> &JoinedCopies);
- /// findLiveInMBBs - Given a live range, if the value of the range
- /// is live in any MBB returns true as well as the list of basic blocks
- /// in which the value is live.
- bool findLiveInMBBs(LiveIndex Start, LiveIndex End,
- SmallVectorImpl<MachineBasicBlock*> &MBBs) const;
-
- /// findReachableMBBs - Return a list MBB that can be reached via any
- /// branch or fallthroughs. Return true if the list is not empty.
- bool findReachableMBBs(LiveIndex Start, LiveIndex End,
- SmallVectorImpl<MachineBasicBlock*> &MBBs) const;
-
// Interval creation
-
LiveInterval &getOrCreateInterval(unsigned reg) {
Reg2IntervalMap::iterator I = r2iMap_.find(reg);
if (I == r2iMap_.end())
@@ -325,36 +151,75 @@ namespace llvm {
r2iMap_.erase(I);
}
+ SlotIndex getZeroIndex() const {
+ return indexes_->getZeroIndex();
+ }
+
+ SlotIndex getInvalidIndex() const {
+ return indexes_->getInvalidIndex();
+ }
+
/// isNotInMIMap - returns true if the specified machine instr has been
/// removed or was never entered in the map.
- bool isNotInMIMap(MachineInstr* instr) const {
- return !mi2iMap_.count(instr);
+ bool isNotInMIMap(const MachineInstr* Instr) const {
+ return !indexes_->hasIndex(Instr);
+ }
+
+ /// Returns the base index of the given instruction.
+ SlotIndex getInstructionIndex(const MachineInstr *instr) const {
+ return indexes_->getInstructionIndex(instr);
+ }
+
+ /// Returns the instruction associated with the given index.
+ MachineInstr* getInstructionFromIndex(SlotIndex index) const {
+ return indexes_->getInstructionFromIndex(index);
+ }
+
+ /// Return the first index in the given basic block.
+ SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const {
+ return indexes_->getMBBStartIdx(mbb);
+ }
+
+ /// Return the last index in the given basic block.
+ SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const {
+ return indexes_->getMBBEndIdx(mbb);
+ }
+
+ MachineBasicBlock* getMBBFromIndex(SlotIndex index) const {
+ return indexes_->getMBBFromIndex(index);
+ }
+
+ bool hasGapBeforeInstr(SlotIndex index) {
+ return indexes_->hasGapBeforeInstr(index);
+ }
+
+ bool hasGapAfterInstr(SlotIndex index) {
+ return indexes_->hasGapAfterInstr(index);
+ }
+
+ SlotIndex findGapBeforeInstr(SlotIndex index, bool furthest = false) {
+ return indexes_->findGapBeforeInstr(index, furthest);
+ }
+
+ void InsertMachineInstrInMaps(MachineInstr *MI, SlotIndex Index) {
+ indexes_->insertMachineInstrInMaps(MI, Index);
}
- /// RemoveMachineInstrFromMaps - This marks the specified machine instr as
- /// deleted.
void RemoveMachineInstrFromMaps(MachineInstr *MI) {
- // remove index -> MachineInstr and
- // MachineInstr -> index mappings
- Mi2IndexMap::iterator mi2i = mi2iMap_.find(MI);
- if (mi2i != mi2iMap_.end()) {
- i2miMap_[mi2i->second.index/InstrSlots::NUM] = 0;
- mi2iMap_.erase(mi2i);
- }
+ indexes_->removeMachineInstrFromMaps(MI);
}
- /// ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in
- /// maps used by register allocator.
void ReplaceMachineInstrInMaps(MachineInstr *MI, MachineInstr *NewMI) {
- Mi2IndexMap::iterator mi2i = mi2iMap_.find(MI);
- if (mi2i == mi2iMap_.end())
- return;
- i2miMap_[mi2i->second.index/InstrSlots::NUM] = NewMI;
- Mi2IndexMap::iterator it = mi2iMap_.find(MI);
- assert(it != mi2iMap_.end() && "Invalid instruction!");
- LiveIndex Index = it->second;
- mi2iMap_.erase(it);
- mi2iMap_[NewMI] = Index;
+ indexes_->replaceMachineInstrInMaps(MI, NewMI);
+ }
+
+ bool findLiveInMBBs(SlotIndex Start, SlotIndex End,
+ SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
+ return indexes_->findLiveInMBBs(Start, End, MBBs);
+ }
+
+ void renumber() {
+ indexes_->renumber();
}
BumpPtrAllocator& getVNInfoAllocator() { return VNInfoAllocator; }
@@ -417,13 +282,6 @@ namespace llvm {
/// marker to implicit_def defs and their uses.
void processImplicitDefs();
- /// computeNumbering - Compute the index numbering.
- void computeNumbering();
-
- /// scaleNumbering - Rescale interval numbers to introduce gaps for new
- /// instructions
- void scaleNumbering(int factor);
-
/// intervalIsInOneMBB - Returns true if the specified interval is entirely
/// within a single basic block.
bool intervalIsInOneMBB(const LiveInterval &li) const;
@@ -443,14 +301,14 @@ namespace llvm {
/// handleVirtualRegisterDef)
void handleRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI,
- LiveIndex MIIdx,
+ SlotIndex MIIdx,
MachineOperand& MO, unsigned MOIdx);
/// handleVirtualRegisterDef - update intervals for a virtual
/// register def
void handleVirtualRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI,
- LiveIndex MIIdx, MachineOperand& MO,
+ SlotIndex MIIdx, MachineOperand& MO,
unsigned MOIdx,
LiveInterval& interval);
@@ -458,13 +316,13 @@ namespace llvm {
/// def.
void handlePhysicalRegisterDef(MachineBasicBlock* mbb,
MachineBasicBlock::iterator mi,
- LiveIndex MIIdx, MachineOperand& MO,
+ SlotIndex MIIdx, MachineOperand& MO,
LiveInterval &interval,
MachineInstr *CopyMI);
/// handleLiveInRegister - Create interval for a livein register.
void handleLiveInRegister(MachineBasicBlock* mbb,
- LiveIndex MIIdx,
+ SlotIndex MIIdx,
LiveInterval &interval, bool isAlias = false);
/// getReMatImplicitUse - If the remat definition MI has one (for now, we
@@ -477,7 +335,7 @@ namespace llvm {
/// which reaches the given instruction also reaches the specified use
/// index.
bool isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
- LiveIndex UseIdx) const;
+ SlotIndex UseIdx) const;
/// isReMaterializable - Returns true if the definition MI of the specified
/// val# of the specified interval is re-materializable. Also returns true
@@ -492,7 +350,7 @@ namespace llvm {
/// MI. If it is successful, MI is updated with the newly created MI and
/// returns true.
bool tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm,
- MachineInstr *DefMI, LiveIndex InstrIdx,
+ MachineInstr *DefMI, SlotIndex InstrIdx,
SmallVector<unsigned, 2> &Ops,
bool isSS, int FrameIndex, unsigned Reg);
@@ -506,7 +364,7 @@ namespace llvm {
/// VNInfo that's after the specified index but is within the basic block.
bool anyKillInMBBAfterIdx(const LiveInterval &li, const VNInfo *VNI,
MachineBasicBlock *MBB,
- LiveIndex Idx) const;
+ SlotIndex Idx) const;
/// hasAllocatableSuperReg - Return true if the specified physical register
/// has any super register that's allocatable.
@@ -514,17 +372,17 @@ namespace llvm {
/// SRInfo - Spill / restore info.
struct SRInfo {
- LiveIndex index;
+ SlotIndex index;
unsigned vreg;
bool canFold;
- SRInfo(LiveIndex i, unsigned vr, bool f)
+ SRInfo(SlotIndex i, unsigned vr, bool f)
: index(i), vreg(vr), canFold(f) {}
};
- bool alsoFoldARestore(int Id, LiveIndex index, unsigned vr,
+ bool alsoFoldARestore(int Id, SlotIndex index, unsigned vr,
BitVector &RestoreMBBs,
DenseMap<unsigned,std::vector<SRInfo> >&RestoreIdxes);
- void eraseRestoreInfo(int Id, LiveIndex index, unsigned vr,
+ void eraseRestoreInfo(int Id, SlotIndex index, unsigned vr,
BitVector &RestoreMBBs,
DenseMap<unsigned,std::vector<SRInfo> >&RestoreIdxes);
@@ -543,7 +401,7 @@ namespace llvm {
/// functions for addIntervalsForSpills to rewrite uses / defs for the given
/// live range.
bool rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, LiveIndex index, LiveIndex end,
+ bool TrySplit, SlotIndex index, SlotIndex end,
MachineInstr *MI, MachineInstr *OrigDefMI, MachineInstr *DefMI,
unsigned Slot, int LdSlot,
bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
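
A sketch of a client pass body (invented helper, assuming the usual LiveIntervals accessors such as hasInterval/getInterval): the interface stays source-compatible even though the index bookkeeping now lives in SlotIndexes:

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
using namespace llvm;

// True if Reg's interval is live at MI's def slot.
static bool liveAtDef(LiveIntervals &LIS, const MachineInstr *MI,
                      unsigned Reg) {
  if (!LIS.hasInterval(Reg))
    return false;
  SlotIndex DefIdx = LIS.getInstructionIndex(MI).getDefIndex();
  return LIS.getInterval(Reg).liveAt(DefIdx);
}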
diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h
index d63a222..e01d1ae 100644
--- a/include/llvm/CodeGen/LiveStackAnalysis.h
+++ b/include/llvm/CodeGen/LiveStackAnalysis.h
@@ -48,8 +48,6 @@ namespace llvm {
iterator begin() { return S2IMap.begin(); }
iterator end() { return S2IMap.end(); }
- void scaleNumbering(int factor);
-
unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); }
LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 2a9e86a..585ee14 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -76,6 +76,10 @@ class MachineBasicBlock : public ilist_node<MachineBasicBlock> {
/// exception handler.
bool IsLandingPad;
+ /// AddressTaken - Indicate that this basic block is potentially the
+ /// target of an indirect branch.
+ bool AddressTaken;
+
// Intrusive list support
MachineBasicBlock() {}
@@ -92,6 +96,14 @@ public:
///
const BasicBlock *getBasicBlock() const { return BB; }
+ /// hasAddressTaken - Test whether this block is potentially the target
+ /// of an indirect branch.
+ bool hasAddressTaken() const { return AddressTaken; }
+
+ /// setHasAddressTaken - Set this block to reflect that it potentially
+ /// is the target of an indirect branch.
+ void setHasAddressTaken() { AddressTaken = true; }
+
/// getParent - Return the MachineFunction containing this basic block.
///
const MachineFunction *getParent() const { return xParent; }
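
The intended protocol, sketched (not part of the patch): whatever code materializes a blockaddress-derived branch target marks the MBB so later passes keep it reachable:

#include "llvm/CodeGen/MachineBasicBlock.h"

// The flag is sticky; the patch provides no way to clear it.
static void markIndirectTarget(llvm::MachineBasicBlock &MBB) {
  if (!MBB.hasAddressTaken())
    MBB.setHasAddressTaken();
}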
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
index abb6dd9..c55a9e6 100644
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ b/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -17,7 +17,7 @@
#ifndef LLVM_CODEGEN_MACHINECODEEMITTER_H
#define LLVM_CODEGEN_MACHINECODEEMITTER_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/DebugLoc.h"
namespace llvm {
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index a04189c..07c1eca 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cassert>
#include <vector>
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 7f681d7..6ca63f0 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -108,13 +108,6 @@ public:
return *this;
}
- const MachineInstrBuilder &addMetadata(MDNode *N,
- int64_t Offset = 0,
- unsigned char TargetFlags = 0) const {
- MI->addOperand(MachineOperand::CreateMDNode(N, Offset, TargetFlags));
- return *this;
- }
-
const MachineInstrBuilder &addExternalSymbol(const char *FnName,
unsigned char TargetFlags = 0) const {
MI->addOperand(MachineOperand::CreateES(FnName, TargetFlags));
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 1b924f2..f2b027b 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -32,7 +32,7 @@
#define LLVM_CODEGEN_MACHINEMODULEINFO_H
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/UniqueVector.h"
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index f715c44..eede2cc 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -14,15 +14,15 @@
#ifndef LLVM_CODEGEN_MACHINEOPERAND_H
#define LLVM_CODEGEN_MACHINEOPERAND_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cassert>
namespace llvm {
class ConstantFP;
+class BlockAddress;
class MachineBasicBlock;
class GlobalValue;
-class MDNode;
class MachineInstr;
class TargetMachine;
class MachineRegisterInfo;
@@ -42,7 +42,7 @@ public:
MO_JumpTableIndex, ///< Address of indexed Jump Table for switch
MO_ExternalSymbol, ///< Name of external global symbol
MO_GlobalAddress, ///< Address of a global value
- MO_Metadata ///< Metadata info
+ MO_BlockAddress ///< Address of a basic block
};
private:
@@ -108,7 +108,7 @@ private:
int Index; // For MO_*Index - The index itself.
const char *SymbolName; // For MO_ExternalSymbol.
GlobalValue *GV; // For MO_GlobalAddress.
- MDNode *Node; // For MO_Metadata.
+ BlockAddress *BA; // For MO_BlockAddress.
} Val;
int64_t Offset; // An offset from the object.
} OffsetedInfo;
@@ -156,8 +156,8 @@ public:
bool isGlobal() const { return OpKind == MO_GlobalAddress; }
/// isSymbol - Tests if this is a MO_ExternalSymbol operand.
bool isSymbol() const { return OpKind == MO_ExternalSymbol; }
- /// isMetadata - Tests if this is a MO_Metadata operand.
- bool isMetadata() const { return OpKind == MO_Metadata; }
+ /// isBlockAddress - Tests if this is a MO_BlockAddress operand.
+ bool isBlockAddress() const { return OpKind == MO_BlockAddress; }
//===--------------------------------------------------------------------===//
// Accessors for Register Operands
@@ -293,15 +293,16 @@ public:
assert(isGlobal() && "Wrong MachineOperand accessor");
return Contents.OffsetedInfo.Val.GV;
}
-
- MDNode *getMDNode() const {
- return Contents.OffsetedInfo.Val.Node;
+
+ BlockAddress *getBlockAddress() const {
+ assert(isBlockAddress() && "Wrong MachineOperand accessor");
+ return Contents.OffsetedInfo.Val.BA;
}
/// getOffset - Return the offset from the symbol in this operand. This always
/// returns 0 for ExternalSymbol operands.
int64_t getOffset() const {
- assert((isGlobal() || isSymbol() || isCPI()) &&
+ assert((isGlobal() || isSymbol() || isCPI() || isBlockAddress()) &&
"Wrong MachineOperand accessor");
return Contents.OffsetedInfo.Offset;
}
@@ -321,7 +322,7 @@ public:
}
void setOffset(int64_t Offset) {
- assert((isGlobal() || isSymbol() || isCPI() || isMetadata()) &&
+ assert((isGlobal() || isSymbol() || isCPI() || isBlockAddress()) &&
"Wrong MachineOperand accessor");
Contents.OffsetedInfo.Offset = Offset;
}
@@ -426,14 +427,6 @@ public:
Op.setTargetFlags(TargetFlags);
return Op;
}
- static MachineOperand CreateMDNode(MDNode *N, int64_t Offset,
- unsigned char TargetFlags = 0) {
- MachineOperand Op(MachineOperand::MO_Metadata);
- Op.Contents.OffsetedInfo.Val.Node = N;
- Op.setOffset(Offset);
- Op.setTargetFlags(TargetFlags);
- return Op;
- }
static MachineOperand CreateES(const char *SymName,
unsigned char TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_ExternalSymbol);
@@ -442,6 +435,12 @@ public:
Op.setTargetFlags(TargetFlags);
return Op;
}
+ static MachineOperand CreateBA(BlockAddress *BA) {
+ MachineOperand Op(MachineOperand::MO_BlockAddress);
+ Op.Contents.OffsetedInfo.Val.BA = BA;
+ Op.setOffset(0); // Offset is always 0.
+ return Op;
+ }
friend class MachineInstr;
friend class MachineRegisterInfo;
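
A sketch of the new operand kind in use (illustrative helper; BA is assumed to come from the IR):

#include "llvm/CodeGen/MachineOperand.h"
#include <cassert>

// MO_BlockAddress takes the union slot vacated by MO_Metadata; CreateBA pins
// the offset to zero.
static llvm::MachineOperand makeBAOperand(llvm::BlockAddress *BA) {
  llvm::MachineOperand MO = llvm::MachineOperand::CreateBA(BA);
  assert(MO.isBlockAddress() && MO.getOffset() == 0);
  return MO;
}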
diff --git a/include/llvm/CodeGen/MachineRelocation.h b/include/llvm/CodeGen/MachineRelocation.h
index c539781..6ea8f07 100644
--- a/include/llvm/CodeGen/MachineRelocation.h
+++ b/include/llvm/CodeGen/MachineRelocation.h
@@ -14,7 +14,7 @@
#ifndef LLVM_CODEGEN_MACHINERELOCATION_H
#define LLVM_CODEGEN_MACHINERELOCATION_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cassert>
namespace llvm {
diff --git a/include/llvm/CodeGen/ProcessImplicitDefs.h b/include/llvm/CodeGen/ProcessImplicitDefs.h
new file mode 100644
index 0000000..cec867f
--- /dev/null
+++ b/include/llvm/CodeGen/ProcessImplicitDefs.h
@@ -0,0 +1,41 @@
+//===-------------- llvm/CodeGen/ProcessImplicitDefs.h ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_PROCESSIMPLICITDEFS_H
+#define LLVM_CODEGEN_PROCESSIMPLICITDEFS_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+ class MachineInstr;
+ class TargetInstrInfo;
+
+ /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
+ /// for each use. Add isUndef marker to implicit_def defs and their uses.
+ class ProcessImplicitDefs : public MachineFunctionPass {
+ private:
+
+ bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg,
+ unsigned OpIdx, const TargetInstrInfo *tii_);
+
+ public:
+ static char ID;
+
+ ProcessImplicitDefs() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+ };
+
+}
+
+#endif // LLVM_CODEGEN_PROCESSIMPLICITDEFS_H
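
With the class now in a public header, a target pipeline can schedule it directly; a sketch assuming the standard PassManagerBase plumbing (the wrapper function is invented):

#include "llvm/CodeGen/ProcessImplicitDefs.h"
#include "llvm/PassManager.h"

// ProcessImplicitDefs is an ordinary MachineFunctionPass, so adding an
// instance to the pipeline is all that is required.
static void addImplicitDefsLowering(llvm::PassManagerBase &PM) {
  PM.add(new llvm::ProcessImplicitDefs());
}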
diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h
index 7a9122d..26392f5 100644
--- a/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/include/llvm/CodeGen/PseudoSourceValue.h
@@ -43,6 +43,10 @@ namespace llvm {
/// PseudoSourceValue may also be pointed to by an LLVM IR Value.
virtual bool isAliased(const MachineFrameInfo *) const;
+ /// mayAlias - Return true if the memory pointed to by this
+ /// PseudoSourceValue can ever alias an LLVM IR Value.
+ virtual bool mayAlias(const MachineFrameInfo *) const;
+
/// classof - Methods for support type inquiry through isa, cast, and
/// dyn_cast:
///
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index fdbbb1e..d5e7020 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -340,28 +340,34 @@ namespace llvm {
void removePred(const SDep &D);
/// getDepth - Return the depth of this node, which is the length of the
- /// maximum path up to any node with has no predecessors.
- unsigned getDepth() const {
- if (!isDepthCurrent) const_cast<SUnit *>(this)->ComputeDepth();
+ /// maximum path up to any node which has no predecessors. If IgnoreAntiDep
+ /// is true, ignore anti-dependence edges.
+ unsigned getDepth(bool IgnoreAntiDep=false) const {
+ if (!isDepthCurrent)
+ const_cast<SUnit *>(this)->ComputeDepth(IgnoreAntiDep);
return Depth;
}
/// getHeight - Return the height of this node, which is the length of the
- /// maximum path down to any node with has no successors.
- unsigned getHeight() const {
- if (!isHeightCurrent) const_cast<SUnit *>(this)->ComputeHeight();
+ /// maximum path down to any node which has no successors. If IgnoreAntiDep
+ /// is true, ignore anti-dependence edges.
+ unsigned getHeight(bool IgnoreAntiDep=false) const {
+ if (!isHeightCurrent)
+ const_cast<SUnit *>(this)->ComputeHeight(IgnoreAntiDep);
return Height;
}
- /// setDepthToAtLeast - If NewDepth is greater than this node's depth
- /// value, set it to be the new depth value. This also recursively
- /// marks successor nodes dirty.
- void setDepthToAtLeast(unsigned NewDepth);
+ /// setDepthToAtLeast - If NewDepth is greater than this node's
+ /// depth value, set it to be the new depth value. This also
+ /// recursively marks successor nodes dirty. If IgnoreAntiDep is
+ /// true, ignore anti-dependence edges.
+ void setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep=false);
- /// setDepthToAtLeast - If NewDepth is greater than this node's depth
- /// value, set it to be the new height value. This also recursively
- /// marks predecessor nodes dirty.
- void setHeightToAtLeast(unsigned NewHeight);
+ /// setHeightToAtLeast - If NewHeight is greater than this node's
+ /// height value, set it to be the new height value. This also
+ /// recursively marks predecessor nodes dirty. If IgnoreAntiDep is
+ /// true, ignore anti-dependence edges.
+ void setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep=false);
/// setDepthDirty - Set a flag in this node to indicate that its
/// stored Depth value will require recomputation the next time
@@ -394,8 +400,8 @@ namespace llvm {
void print(raw_ostream &O, const ScheduleDAG *G) const;
private:
- void ComputeDepth();
- void ComputeHeight();
+ void ComputeDepth(bool IgnoreAntiDep);
+ void ComputeHeight(bool IgnoreAntiDep);
};
//===--------------------------------------------------------------------===//
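
A comparator in the style the new parameter enables (sketch; names invented):

#include "llvm/CodeGen/ScheduleDAG.h"

// Order nodes by height while pretending anti-dependence edges do not exist,
// mirroring how LatencyPriorityQueue consumes getHeight().
static bool tallerIgnoringAntiDeps(const llvm::SUnit *A,
                                   const llvm::SUnit *B) {
  return A->getHeight(/*IgnoreAntiDep=*/true) >
         B->getHeight(/*IgnoreAntiDep=*/true);
}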
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index e0198ef..8400e86 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -326,6 +326,8 @@ public:
unsigned Line, unsigned Col, MDNode *CU);
SDValue getLabel(unsigned Opcode, DebugLoc dl, SDValue Root,
unsigned LabelID);
+ SDValue getBlockAddress(BlockAddress *BA, DebugLoc dl,
+ bool isTarget = false);
SDValue getCopyToReg(SDValue Chain, DebugLoc dl, unsigned Reg, SDValue N) {
return getNode(ISD::CopyToReg, dl, MVT::Other, Chain,
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 2b713f1..5d33224 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -110,6 +110,14 @@ protected:
bool CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
int64_t DesiredMaskS) const;
+ // Calls to these functions are generated by tblgen.
+ SDNode *Select_INLINEASM(SDValue N);
+ SDNode *Select_UNDEF(const SDValue &N);
+ SDNode *Select_DBG_LABEL(const SDValue &N);
+ SDNode *Select_EH_LABEL(const SDValue &N);
+ void CannotYetSelect(SDValue N);
+ void CannotYetSelectIntrinsic(SDValue N);
+
private:
void SelectAllBasicBlocks(Function &Fn, MachineFunction &MF,
MachineModuleInfo *MMI,
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index d7c8f1c..f960851 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -28,7 +28,7 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/DebugLoc.h"
#include <cassert>
@@ -97,7 +97,7 @@ namespace ISD {
BasicBlock, VALUETYPE, CONDCODE, Register,
Constant, ConstantFP,
GlobalAddress, GlobalTLSAddress, FrameIndex,
- JumpTable, ConstantPool, ExternalSymbol,
+ JumpTable, ConstantPool, ExternalSymbol, BlockAddress,
// The address of the GOT
GLOBAL_OFFSET_TABLE,
@@ -146,6 +146,7 @@ namespace ISD {
TargetJumpTable,
TargetConstantPool,
TargetExternalSymbol,
+ TargetBlockAddress,
/// RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...)
/// This node represents a target intrinsic function with no side effects.
@@ -2026,11 +2027,27 @@ public:
}
};
+class BlockAddressSDNode : public SDNode {
+ BlockAddress *BA;
+ friend class SelectionDAG;
+ BlockAddressSDNode(unsigned NodeTy, DebugLoc dl, EVT VT, BlockAddress *ba)
+ : SDNode(NodeTy, dl, getSDVTList(VT)), BA(ba) {
+ }
+public:
+ BlockAddress *getBlockAddress() const { return BA; }
+
+ static bool classof(const BlockAddressSDNode *) { return true; }
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::BlockAddress ||
+ N->getOpcode() == ISD::TargetBlockAddress;
+ }
+};
+
class LabelSDNode : public SDNode {
SDUse Chain;
unsigned LabelID;
friend class SelectionDAG;
-LabelSDNode(unsigned NodeTy, DebugLoc dl, SDValue ch, unsigned id)
+ LabelSDNode(unsigned NodeTy, DebugLoc dl, SDValue ch, unsigned id)
: SDNode(NodeTy, dl, getSDVTList(MVT::Other)), LabelID(id) {
InitOperands(&Chain, ch);
}
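
A sketch (not from the patch) of unpacking the new node in target lowering code; Op is assumed to be an ISD::BlockAddress or ISD::TargetBlockAddress value:

#include "llvm/CodeGen/SelectionDAGNodes.h"

// classof() above lets the cast<> machinery accept both the generic and the
// target opcode.
static llvm::BlockAddress *blockAddressOf(llvm::SDValue Op) {
  return llvm::cast<llvm::BlockAddressSDNode>(Op.getNode())->getBlockAddress();
}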
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
new file mode 100644
index 0000000..3f175a7
--- /dev/null
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -0,0 +1,740 @@
+//===- llvm/CodeGen/SlotIndexes.h - Slot indexes representation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements SlotIndex and related classes. The purpose of SlotIndex
+// is to describe a position at which a register can become live, or cease to
+// be live.
+//
+// SlotIndex is mostly a proxy for entries of the SlotIndexList, a class which
+// is held by LiveIntervals and provides the real numbering. This allows
+// LiveIntervals to perform largely transparent renumbering. The SlotIndex
+// class does hold a PHI bit, which determines whether the index relates to a
+// PHI use or def point, or an actual instruction. See the SlotIndex class
+// description for further information.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SLOTINDEXES_H
+#define LLVM_CODEGEN_SLOTINDEXES_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+ /// This class represents an entry in the slot index list held in the
+ /// SlotIndexes pass. It should not be used directly. See the
+ /// SlotIndex & SlotIndexes classes for the public interface to this
+ /// information.
+ class IndexListEntry {
+ private:
+
+ IndexListEntry *next, *prev;
+ MachineInstr *mi;
+ unsigned index;
+
+ public:
+
+ IndexListEntry(MachineInstr *mi, unsigned index)
+ : mi(mi), index(index) {}
+
+ MachineInstr* getInstr() const { return mi; }
+ void setInstr(MachineInstr *mi) { this->mi = mi; }
+
+ unsigned getIndex() const { return index; }
+ void setIndex(unsigned index) { this->index = index; }
+
+ IndexListEntry* getNext() { return next; }
+ const IndexListEntry* getNext() const { return next; }
+ void setNext(IndexListEntry *next) { this->next = next; }
+
+ IndexListEntry* getPrev() { return prev; }
+ const IndexListEntry* getPrev() const { return prev; }
+ void setPrev(IndexListEntry *prev) { this->prev = prev; }
+ };
+
+ // Specialize PointerLikeTypeTraits for IndexListEntry.
+ template <>
+ class PointerLikeTypeTraits<IndexListEntry*> {
+ public:
+ static inline void* getAsVoidPointer(IndexListEntry *p) {
+ return p;
+ }
+ static inline IndexListEntry* getFromVoidPointer(void *p) {
+ return static_cast<IndexListEntry*>(p);
+ }
+ enum { NumLowBitsAvailable = 3 };
+ };
+
+ /// SlotIndex - An opaque wrapper around machine indexes.
+ class SlotIndex {
+ friend class SlotIndexes;
+ friend class DenseMapInfo<SlotIndex>;
+
+ private:
+
+ // FIXME: Is there any way to statically allocate these things and have
+ // them 8-byte aligned?
+ static std::auto_ptr<IndexListEntry> emptyKeyPtr, tombstoneKeyPtr;
+ static const unsigned PHI_BIT = 1 << 2;
+
+ PointerIntPair<IndexListEntry*, 3, unsigned> lie;
+
+ SlotIndex(IndexListEntry *entry, unsigned phiAndSlot)
+ : lie(entry, phiAndSlot) {
+ assert(entry != 0 && "Attempt to construct index with 0 pointer.");
+ }
+
+ IndexListEntry& entry() const {
+ assert(lie.getPointer() != 0 && "Use of invalid index.");
+ return *lie.getPointer();
+ }
+
+ int getIndex() const {
+ return entry().getIndex() | getSlot();
+ }
+
+ static inline unsigned getHashValue(const SlotIndex &v) {
+ IndexListEntry *ptrVal = &v.entry();
+ return (unsigned((intptr_t)ptrVal) >> 4) ^
+ (unsigned((intptr_t)ptrVal) >> 9);
+ }
+
+ public:
+
+ // FIXME: Ugh. This is public because LiveIntervalAnalysis is still using it
+ // for some spill weight stuff. Fix that, then make this private.
+ enum Slot { LOAD, USE, DEF, STORE, NUM };
+
+ static inline SlotIndex getEmptyKey() {
+ // FIXME: How do we guarantee these numbers don't get allocated to
+ // legit indexes?
+ if (emptyKeyPtr.get() == 0)
+ emptyKeyPtr.reset(new IndexListEntry(0, ~0U & ~3U));
+
+ return SlotIndex(emptyKeyPtr.get(), 0);
+ }
+
+ static inline SlotIndex getTombstoneKey() {
+ // FIXME: How do we guarantee these numbers don't get allocated to
+ // legit indexes?
+ if (tombstoneKeyPtr.get() == 0)
+ tombstoneKeyPtr.reset(new IndexListEntry(0, ~0U & ~7U));
+
+ return SlotIndex(tombstoneKeyPtr.get(), 0);
+ }
+
+ /// Construct an invalid index.
+ SlotIndex() : lie(&getEmptyKey().entry(), 0) {}
+
+ // Construct a new slot index from the given one, set the phi flag on the
+ // new index to the value of the phi parameter.
+ SlotIndex(const SlotIndex &li, bool phi)
+ : lie(&li.entry(), phi ? PHI_BIT | li.getSlot() : (unsigned)li.getSlot()){
+ assert(lie.getPointer() != 0 &&
+ "Attempt to construct index with 0 pointer.");
+ }
+
+ // Construct a new slot index from the given one, set the phi flag on the
+ // new index to the value of the phi parameter, and the slot to the new slot.
+ SlotIndex(const SlotIndex &li, bool phi, Slot s)
+ : lie(&li.entry(), phi ? PHI_BIT | s : (unsigned)s) {
+ assert(lie.getPointer() != 0 &&
+ "Attempt to construct index with 0 pointer.");
+ }
+
+ /// Returns true if this is a valid index. Invalid indices do
+ /// not point into an index table, and cannot be compared.
+ bool isValid() const {
+ return (lie.getPointer() != 0) && (lie.getPointer()->getIndex() != 0);
+ }
+
+ /// Print this index to the given raw_ostream.
+ void print(raw_ostream &os) const;
+
+ /// Dump this index to stderr.
+ void dump() const;
+
+ /// Compare two SlotIndex objects for equality.
+ bool operator==(SlotIndex other) const {
+ return getIndex() == other.getIndex();
+ }
+ /// Compare two SlotIndex objects for inequality.
+ bool operator!=(SlotIndex other) const {
+ return getIndex() != other.getIndex();
+ }
+
+ /// Compare two SlotIndex objects. Return true if the first index
+ /// is strictly lower than the second.
+ bool operator<(SlotIndex other) const {
+ return getIndex() < other.getIndex();
+ }
+ /// Compare two SlotIndex objects. Return true if the first index
+ /// is lower than, or equal to, the second.
+ bool operator<=(SlotIndex other) const {
+ return getIndex() <= other.getIndex();
+ }
+
+ /// Compare two SlotIndex objects. Return true if the first index
+ /// is greater than the second.
+ bool operator>(SlotIndex other) const {
+ return getIndex() > other.getIndex();
+ }
+
+ /// Compare two SlotIndex objects. Return true if the first index
+ /// is greater than, or equal to, the second.
+ bool operator>=(SlotIndex other) const {
+ return getIndex() >= other.getIndex();
+ }
+
+ /// Return the distance from this index to the given one.
+ int distance(SlotIndex other) const {
+ return other.getIndex() - getIndex();
+ }
+
+ /// Returns the slot for this SlotIndex.
+ Slot getSlot() const {
+ return static_cast<Slot>(lie.getInt() & ~PHI_BIT);
+ }
+
+ /// Returns the state of the PHI bit.
+ bool isPHI() const {
+ return lie.getInt() & PHI_BIT;
+ }
+
+ /// Returns the base index associated with this index. The base index
+ /// is the one associated with the LOAD slot for the instruction pointed to
+ /// by this index.
+ SlotIndex getBaseIndex() const {
+ return getLoadIndex();
+ }
+
+ /// Returns the boundary index associated with this index. The boundary
+ /// index is the one associated with the STORE slot for the instruction
+ /// pointed to by this index.
+ SlotIndex getBoundaryIndex() const {
+ return getStoreIndex();
+ }
+
+ /// Returns the index of the LOAD slot for the instruction pointed to by
+ /// this index.
+ SlotIndex getLoadIndex() const {
+ return SlotIndex(&entry(), SlotIndex::LOAD);
+ }
+
+ /// Returns the index of the USE slot for the instruction pointed to by
+ /// this index.
+ SlotIndex getUseIndex() const {
+ return SlotIndex(&entry(), SlotIndex::USE);
+ }
+
+ /// Returns the index of the DEF slot for the instruction pointed to by
+ /// this index.
+ SlotIndex getDefIndex() const {
+ return SlotIndex(&entry(), SlotIndex::DEF);
+ }
+
+ /// Returns the index of the STORE slot for the instruction pointed to by
+ /// this index.
+ SlotIndex getStoreIndex() const {
+ return SlotIndex(&entry(), SlotIndex::STORE);
+ }
+
+ /// Returns the next slot in the index list. This could be either the
+ /// next slot for the instruction pointed to by this index or, if this
+ /// index is a STORE, the first slot for the next instruction.
+ /// WARNING: This method is considerably more expensive than the methods
+ /// that return specific slots (getUseIndex(), etc). If you can, please
+ /// use one of those methods.
+ SlotIndex getNextSlot() const {
+ Slot s = getSlot();
+ if (s == SlotIndex::STORE) {
+ return SlotIndex(entry().getNext(), SlotIndex::LOAD);
+ }
+ return SlotIndex(&entry(), s + 1);
+ }
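+
+ // A minimal sketch, assuming 'idx' is a valid index: visit every slot of
+ // the instruction containing idx without walking into the next one.
+ //   for (SlotIndex s = idx.getLoadIndex(); true; s = s.getNextSlot()) {
+ //     // ... use s ...
+ //     if (s == idx.getStoreIndex())
+ //       break;
+ //   }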
+
+ /// Returns the next index. This is the index corresponding to this
+ /// index's slot, but for the next instruction.
+ SlotIndex getNextIndex() const {
+ return SlotIndex(entry().getNext(), getSlot());
+ }
+
+ /// Returns the previous slot in the index list. This could be either the
+ /// previous slot for the instruction pointed to by this index or, if this
+ /// index is a LOAD, the last slot for the previous instruction.
+ /// WARNING: This method is considerably more expensive than the methods
+ /// that return specific slots (getUseIndex(), etc). If you can, please
+ /// use one of those methods.
+ SlotIndex getPrevSlot() const {
+ Slot s = getSlot();
+ if (s == SlotIndex::LOAD) {
+ return SlotIndex(entry().getPrev(), SlotIndex::STORE);
+ }
+ return SlotIndex(&entry(), s - 1);
+ }
+
+ /// Returns the previous index. This is the index corresponding to this
+ /// index's slot, but for the previous instruction.
+ SlotIndex getPrevIndex() const {
+ return SlotIndex(entry().getPrev(), getSlot());
+ }
+
+ };
+
+ /// DenseMapInfo specialization for SlotIndex.
+ template <>
+ struct DenseMapInfo<SlotIndex> {
+ static inline SlotIndex getEmptyKey() {
+ return SlotIndex::getEmptyKey();
+ }
+ static inline SlotIndex getTombstoneKey() {
+ return SlotIndex::getTombstoneKey();
+ }
+ static inline unsigned getHashValue(const SlotIndex &v) {
+ return SlotIndex::getHashValue(v);
+ }
+ static inline bool isEqual(const SlotIndex &LHS, const SlotIndex &RHS) {
+ return (LHS == RHS);
+ }
+ static inline bool isPod() { return false; }
+ };
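+ // The specialization above is what lets SlotIndex be used directly as a
+ // DenseMap key. A minimal sketch (the mapped value type is arbitrary here):
+ //   DenseMap<SlotIndex, unsigned> useCounts;
+ //   ++useCounts[idx.getUseIndex()];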
+
+ inline raw_ostream& operator<<(raw_ostream &os, SlotIndex li) {
+ li.print(os);
+ return os;
+ }
+
+ typedef std::pair<SlotIndex, MachineBasicBlock*> IdxMBBPair;
+
+ inline bool operator<(SlotIndex V, const IdxMBBPair &IM) {
+ return V < IM.first;
+ }
+
+ inline bool operator<(const IdxMBBPair &IM, SlotIndex V) {
+ return IM.first < V;
+ }
+
+ struct Idx2MBBCompare {
+ bool operator()(const IdxMBBPair &LHS, const IdxMBBPair &RHS) const {
+ return LHS.first < RHS.first;
+ }
+ };
+
+ /// SlotIndexes pass.
+ ///
+ /// This pass assigns indexes to each instruction.
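+ ///
+ /// A client pass would typically request it like this (a sketch, assuming
+ /// the client is itself a MachineFunctionPass):
+ ///
+ ///   void getAnalysisUsage(AnalysisUsage &au) const {
+ ///     au.addRequired<SlotIndexes>();
+ ///     MachineFunctionPass::getAnalysisUsage(au);
+ ///   }
+ ///
+ /// with the analysis itself retrieved via getAnalysis<SlotIndexes>().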
+ class SlotIndexes : public MachineFunctionPass {
+ private:
+
+ MachineFunction *mf;
+ IndexListEntry *indexListHead;
+ unsigned functionSize;
+
+ typedef DenseMap<const MachineInstr*, SlotIndex> Mi2IndexMap;
+ Mi2IndexMap mi2iMap;
+
+ /// MBB2IdxMap - The indexes of the first and last instructions in the
+ /// specified basic block.
+ typedef DenseMap<const MachineBasicBlock*,
+ std::pair<SlotIndex, SlotIndex> > MBB2IdxMap;
+ MBB2IdxMap mbb2IdxMap;
+
+ /// Idx2MBBMap - Sorted list of pairs of index of first instruction
+ /// and MBB id.
+ std::vector<IdxMBBPair> idx2MBBMap;
+
+ typedef DenseMap<const MachineBasicBlock*, SlotIndex> TerminatorGapsMap;
+ TerminatorGapsMap terminatorGaps;
+
+ // IndexListEntry allocator.
+ BumpPtrAllocator ileAllocator;
+
+ IndexListEntry* createEntry(MachineInstr *mi, unsigned index) {
+ IndexListEntry *entry =
+ static_cast<IndexListEntry*>(
+ ileAllocator.Allocate(sizeof(IndexListEntry),
+ alignof<IndexListEntry>()));
+
+ new (entry) IndexListEntry(mi, index);
+
+ return entry;
+ }
+
+ void initList() {
+ assert(indexListHead == 0 && "Zero entry non-null at initialisation.");
+ indexListHead = createEntry(0, ~0U);
+ indexListHead->setNext(0);
+ indexListHead->setPrev(indexListHead);
+ }
+
+ void clearList() {
+ indexListHead = 0;
+ ileAllocator.Reset();
+ }
+
+ IndexListEntry* getTail() {
+ assert(indexListHead != 0 && "Call to getTail on uninitialized list.");
+ return indexListHead->getPrev();
+ }
+
+ const IndexListEntry* getTail() const {
+ assert(indexListHead != 0 && "Call to getTail on uninitialized list.");
+ return indexListHead->getPrev();
+ }
+
+ // Returns true if the index list is empty.
+ bool empty() const { return (indexListHead == getTail()); }
+
+ IndexListEntry* front() {
+ assert(!empty() && "front() called on empty index list.");
+ return indexListHead;
+ }
+
+ const IndexListEntry* front() const {
+ assert(!empty() && "front() called on empty index list.");
+ return indexListHead;
+ }
+
+ IndexListEntry* back() {
+ assert(!empty() && "back() called on empty index list.");
+ return getTail()->getPrev();
+ }
+
+ const IndexListEntry* back() const {
+ assert(!empty() && "back() called on empty index list.");
+ return getTail()->getPrev();
+ }
+
+ /// Insert a new entry before itr.
+ void insert(IndexListEntry *itr, IndexListEntry *val) {
+ assert(itr != 0 && "itr should not be null.");
+ IndexListEntry *prev = itr->getPrev();
+ val->setNext(itr);
+ val->setPrev(prev);
+
+ if (itr != indexListHead) {
+ prev->setNext(val);
+ }
+ else {
+ indexListHead = val;
+ }
+ itr->setPrev(val);
+ }
+
+ /// Push a new entry onto the end of the list.
+ void push_back(IndexListEntry *val) {
+ insert(getTail(), val);
+ }
+
+ public:
+ static char ID;
+
+ SlotIndexes() : MachineFunctionPass(&ID), indexListHead(0) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+ virtual void releaseMemory();
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+
+ /// Dump the indexes.
+ void dump() const;
+
+ /// Renumber the index list, providing space for new instructions.
+ void renumber();
+
+ /// Returns the zero index for this analysis.
+ SlotIndex getZeroIndex() {
+ assert(front()->getIndex() == 0 && "First index is not 0?");
+ return SlotIndex(front(), 0);
+ }
+
+ /// Returns the invalid index marker for this analysis.
+ SlotIndex getInvalidIndex() {
+ return getZeroIndex();
+ }
+
+ /// Returns the distance between the highest and lowest indexes allocated
+ /// so far.
+ unsigned getIndexesLength() const {
+ assert(front()->getIndex() == 0 &&
+ "Initial index isn't zero?");
+
+ return back()->getIndex();
+ }
+
+ /// Returns the number of instructions in the function.
+ unsigned getFunctionSize() const {
+ return functionSize;
+ }
+
+ /// Returns true if the given machine instr is mapped to an index,
+ /// otherwise returns false.
+ bool hasIndex(const MachineInstr *instr) const {
+ return (mi2iMap.find(instr) != mi2iMap.end());
+ }
+
+ /// Returns the base index for the given instruction.
+ SlotIndex getInstructionIndex(const MachineInstr *instr) const {
+ Mi2IndexMap::const_iterator itr = mi2iMap.find(instr);
+ assert(itr != mi2iMap.end() && "Instruction not found in maps.");
+ return itr->second;
+ }
+
+ /// Returns the instruction for the given index, or null if the given
+ /// index has no instruction associated with it.
+ MachineInstr* getInstructionFromIndex(SlotIndex index) const {
+ return index.entry().getInstr();
+ }
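+
+ // The two maps are inverses for indexed instructions; a sketch, assuming
+ // 'mi' has been assigned an index:
+ //   SlotIndex idx = getInstructionIndex(mi);
+ //   assert(getInstructionFromIndex(idx) == mi);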
+
+ /// Returns the next non-null index.
+ SlotIndex getNextNonNullIndex(SlotIndex index) {
+ SlotIndex nextNonNull = index.getNextIndex();
+
+ while (&nextNonNull.entry() != getTail() &&
+ getInstructionFromIndex(nextNonNull) == 0) {
+ nextNonNull = nextNonNull.getNextIndex();
+ }
+
+ return nextNonNull;
+ }
+
+ /// Returns the first index in the given basic block.
+ SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const {
+ MBB2IdxMap::const_iterator itr = mbb2IdxMap.find(mbb);
+ assert(itr != mbb2IdxMap.end() && "MBB not found in maps.");
+ return itr->second.first;
+ }
+
+ /// Returns the last index in the given basic block.
+ SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const {
+ MBB2IdxMap::const_iterator itr = mbb2IdxMap.find(mbb);
+ assert(itr != mbb2IdxMap.end() && "MBB not found in maps.");
+ return itr->second.second;
+ }
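+
+ // Together with getMBBStartIdx this gives a simple containment test
+ // (sketch; note the end index is the last index in the block, so the
+ // interval is closed):
+ //   bool inMBB = getMBBStartIdx(mbb) <= idx && idx <= getMBBEndIdx(mbb);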
+
+ /// Returns the terminator gap for the given basic block.
+ SlotIndex getTerminatorGap(const MachineBasicBlock *mbb) {
+ TerminatorGapsMap::iterator itr = terminatorGaps.find(mbb);
+ assert(itr != terminatorGaps.end() &&
+ "All MBBs should have terminator gaps in their indexes.");
+ return itr->second;
+ }
+
+ /// Returns the basic block which the given index falls in.
+ MachineBasicBlock* getMBBFromIndex(SlotIndex index) const {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), index);
+ // Take the pair containing the index
+ std::vector<IdxMBBPair>::const_iterator J =
+ ((I != idx2MBBMap.end() && I->first > index) ||
+ (I == idx2MBBMap.end() && idx2MBBMap.size()>0)) ? (I-1): I;
+
+ assert(J != idx2MBBMap.end() && J->first <= index &&
+ index <= getMBBEndIdx(J->second) &&
+ "index does not correspond to an MBB");
+ return J->second;
+ }
+
+ bool findLiveInMBBs(SlotIndex start, SlotIndex end,
+ SmallVectorImpl<MachineBasicBlock*> &mbbs) const {
+ std::vector<IdxMBBPair>::const_iterator itr =
+ std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
+ bool resVal = false;
+
+ while (itr != idx2MBBMap.end()) {
+ if (itr->first >= end)
+ break;
+ mbbs.push_back(itr->second);
+ resVal = true;
+ ++itr;
+ }
+ return resVal;
+ }
+
+ /// Return a list of MBBs that can be reached via any branches or
+ /// fall-throughs.
+ bool findReachableMBBs(SlotIndex start, SlotIndex end,
+ SmallVectorImpl<MachineBasicBlock*> &mbbs) const {
+ std::vector<IdxMBBPair>::const_iterator itr =
+ std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
+
+ bool resVal = false;
+ while (itr != idx2MBBMap.end()) {
+ if (itr->first > end)
+ break;
+ MachineBasicBlock *mbb = itr->second;
+ if (getMBBEndIdx(mbb) > end)
+ break;
+ for (MachineBasicBlock::succ_iterator si = mbb->succ_begin(),
+ se = mbb->succ_end(); si != se; ++si)
+ mbbs.push_back(*si);
+ resVal = true;
+ ++itr;
+ }
+ return resVal;
+ }
+
+ /// Returns the MBB covering the given range, or null if the range covers
+ /// more than one basic block.
+ MachineBasicBlock* getMBBCoveringRange(SlotIndex start, SlotIndex end) const {
+
+ assert(start < end && "Backwards ranges not allowed.");
+
+ std::vector<IdxMBBPair>::const_iterator itr =
+ std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
+
+ if (itr == idx2MBBMap.end()) {
+ itr = prior(itr);
+ return itr->second;
+ }
+
+ // Check that we don't cross the boundary into this block.
+ if (itr->first < end)
+ return 0;
+
+ itr = prior(itr);
+
+ if (itr->first <= start)
+ return itr->second;
+
+ return 0;
+ }
+
+ /// Returns true if there is a gap in the numbering before the given index.
+ bool hasGapBeforeInstr(SlotIndex index) {
+ index = index.getBaseIndex();
+ SlotIndex prevIndex = index.getPrevIndex();
+
+ if (prevIndex == getZeroIndex())
+ return false;
+
+ if (getInstructionFromIndex(prevIndex) == 0)
+ return true;
+
+ if (prevIndex.distance(index) >= 2 * SlotIndex::NUM)
+ return true;
+
+ return false;
+ }
+
+ /// Returns true if there is a gap in the numbering after the given index.
+ bool hasGapAfterInstr(SlotIndex index) const {
+ // Not implemented yet.
+ assert(false &&
+ "SlotIndexes::hasGapAfterInstr(SlotIndex) not implemented yet.");
+ return false;
+ }
+
+ /// findGapBeforeInstr - Find an empty instruction slot before the
+ /// specified index. If "furthest" is true, find one that's furthest
+ /// away from the index (but before any index that's occupied).
+ // FIXME: This whole method should go away in future. It should
+ // always be possible to insert code between existing indices.
+ SlotIndex findGapBeforeInstr(SlotIndex index, bool furthest = false) {
+ if (index == getZeroIndex())
+ return getInvalidIndex();
+
+ index = index.getBaseIndex();
+ SlotIndex prevIndex = index.getPrevIndex();
+
+ if (prevIndex == getZeroIndex())
+ return getInvalidIndex();
+
+ // Try to reuse existing index objects with null-instrs.
+ if (getInstructionFromIndex(prevIndex) == 0) {
+ if (furthest) {
+ while (getInstructionFromIndex(prevIndex) == 0 &&
+ prevIndex != getZeroIndex()) {
+ prevIndex = prevIndex.getPrevIndex();
+ }
+
+ prevIndex = prevIndex.getNextIndex();
+ }
+
+ assert(getInstructionFromIndex(prevIndex) == 0 && "Index list is broken.");
+
+ return prevIndex;
+ }
+
+ int dist = prevIndex.distance(index);
+
+ // Double check that the spacing between this instruction and
+ // the last is sane.
+ assert(dist >= SlotIndex::NUM &&
+ "Distance between indexes too small.");
+
+ // If there's no gap return an invalid index.
+ if (dist < 2*SlotIndex::NUM) {
+ return getInvalidIndex();
+ }
+
+ // Otherwise insert new index entries into the list using the
+ // gap in the numbering.
+ IndexListEntry *newEntry =
+ createEntry(0, prevIndex.entry().getIndex() + SlotIndex::NUM);
+
+ insert(&index.entry(), newEntry);
+
+ // And return a pointer to the entry at the start of the gap.
+ return index.getPrevIndex();
+ }
+
+ /// Insert the given machine instruction into the mapping at the given
+ /// index.
+ void insertMachineInstrInMaps(MachineInstr *mi, SlotIndex index) {
+ index = index.getBaseIndex();
+ IndexListEntry *miEntry = &index.entry();
+ assert(miEntry->getInstr() == 0 && "Index already in use.");
+ miEntry->setInstr(mi);
+
+ assert(mi2iMap.find(mi) == mi2iMap.end() &&
+ "MachineInstr already has an index.");
+
+ mi2iMap.insert(std::make_pair(mi, index));
+ }
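+
+ // A typical use pairs this with findGapBeforeInstr; a sketch, assuming
+ // 'mi' is already linked into its basic block near 'idx':
+ //   SlotIndex gap = findGapBeforeInstr(idx);
+ //   if (gap != getInvalidIndex())
+ //     insertMachineInstrInMaps(mi, gap);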
+
+ /// Remove the given machine instruction from the mapping.
+ void removeMachineInstrFromMaps(MachineInstr *mi) {
+ // remove index -> MachineInstr and
+ // MachineInstr -> index mappings
+ Mi2IndexMap::iterator mi2iItr = mi2iMap.find(mi);
+ if (mi2iItr != mi2iMap.end()) {
+ IndexListEntry *miEntry(&mi2iItr->second.entry());
+ assert(miEntry->getInstr() == mi && "Instruction indexes broken.");
+ // FIXME: Eventually we want to actually delete these indexes.
+ miEntry->setInstr(0);
+ mi2iMap.erase(mi2iItr);
+ }
+ }
+
+ /// replaceMachineInstrInMaps - Replace a machine instruction with a new
+ /// one in the maps used by the register allocator.
+ void replaceMachineInstrInMaps(MachineInstr *mi, MachineInstr *newMI) {
+ Mi2IndexMap::iterator mi2iItr = mi2iMap.find(mi);
+ if (mi2iItr == mi2iMap.end())
+ return;
+ SlotIndex replaceBaseIndex = mi2iItr->second;
+ IndexListEntry *miEntry(&replaceBaseIndex.entry());
+ assert(miEntry->getInstr() == mi &&
+ "Mismatched instruction in index tables.");
+ miEntry->setInstr(newMI);
+ mi2iMap.erase(mi2iItr);
+ mi2iMap.insert(std::make_pair(newMI, replaceBaseIndex));
+ }
+
+ };
+
+
+}
+
+#endif // LLVM_CODEGEN_LIVEINDEX_H
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 1f0dd21..45ef9b9 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -18,7 +18,7 @@
#include <cassert>
#include <string>
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/MathExtras.h"
namespace llvm {
diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h
index a42c7d4..8072fd9 100644
--- a/include/llvm/Constant.h
+++ b/include/llvm/Constant.h
@@ -48,6 +48,10 @@ protected:
: User(ty, vty, Ops, NumOps) {}
void destroyConstantImpl();
+
+ void setOperand(unsigned i, Value *V) {
+ User::setOperand(i, V);
+ }
public:
/// isNullValue - Return true if this is the value that would be returned by
/// getNullValue.
@@ -61,6 +65,10 @@ public:
/// true for things like constant expressions that could divide by zero.
bool canTrap() const;
+ /// isConstantUsed - Return true if the constant has users other than constant
+ /// exprs and other dangling things.
+ bool isConstantUsed() const;
+
enum PossibleRelocationsTy {
NoRelocation = 0,
LocalRelocation = 1,
@@ -83,16 +91,13 @@ public:
/// FIXME: This really should not be in VMCore.
PossibleRelocationsTy getRelocationInfo() const;
- // Specialize get/setOperand for Constants as their operands are always
- // constants as well.
- Constant *getOperand(unsigned i) {
- return static_cast<Constant*>(User::getOperand(i));
- }
- const Constant *getOperand(unsigned i) const {
- return static_cast<const Constant*>(User::getOperand(i));
+ // Specialize get/setOperand for Constants, whose operands are always
+ // constants or BasicBlocks.
+ User *getOperand(unsigned i) {
+ return static_cast<User*>(User::getOperand(i));
}
- void setOperand(unsigned i, Constant *C) {
- User::setOperand(i, C);
+ const User *getOperand(unsigned i) const {
+ return static_cast<const User*>(User::getOperand(i));
}
/// getVectorElements - This method, which is only valid on constant of vector
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 2855fdc..99928d9 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -549,7 +549,47 @@ public:
}
};
+/// BlockAddress - The address of a basic block.
+///
+class BlockAddress : public Constant {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ void *operator new(size_t s) { return User::operator new(s, 2); }
+ BlockAddress(Function *F, BasicBlock *BB);
+public:
+ /// get - Return a BlockAddress for the specified function and basic block.
+ static BlockAddress *get(Function *F, BasicBlock *BB);
+
+ /// get - Return a BlockAddress for the specified basic block. The basic
+ /// block must be embedded into a function.
+ static BlockAddress *get(BasicBlock *BB);
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ Function *getFunction() const { return (Function*)Op<0>().get(); }
+ BasicBlock *getBasicBlock() const { return (BasicBlock*)Op<1>().get(); }
+
+ /// isNullValue - Return true if this is the value that would be returned by
+ /// getNullValue.
+ virtual bool isNullValue() const { return false; }
+
+ virtual void destroyConstant();
+ virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const BlockAddress *) { return true; }
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == BlockAddressVal;
+ }
+};
+
+template <>
+struct OperandTraits<BlockAddress> : public FixedNumOperandTraits<2> {
+};
+DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(BlockAddress, Value)
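+
+// A minimal usage sketch, assuming 'BB' is a basic block that is already
+// embedded in a function:
+//   BlockAddress *BA = BlockAddress::get(BB);
+// The resulting constant is what the new indirectbr instruction takes as
+// its address operand.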
+
+//===----------------------------------------------------------------------===//
/// ConstantExpr - a constant value that is initialized with an expression using
/// other constant values.
///
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index b9da0fc..4b828e46 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -19,6 +19,7 @@
#include <map>
#include <string>
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/System/Mutex.h"
#include "llvm/Target/TargetMachine.h"
@@ -42,26 +43,23 @@ class Type;
class ExecutionEngineState {
public:
- class MapUpdatingCVH : public CallbackVH {
- ExecutionEngineState &EES;
-
- public:
- MapUpdatingCVH(ExecutionEngineState &EES, const GlobalValue *GV);
-
- operator const GlobalValue*() const {
- return cast<GlobalValue>(getValPtr());
- }
-
- virtual void deleted();
- virtual void allUsesReplacedWith(Value *new_value);
+ struct AddressMapConfig : public ValueMapConfig<const GlobalValue*> {
+ typedef ExecutionEngineState *ExtraData;
+ static sys::Mutex *getMutex(ExecutionEngineState *EES);
+ static void onDelete(ExecutionEngineState *EES, const GlobalValue *Old);
+ static void onRAUW(ExecutionEngineState *, const GlobalValue *,
+ const GlobalValue *);
};
+ typedef ValueMap<const GlobalValue *, void *, AddressMapConfig>
+ GlobalAddressMapTy;
+
private:
ExecutionEngine &EE;
/// GlobalAddressMap - A mapping between LLVM global values and their
/// actualized version...
- std::map<MapUpdatingCVH, void *> GlobalAddressMap;
+ GlobalAddressMapTy GlobalAddressMap;
/// GlobalAddressReverseMap - This is the reverse mapping of GlobalAddressMap,
/// used to convert raw addresses into the LLVM global value that is emitted
@@ -70,13 +68,9 @@ private:
std::map<void *, AssertingVH<const GlobalValue> > GlobalAddressReverseMap;
public:
- ExecutionEngineState(ExecutionEngine &EE) : EE(EE) {}
+ ExecutionEngineState(ExecutionEngine &EE);
- MapUpdatingCVH getVH(const GlobalValue *GV) {
- return MapUpdatingCVH(*this, GV);
- }
-
- std::map<MapUpdatingCVH, void *> &
+ GlobalAddressMapTy &
getGlobalAddressMap(const MutexGuard &) {
return GlobalAddressMap;
}
@@ -94,7 +88,7 @@ public:
class ExecutionEngine {
const TargetData *TD;
ExecutionEngineState EEState;
- bool LazyCompilationDisabled;
+ bool CompilingLazily;
bool GVCompilationDisabled;
bool SymbolSearchingDisabled;
bool DlsymStubsEnabled;
@@ -269,12 +263,17 @@ public:
/// getPointerToFunction - The different EE's represent function bodies in
/// different ways. They should each implement this to say what a function
/// pointer should look like. When F is destroyed, the ExecutionEngine will
- /// remove its global mapping but will not yet free its machine code. Call
- /// freeMachineCodeForFunction(F) explicitly to do that. Note that global
- /// optimizations can destroy Functions without notifying the ExecutionEngine.
+ /// remove its global mapping and free any machine code. Be sure no threads
+ /// are running inside F when that happens.
///
virtual void *getPointerToFunction(Function *F) = 0;
+ /// getPointerToBasicBlock - The different EE's represent basic blocks in
+ /// different ways. Return the representation for a blockaddress of the
+ /// specified block.
+ ///
+ virtual void *getPointerToBasicBlock(BasicBlock *BB) = 0;
+
/// getPointerToFunctionOrStub - If the specified function has been
/// code-gen'd, return a pointer to the function. If not, compile it, or use
/// a stub to implement lazy compilation if available. See
@@ -326,13 +325,29 @@ public:
virtual void RegisterJITEventListener(JITEventListener *) {}
virtual void UnregisterJITEventListener(JITEventListener *) {}
- /// DisableLazyCompilation - If called, the JIT will abort if lazy compilation
- /// is ever attempted.
+ /// DisableLazyCompilation - When lazy compilation is off (the default), the
+ /// JIT will eagerly compile every function reachable from the argument to
+ /// getPointerToFunction. If lazy compilation is turned on, the JIT will only
+ /// compile the one function and emit stubs to compile the rest when they're
+ /// first called. If lazy compilation is turned off again while some lazy
+ /// stubs are still around, and one of those stubs is called, the program will
+ /// abort.
+ ///
+ /// In order to safely compile lazily in a threaded program, the user must
+ /// ensure that 1) only one thread at a time can call any particular lazy
+ /// stub, and 2) any thread modifying LLVM IR must hold the JIT's lock
+ /// (ExecutionEngine::lock) or otherwise ensure that no other thread calls a
+ /// lazy stub. See http://llvm.org/PR5184 for details.
void DisableLazyCompilation(bool Disabled = true) {
- LazyCompilationDisabled = Disabled;
+ CompilingLazily = !Disabled;
+ }
+ bool isCompilingLazily() const {
+ return CompilingLazily;
}
+ // Deprecated in favor of isCompilingLazily (to reduce double-negatives).
+ // Remove this in LLVM 2.8.
bool isLazyCompilationDisabled() const {
- return LazyCompilationDisabled;
+ return !CompilingLazily;
}
/// DisableGVCompilation - If called, the JIT will abort if it's asked to
@@ -485,15 +500,8 @@ class EngineBuilder {
}
ExecutionEngine *create();
-
};
-inline bool operator<(const ExecutionEngineState::MapUpdatingCVH& lhs,
- const ExecutionEngineState::MapUpdatingCVH& rhs) {
- return static_cast<const GlobalValue*>(lhs) <
- static_cast<const GlobalValue*>(rhs);
-}
-
} // End llvm namespace
#endif
diff --git a/include/llvm/ExecutionEngine/GenericValue.h b/include/llvm/ExecutionEngine/GenericValue.h
index a2fed98..1301320 100644
--- a/include/llvm/ExecutionEngine/GenericValue.h
+++ b/include/llvm/ExecutionEngine/GenericValue.h
@@ -16,7 +16,7 @@
#define GENERIC_VALUE_H
#include "llvm/ADT/APInt.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index e895e73..dcc66b2 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -15,7 +15,7 @@
#ifndef LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
#define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/DebugLoc.h"
#include <vector>
@@ -63,8 +63,11 @@ public:
/// NotifyFreeingMachineCode - This is called inside of
/// freeMachineCodeForFunction(), after the global mapping is removed, but
/// before the machine code is returned to the allocator. OldPtr is the
- /// address of the machine code.
- virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr) {}
+ /// address of the machine code and will be the same as the Code parameter to
+ /// a previous NotifyFunctionEmitted call. The Function passed to
+ /// NotifyFunctionEmitted may have been destroyed by the time of the matching
+ /// NotifyFreeingMachineCode call.
+ virtual void NotifyFreeingMachineCode(void *OldPtr) {}
};
// This returns NULL if support isn't available.
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 56851889..130612e 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -14,7 +14,7 @@
#ifndef LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
#define LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <string>
namespace llvm {
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index 45d366f..df7eb43 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -51,10 +51,9 @@ protected:
virtual BasicBlock *getSuccessorV(unsigned idx) const = 0;
virtual unsigned getNumSuccessorsV() const = 0;
virtual void setSuccessorV(unsigned idx, BasicBlock *B) = 0;
+ virtual TerminatorInst *clone_impl() const = 0;
public:
- virtual TerminatorInst *clone() const = 0;
-
/// getNumSuccessors - Return the number of successors that this terminator
/// has.
unsigned getNumSuccessors() const {
@@ -117,7 +116,6 @@ public:
static inline bool classof(const UnaryInstruction *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Alloca ||
- I->getOpcode() == Instruction::Free ||
I->getOpcode() == Instruction::Load ||
I->getOpcode() == Instruction::VAArg ||
I->getOpcode() == Instruction::ExtractValue ||
@@ -146,6 +144,7 @@ protected:
const Twine &Name, Instruction *InsertBefore);
BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty,
const Twine &Name, BasicBlock *InsertAtEnd);
+ virtual BinaryOperator *clone_impl() const;
public:
// allocate space for exactly two operands
void *operator new(size_t s) {
@@ -298,8 +297,6 @@ public:
return static_cast<BinaryOps>(Instruction::getOpcode());
}
- virtual BinaryOperator *clone() const;
-
/// swapOperands - Exchange the two operands to this instruction.
/// This instruction is safe to use on any binary instruction and
/// does not modify the semantics of the instruction. If the instruction
@@ -718,6 +715,30 @@ public:
/// @brief Determine if this is an equals/not equals predicate.
bool isEquality();
+ /// @returns true if the comparison is signed, false otherwise.
+ /// @brief Determine if this instruction is using a signed comparison.
+ bool isSigned() const {
+ return isSigned(getPredicate());
+ }
+
+ /// @returns true if the comparison is unsigned, false otherwise.
+ /// @brief Determine if this instruction is using an unsigned comparison.
+ bool isUnsigned() const {
+ return isUnsigned(getPredicate());
+ }
+
+ /// This is just a convenience.
+ /// @brief Determine if this is true when both operands are the same.
+ bool isTrueWhenEqual() const {
+ return isTrueWhenEqual(getPredicate());
+ }
+
+ /// This is just a convenience.
+ /// @brief Determine if this is false when both operands are the same.
+ bool isFalseWhenEqual() const {
+ return isFalseWhenEqual(getPredicate());
+ }
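+
+ // For example, 'icmp uge %x, %x' is always true and 'icmp slt %x, %x' is
+ // always false, so isTrueWhenEqual() holds for ICMP_UGE and
+ // isFalseWhenEqual() holds for ICMP_SLT.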
+
/// @returns true if the predicate is unsigned, false otherwise.
/// @brief Determine if the predicate is an unsigned operation.
static bool isUnsigned(unsigned short predicate);
@@ -732,6 +753,12 @@ public:
/// @brief Determine if the predicate is an unordered operation.
static bool isUnordered(unsigned short predicate);
+ /// Determine if the predicate is true when comparing a value with itself.
+ static bool isTrueWhenEqual(unsigned short predicate);
+
+ /// Determine if the predicate is false when comparing a value with itself.
+ static bool isFalseWhenEqual(unsigned short predicate);
+
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const CmpInst *) { return true; }
static inline bool classof(const Instruction *I) {
diff --git a/include/llvm/Instruction.def b/include/llvm/Instruction.def
index 5c8fe3e..205f303 100644
--- a/include/llvm/Instruction.def
+++ b/include/llvm/Instruction.def
@@ -97,38 +97,38 @@
HANDLE_TERM_INST ( 1, Ret , ReturnInst)
HANDLE_TERM_INST ( 2, Br , BranchInst)
HANDLE_TERM_INST ( 3, Switch , SwitchInst)
-HANDLE_TERM_INST ( 4, Invoke , InvokeInst)
-HANDLE_TERM_INST ( 5, Unwind , UnwindInst)
-HANDLE_TERM_INST ( 6, Unreachable, UnreachableInst)
- LAST_TERM_INST ( 6)
+HANDLE_TERM_INST ( 4, IndirectBr , IndirectBrInst)
+HANDLE_TERM_INST ( 5, Invoke , InvokeInst)
+HANDLE_TERM_INST ( 6, Unwind , UnwindInst)
+HANDLE_TERM_INST ( 7, Unreachable, UnreachableInst)
+ LAST_TERM_INST ( 7)
// Standard binary operators...
- FIRST_BINARY_INST( 7)
-HANDLE_BINARY_INST( 7, Add , BinaryOperator)
-HANDLE_BINARY_INST( 8, FAdd , BinaryOperator)
-HANDLE_BINARY_INST( 9, Sub , BinaryOperator)
-HANDLE_BINARY_INST(10, FSub , BinaryOperator)
-HANDLE_BINARY_INST(11, Mul , BinaryOperator)
-HANDLE_BINARY_INST(12, FMul , BinaryOperator)
-HANDLE_BINARY_INST(13, UDiv , BinaryOperator)
-HANDLE_BINARY_INST(14, SDiv , BinaryOperator)
-HANDLE_BINARY_INST(15, FDiv , BinaryOperator)
-HANDLE_BINARY_INST(16, URem , BinaryOperator)
-HANDLE_BINARY_INST(17, SRem , BinaryOperator)
-HANDLE_BINARY_INST(18, FRem , BinaryOperator)
+ FIRST_BINARY_INST( 8)
+HANDLE_BINARY_INST( 8, Add , BinaryOperator)
+HANDLE_BINARY_INST( 9, FAdd , BinaryOperator)
+HANDLE_BINARY_INST(10, Sub , BinaryOperator)
+HANDLE_BINARY_INST(11, FSub , BinaryOperator)
+HANDLE_BINARY_INST(12, Mul , BinaryOperator)
+HANDLE_BINARY_INST(13, FMul , BinaryOperator)
+HANDLE_BINARY_INST(14, UDiv , BinaryOperator)
+HANDLE_BINARY_INST(15, SDiv , BinaryOperator)
+HANDLE_BINARY_INST(16, FDiv , BinaryOperator)
+HANDLE_BINARY_INST(17, URem , BinaryOperator)
+HANDLE_BINARY_INST(18, SRem , BinaryOperator)
+HANDLE_BINARY_INST(19, FRem , BinaryOperator)
// Logical operators (integer operands)
-HANDLE_BINARY_INST(19, Shl , BinaryOperator) // Shift left (logical)
-HANDLE_BINARY_INST(20, LShr , BinaryOperator) // Shift right (logical)
-HANDLE_BINARY_INST(21, AShr , BinaryOperator) // Shift right (arithmetic)
-HANDLE_BINARY_INST(22, And , BinaryOperator)
-HANDLE_BINARY_INST(23, Or , BinaryOperator)
-HANDLE_BINARY_INST(24, Xor , BinaryOperator)
- LAST_BINARY_INST(24)
+HANDLE_BINARY_INST(20, Shl , BinaryOperator) // Shift left (logical)
+HANDLE_BINARY_INST(21, LShr , BinaryOperator) // Shift right (logical)
+HANDLE_BINARY_INST(22, AShr , BinaryOperator) // Shift right (arithmetic)
+HANDLE_BINARY_INST(23, And , BinaryOperator)
+HANDLE_BINARY_INST(24, Or , BinaryOperator)
+HANDLE_BINARY_INST(25, Xor , BinaryOperator)
+ LAST_BINARY_INST(25)
// Memory operators...
- FIRST_MEMORY_INST(25)
-HANDLE_MEMORY_INST(25, Free , FreeInst ) // Heap management instructions
+ FIRST_MEMORY_INST(26)
HANDLE_MEMORY_INST(26, Alloca, AllocaInst) // Stack management
HANDLE_MEMORY_INST(27, Load , LoadInst ) // Memory manipulation instrs
HANDLE_MEMORY_INST(28, Store , StoreInst )
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
index fdae3d7..07b3231 100644
--- a/include/llvm/Instruction.h
+++ b/include/llvm/Instruction.h
@@ -38,6 +38,7 @@ protected:
Instruction *InsertBefore = 0);
Instruction(const Type *Ty, unsigned iType, Use *Ops, unsigned NumOps,
BasicBlock *InsertAtEnd);
+ virtual Instruction *clone_impl() const = 0;
public:
// Out of line virtual method, so the vtable, etc has a home.
~Instruction();
@@ -47,7 +48,7 @@ public:
/// * The instruction has no parent
/// * The instruction has no name
///
- virtual Instruction *clone() const = 0;
+ Instruction *clone() const;
/// isIdenticalTo - Return true if the specified instruction is exactly
/// identical to the current one. This means that all operands match and any
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index dbeb9e1..28854df 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -19,9 +19,7 @@
#include "llvm/InstrTypes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Attributes.h"
-#include "llvm/BasicBlock.h"
#include "llvm/CallingConv.h"
-#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallVector.h"
#include <iterator>
@@ -34,22 +32,30 @@ class LLVMContext;
class DominatorTree;
//===----------------------------------------------------------------------===//
-// AllocationInst Class
+// AllocaInst Class
//===----------------------------------------------------------------------===//
-/// AllocationInst - This class is the base class of AllocaInst.
+/// AllocaInst - an instruction to allocate memory on the stack
///
-class AllocationInst : public UnaryInstruction {
+class AllocaInst : public UnaryInstruction {
protected:
- AllocationInst(const Type *Ty, Value *ArraySize,
- unsigned iTy, unsigned Align, const Twine &Name = "",
- Instruction *InsertBefore = 0);
- AllocationInst(const Type *Ty, Value *ArraySize,
- unsigned iTy, unsigned Align, const Twine &Name,
- BasicBlock *InsertAtEnd);
+ virtual AllocaInst *clone_impl() const;
public:
+ explicit AllocaInst(const Type *Ty, Value *ArraySize = 0,
+ const Twine &Name = "", Instruction *InsertBefore = 0);
+ AllocaInst(const Type *Ty, Value *ArraySize,
+ const Twine &Name, BasicBlock *InsertAtEnd);
+
+ AllocaInst(const Type *Ty, const Twine &Name, Instruction *InsertBefore = 0);
+ AllocaInst(const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd);
+
+ AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name = "", Instruction *InsertBefore = 0);
+ AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, BasicBlock *InsertAtEnd);
+
// Out of line virtual method, so the vtable, etc. has a home.
- virtual ~AllocationInst();
+ virtual ~AllocaInst();
/// isArrayAllocation - Return true if there is an allocation size parameter
/// to the allocation instruction that is not 1.
@@ -79,58 +85,6 @@ public:
unsigned getAlignment() const { return (1u << SubclassData) >> 1; }
void setAlignment(unsigned Align);
- virtual AllocationInst *clone() const = 0;
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const AllocationInst *) { return true; }
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Alloca;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-
-//===----------------------------------------------------------------------===//
-// AllocaInst Class
-//===----------------------------------------------------------------------===//
-
-/// AllocaInst - an instruction to allocate memory on the stack
-///
-class AllocaInst : public AllocationInst {
-public:
- explicit AllocaInst(const Type *Ty,
- Value *ArraySize = 0,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0)
- : AllocationInst(Ty, ArraySize, Alloca,
- 0, NameStr, InsertBefore) {}
- AllocaInst(const Type *Ty,
- Value *ArraySize, const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : AllocationInst(Ty, ArraySize, Alloca, 0, NameStr, InsertAtEnd) {}
-
- AllocaInst(const Type *Ty, const Twine &NameStr,
- Instruction *InsertBefore = 0)
- : AllocationInst(Ty, 0, Alloca, 0, NameStr, InsertBefore) {}
- AllocaInst(const Type *Ty, const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : AllocationInst(Ty, 0, Alloca, 0, NameStr, InsertAtEnd) {}
-
- AllocaInst(const Type *Ty, Value *ArraySize,
- unsigned Align, const Twine &NameStr = "",
- Instruction *InsertBefore = 0)
- : AllocationInst(Ty, ArraySize, Alloca,
- Align, NameStr, InsertBefore) {}
- AllocaInst(const Type *Ty, Value *ArraySize,
- unsigned Align, const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : AllocationInst(Ty, ArraySize, Alloca,
- Align, NameStr, InsertAtEnd) {}
-
- virtual AllocaInst *clone() const;
-
/// isStaticAlloca - Return true if this alloca is in the entry block of the
/// function and is a constant size. If so, the code generator will fold it
/// into the prolog/epilog code, so it is basically free.
@@ -148,35 +102,6 @@ public:
//===----------------------------------------------------------------------===//
-// FreeInst Class
-//===----------------------------------------------------------------------===//
-
-/// FreeInst - an instruction to deallocate memory
-///
-class FreeInst : public UnaryInstruction {
- void AssertOK();
-public:
- explicit FreeInst(Value *Ptr, Instruction *InsertBefore = 0);
- FreeInst(Value *Ptr, BasicBlock *InsertAfter);
-
- virtual FreeInst *clone() const;
-
- // Accessor methods for consistency with other memory operations
- Value *getPointerOperand() { return getOperand(0); }
- const Value *getPointerOperand() const { return getOperand(0); }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const FreeInst *) { return true; }
- static inline bool classof(const Instruction *I) {
- return (I->getOpcode() == Instruction::Free);
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-
-//===----------------------------------------------------------------------===//
// LoadInst Class
//===----------------------------------------------------------------------===//
@@ -185,6 +110,8 @@ public:
///
class LoadInst : public UnaryInstruction {
void AssertOK();
+protected:
+ virtual LoadInst *clone_impl() const;
public:
LoadInst(Value *Ptr, const Twine &NameStr, Instruction *InsertBefore);
LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd);
@@ -215,8 +142,6 @@ public:
SubclassData = (SubclassData & ~1) | (V ? 1 : 0);
}
- virtual LoadInst *clone() const;
-
/// getAlignment - Return the alignment of the access that is being performed
///
unsigned getAlignment() const {
@@ -254,6 +179,8 @@ public:
class StoreInst : public Instruction {
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
void AssertOK();
+protected:
+ virtual StoreInst *clone_impl() const;
public:
// allocate space for exactly two operands
void *operator new(size_t s) {
@@ -292,8 +219,6 @@ public:
void setAlignment(unsigned Align);
- virtual StoreInst *clone() const;
-
Value *getPointerOperand() { return getOperand(1); }
const Value *getPointerOperand() const { return getOperand(1); }
static unsigned getPointerOperandIndex() { return 1U; }
@@ -402,6 +327,8 @@ class GetElementPtrInst : public Instruction {
Instruction *InsertBefore = 0);
GetElementPtrInst(Value *Ptr, Value *Idx,
const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+ virtual GetElementPtrInst *clone_impl() const;
public:
template<typename InputIterator>
static GetElementPtrInst *Create(Value *Ptr, InputIterator IdxBegin,
@@ -475,8 +402,6 @@ public:
return GEP;
}
- virtual GetElementPtrInst *clone() const;
-
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -623,6 +548,9 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
/// must be identical types.
/// @brief Represent an integer comparison operator.
class ICmpInst: public CmpInst {
+protected:
+ /// @brief Clone an identical ICmpInst
+ virtual ICmpInst *clone_impl() const;
public:
/// @brief Constructor with insert-before-instruction semantics.
ICmpInst(
@@ -737,30 +665,6 @@ public:
return !isEquality(P);
}
- /// @returns true if the predicate of this ICmpInst is signed, false otherwise
- /// @brief Determine if this instruction's predicate is signed.
- bool isSignedPredicate() const { return isSignedPredicate(getPredicate()); }
-
- /// @returns true if the predicate provided is signed, false otherwise
- /// @brief Determine if the predicate is signed.
- static bool isSignedPredicate(Predicate pred);
-
- /// @returns true if the specified compare predicate is
- /// true when both operands are equal...
- /// @brief Determine if the icmp is true when both operands are equal
- static bool isTrueWhenEqual(ICmpInst::Predicate pred) {
- return pred == ICmpInst::ICMP_EQ || pred == ICmpInst::ICMP_UGE ||
- pred == ICmpInst::ICMP_SGE || pred == ICmpInst::ICMP_ULE ||
- pred == ICmpInst::ICMP_SLE;
- }
-
- /// @returns true if the specified compare instruction is
- /// true when both operands are equal...
- /// @brief Determine if the ICmpInst returns true when both operands are equal
- bool isTrueWhenEqual() {
- return isTrueWhenEqual(getPredicate());
- }
-
/// Initialize a set of values that all satisfy the predicate with C.
/// @brief Make a ConstantRange for a relation with a constant value.
static ConstantRange makeConstantRange(Predicate pred, const APInt &C);
@@ -775,8 +679,6 @@ public:
Op<0>().swap(Op<1>());
}
- virtual ICmpInst *clone() const;
-
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const ICmpInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -797,6 +699,9 @@ public:
/// vectors of floating point values. The operands must be identical types.
/// @brief Represents a floating point comparison operator.
class FCmpInst: public CmpInst {
+protected:
+ /// @brief Clone an identical FCmpInst
+ virtual FCmpInst *clone_impl() const;
public:
/// @brief Constructor with insert-before-instruction semantics.
FCmpInst(
@@ -884,8 +789,6 @@ public:
Op<0>().swap(Op<1>());
}
- virtual FCmpInst *clone() const;
-
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const FCmpInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -953,6 +856,8 @@ class CallInst : public Instruction {
explicit CallInst(Value *F, const Twine &NameStr,
Instruction *InsertBefore);
CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+ virtual CallInst *clone_impl() const;
public:
template<typename InputIterator>
static CallInst *Create(Value *Func,
@@ -1000,6 +905,9 @@ public:
const Type *IntPtrTy, const Type *AllocTy,
Value *ArraySize = 0, Function* MallocF = 0,
const Twine &Name = "");
+ /// CreateFree - Generate the IR for a call to the builtin free function.
+ static void CreateFree(Value* Source, Instruction *InsertBefore);
+ static Instruction* CreateFree(Value* Source, BasicBlock *InsertAtEnd);
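+ // A minimal sketch, assuming 'ptr' was produced by CreateMalloc and
+ // 'atEnd' is the block to append to:
+ //   CallInst::CreateFree(ptr, atEnd); // emits a call to the builtin free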
~CallInst();
@@ -1008,8 +916,6 @@ public:
SubclassData = (SubclassData & ~1) | unsigned(isTC);
}
- virtual CallInst *clone() const;
-
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -1177,6 +1083,8 @@ class SelectInst : public Instruction {
init(C, S1, S2);
setName(NameStr);
}
+protected:
+ virtual SelectInst *clone_impl() const;
public:
static SelectInst *Create(Value *C, Value *S1, Value *S2,
const Twine &NameStr = "",
@@ -1207,8 +1115,6 @@ public:
return static_cast<OtherOps>(Instruction::getOpcode());
}
- virtual SelectInst *clone() const;
-
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const SelectInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -1233,6 +1139,9 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value)
/// an argument of the specified type given a va_list and increments that list
///
class VAArgInst : public UnaryInstruction {
+protected:
+ virtual VAArgInst *clone_impl() const;
+
public:
VAArgInst(Value *List, const Type *Ty, const Twine &NameStr = "",
Instruction *InsertBefore = 0)
@@ -1245,8 +1154,6 @@ public:
setName(NameStr);
}
- virtual VAArgInst *clone() const;
-
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const VAArgInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -1269,6 +1176,9 @@ class ExtractElementInst : public Instruction {
Instruction *InsertBefore = 0);
ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr,
BasicBlock *InsertAtEnd);
+protected:
+ virtual ExtractElementInst *clone_impl() const;
+
public:
static ExtractElementInst *Create(Value *Vec, Value *Idx,
const Twine &NameStr = "",
@@ -1285,8 +1195,6 @@ public:
/// formed with the specified operands.
static bool isValidOperands(const Value *Vec, const Value *Idx);
- virtual ExtractElementInst *clone() const;
-
Value *getVectorOperand() { return Op<0>(); }
Value *getIndexOperand() { return Op<1>(); }
const Value *getVectorOperand() const { return Op<0>(); }
@@ -1329,6 +1237,9 @@ class InsertElementInst : public Instruction {
Instruction *InsertBefore = 0);
InsertElementInst(Value *Vec, Value *NewElt, Value *Idx,
const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+ virtual InsertElementInst *clone_impl() const;
+
public:
static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx,
const Twine &NameStr = "",
@@ -1346,8 +1257,6 @@ public:
static bool isValidOperands(const Value *Vec, const Value *NewElt,
const Value *Idx);
- virtual InsertElementInst *clone() const;
-
/// getType - Overload to return most specific vector type.
///
const VectorType *getType() const {
@@ -1381,6 +1290,9 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
/// input vectors.
///
class ShuffleVectorInst : public Instruction {
+protected:
+ virtual ShuffleVectorInst *clone_impl() const;
+
public:
// allocate space for exactly three operands
void *operator new(size_t s) {
@@ -1397,8 +1309,6 @@ public:
static bool isValidOperands(const Value *V1, const Value *V2,
const Value *Mask);
- virtual ShuffleVectorInst *clone() const;
-
/// getType - Overload to return most specific vector type.
///
const VectorType *getType() const {
@@ -1507,6 +1417,8 @@ class ExtractValueInst : public UnaryInstruction {
void *operator new(size_t s) {
return User::operator new(s, 1);
}
+protected:
+ virtual ExtractValueInst *clone_impl() const;
public:
template<typename InputIterator>
@@ -1541,8 +1453,6 @@ public:
return new ExtractValueInst(Agg, Idxs, Idxs + 1, NameStr, InsertAtEnd);
}
- virtual ExtractValueInst *clone() const;
-
/// getIndexedType - Returns the type of the element that would be extracted
/// with an extractvalue instruction with the specified parameters.
///
@@ -1674,6 +1584,8 @@ class InsertValueInst : public Instruction {
Instruction *InsertBefore = 0);
InsertValueInst(Value *Agg, Value *Val, unsigned Idx,
const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+ virtual InsertValueInst *clone_impl() const;
public:
// allocate space for exactly two operands
void *operator new(size_t s) {
@@ -1711,8 +1623,6 @@ public:
return new InsertValueInst(Agg, Val, Idx, NameStr, InsertAtEnd);
}
- virtual InsertValueInst *clone() const;
-
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -1821,6 +1731,8 @@ class PHINode : public Instruction {
ReservedSpace(0) {
setName(NameStr);
}
+protected:
+ virtual PHINode *clone_impl() const;
public:
static PHINode *Create(const Type *Ty, const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
@@ -1840,8 +1752,6 @@ public:
resizeOperands(NumValues*2);
}
- virtual PHINode *clone() const;
-
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -1891,7 +1801,7 @@ public:
void setIncomingBlock(unsigned i, BasicBlock *BB) {
- setOperand(i*2+1, BB);
+ setOperand(i*2+1, (Value*)BB);
}
static unsigned getOperandNumForIncomingBlock(unsigned i) {
return i*2+1;
@@ -1914,7 +1824,7 @@ public:
// Initialize some new operands.
NumOperands = OpNo+2;
OperandList[OpNo] = V;
- OperandList[OpNo+1] = BB;
+ OperandList[OpNo+1] = (Value*)BB;
}
/// removeIncomingValue - Remove an incoming value. This is useful if a
@@ -1939,7 +1849,7 @@ public:
int getBasicBlockIndex(const BasicBlock *BB) const {
Use *OL = OperandList;
for (unsigned i = 0, e = getNumOperands(); i != e; i += 2)
- if (OL[i+1].get() == BB) return i/2;
+ if (OL[i+1].get() == (const Value*)BB) return i/2;
return -1;
}
@@ -2003,6 +1913,8 @@ private:
Instruction *InsertBefore = 0);
ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd);
explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd);
+protected:
+ virtual ReturnInst *clone_impl() const;
public:
static ReturnInst* Create(LLVMContext &C, Value *retVal = 0,
Instruction *InsertBefore = 0) {
@@ -2017,8 +1929,6 @@ public:
}
virtual ~ReturnInst();
- virtual ReturnInst *clone() const;
-
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -2078,6 +1988,8 @@ class BranchInst : public TerminatorInst {
BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd);
BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
BasicBlock *InsertAtEnd);
+protected:
+ virtual BranchInst *clone_impl() const;
public:
static BranchInst *Create(BasicBlock *IfTrue, Instruction *InsertBefore = 0) {
return new(1, true) BranchInst(IfTrue, InsertBefore);
@@ -2099,8 +2011,6 @@ public:
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- virtual BranchInst *clone() const;
-
bool isUnconditional() const { return getNumOperands() == 1; }
bool isConditional() const { return getNumOperands() == 3; }
@@ -2118,7 +2028,7 @@ public:
// targeting the specified block.
// FIXME: Eliminate this ugly method.
void setUnconditionalDest(BasicBlock *Dest) {
- Op<-1>() = Dest;
+ Op<-1>() = (Value*)Dest;
if (isConditional()) { // Convert this to an uncond branch.
Op<-2>() = 0;
Op<-3>() = 0;
@@ -2136,7 +2046,7 @@ public:
void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
assert(idx < getNumSuccessors() && "Successor # out of range for Branch!");
- *(&Op<-1>() - idx) = NewSucc;
+ *(&Op<-1>() - idx) = (Value*)NewSucc;
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -2172,7 +2082,7 @@ class SwitchInst : public TerminatorInst {
// Operand[1] = Default basic block destination
// Operand[2n ] = Value to match
// Operand[2n+1] = BasicBlock to go to on match
- SwitchInst(const SwitchInst &RI);
+ SwitchInst(const SwitchInst &SI);
void init(Value *Value, BasicBlock *Default, unsigned NumCases);
void resizeOperands(unsigned No);
// allocate space for exactly zero operands
@@ -2184,7 +2094,7 @@ class SwitchInst : public TerminatorInst {
/// be specified here to make memory allocation more efficient. This
/// constructor can also autoinsert before another instruction.
SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
- Instruction *InsertBefore = 0);
+ Instruction *InsertBefore);
/// SwitchInst ctor - Create a new switch instruction, specifying a value to
/// switch on and a default destination. The number of additional cases can
@@ -2192,6 +2102,8 @@ class SwitchInst : public TerminatorInst {
/// constructor also autoinserts at the end of the specified BasicBlock.
SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
BasicBlock *InsertAtEnd);
+protected:
+ virtual SwitchInst *clone_impl() const;
public:
static SwitchInst *Create(Value *Value, BasicBlock *Default,
unsigned NumCases, Instruction *InsertBefore = 0) {
@@ -2269,8 +2181,6 @@ public:
///
void removeCase(unsigned idx);
- virtual SwitchInst *clone() const;
-
unsigned getNumSuccessors() const { return getNumOperands()/2; }
BasicBlock *getSuccessor(unsigned idx) const {
assert(idx < getNumSuccessors() &&"Successor idx out of range for switch!");
@@ -2278,7 +2188,7 @@ public:
}
void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
assert(idx < getNumSuccessors() && "Successor # out of range for switch!");
- setOperand(idx*2+1, NewSucc);
+ setOperand(idx*2+1, (Value*)NewSucc);
}
// getSuccessorValue - Return the value associated with the specified
@@ -2310,6 +2220,105 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value)
//===----------------------------------------------------------------------===//
+// IndirectBrInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// IndirectBrInst - Indirect Branch Instruction.
+///
+class IndirectBrInst : public TerminatorInst {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ unsigned ReservedSpace;
+ // Operand[0]   = Address to jump to
+ // Operand[n+1] = the n-th destination block
+ IndirectBrInst(const IndirectBrInst &IBI);
+ void init(Value *Address, unsigned NumDests);
+ void resizeOperands(unsigned No);
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+ /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
+ /// Address to jump to. The number of expected destinations can be specified
+ /// here to make memory allocation more efficient. This constructor can also
+ /// autoinsert before another instruction.
+ IndirectBrInst(Value *Address, unsigned NumDests, Instruction *InsertBefore);
+
+ /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
+ /// Address to jump to. The number of expected destinations can be specified
+ /// here to make memory allocation more efficient. This constructor also
+ /// autoinserts at the end of the specified BasicBlock.
+ IndirectBrInst(Value *Address, unsigned NumDests, BasicBlock *InsertAtEnd);
+protected:
+ virtual IndirectBrInst *clone_impl() const;
+public:
+ static IndirectBrInst *Create(Value *Address, unsigned NumDests,
+ Instruction *InsertBefore = 0) {
+ return new IndirectBrInst(Address, NumDests, InsertBefore);
+ }
+ static IndirectBrInst *Create(Value *Address, unsigned NumDests,
+ BasicBlock *InsertAtEnd) {
+ return new IndirectBrInst(Address, NumDests, InsertAtEnd);
+ }
+ ~IndirectBrInst();
+
+ /// Provide fast operand accessors.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ // Accessor Methods for IndirectBrInst instruction.
+ Value *getAddress() { return getOperand(0); }
+ const Value *getAddress() const { return getOperand(0); }
+ void setAddress(Value *V) { setOperand(0, V); }
+
+
+ /// getNumDestinations - return the number of possible destinations in this
+ /// indirectbr instruction.
+ unsigned getNumDestinations() const { return getNumOperands()-1; }
+
+ /// getDestination - Return the specified destination.
+ BasicBlock *getDestination(unsigned i) { return getSuccessor(i); }
+ const BasicBlock *getDestination(unsigned i) const { return getSuccessor(i); }
+
+ /// addDestination - Add a destination.
+ ///
+ void addDestination(BasicBlock *Dest);
+
+ /// removeDestination - This method removes the specified successor from the
+ /// indirectbr instruction.
+ void removeDestination(unsigned i);
+
+ unsigned getNumSuccessors() const { return getNumOperands()-1; }
+ BasicBlock *getSuccessor(unsigned i) const {
+ return cast<BasicBlock>(getOperand(i+1));
+ }
+ void setSuccessor(unsigned i, BasicBlock *NewSucc) {
+ setOperand(i+1, (Value*)NewSucc);
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IndirectBrInst *) { return true; }
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::IndirectBr;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ virtual BasicBlock *getSuccessorV(unsigned idx) const;
+ virtual unsigned getNumSuccessorsV() const;
+ virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+};
+
+template <>
+struct OperandTraits<IndirectBrInst> : public HungoffOperandTraits<1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
+
+
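To make the new API concrete, here is a minimal sketch (not part of this patch) that terminates a block with an indirectbr; BlockAddress is assumed to be the companion constant introduced alongside this instruction, and the function and block names are illustrative:

```cpp
#include "llvm/Constants.h"
#include "llvm/Instructions.h"

using namespace llvm;

// Sketch: terminate Src with an indirectbr whose only possible target is
// Dest. BlockAddress::get (assumed API) yields the address of Dest.
static void emitIndirectBranch(Function *F, BasicBlock *Src,
                               BasicBlock *Dest) {
  Value *Addr = BlockAddress::get(F, Dest);
  IndirectBrInst *IBI =
      IndirectBrInst::Create(Addr, /*NumDests=*/1, /*InsertAtEnd=*/Src);
  IBI->addDestination(Dest);
}
```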
+//===----------------------------------------------------------------------===//
// InvokeInst Class
//===----------------------------------------------------------------------===//
@@ -2361,6 +2370,8 @@ class InvokeInst : public TerminatorInst {
InputIterator ArgBegin, InputIterator ArgEnd,
unsigned Values,
const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+ virtual InvokeInst *clone_impl() const;
public:
template<typename InputIterator>
static InvokeInst *Create(Value *Func,
@@ -2383,8 +2394,6 @@ public:
Values, NameStr, InsertAtEnd);
}
- virtual InvokeInst *clone() const;
-
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -2487,11 +2496,11 @@ public:
return cast<BasicBlock>(getOperand(2));
}
void setNormalDest(BasicBlock *B) {
- setOperand(1, B);
+ setOperand(1, (Value*)B);
}
void setUnwindDest(BasicBlock *B) {
- setOperand(2, B);
+ setOperand(2, (Value*)B);
}
BasicBlock *getSuccessor(unsigned i) const {
@@ -2501,7 +2510,7 @@ public:
void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
assert(idx < 2 && "Successor # out of range for invoke!");
- setOperand(idx+1, NewSucc);
+ setOperand(idx+1, (Value*)NewSucc);
}
unsigned getNumSuccessors() const { return 2; }
@@ -2565,6 +2574,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value)
///
class UnwindInst : public TerminatorInst {
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+protected:
+ virtual UnwindInst *clone_impl() const;
public:
// allocate space for exactly zero operands
void *operator new(size_t s) {
@@ -2573,8 +2584,6 @@ public:
explicit UnwindInst(LLVMContext &C, Instruction *InsertBefore = 0);
explicit UnwindInst(LLVMContext &C, BasicBlock *InsertAtEnd);
- virtual UnwindInst *clone() const;
-
unsigned getNumSuccessors() const { return 0; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -2602,6 +2611,9 @@ private:
///
class UnreachableInst : public TerminatorInst {
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+protected:
+ virtual UnreachableInst *clone_impl() const;
+
public:
// allocate space for exactly zero operands
void *operator new(size_t s) {
@@ -2610,8 +2622,6 @@ public:
explicit UnreachableInst(LLVMContext &C, Instruction *InsertBefore = 0);
explicit UnreachableInst(LLVMContext &C, BasicBlock *InsertAtEnd);
- virtual UnreachableInst *clone() const;
-
unsigned getNumSuccessors() const { return 0; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -2634,6 +2644,10 @@ private:
/// @brief This class represents a truncation of integer types.
class TruncInst : public CastInst {
+protected:
+ /// @brief Clone an identical TruncInst
+ virtual TruncInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
TruncInst(
@@ -2651,9 +2665,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// @brief Clone an identical TruncInst
- virtual TruncInst *clone() const;
-
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const TruncInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -2670,6 +2681,10 @@ public:
/// @brief This class represents zero extension of integer types.
class ZExtInst : public CastInst {
+protected:
+ /// @brief Clone an identical ZExtInst
+ virtual ZExtInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
ZExtInst(
@@ -2687,9 +2702,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// @brief Clone an identical ZExtInst
- virtual ZExtInst *clone() const;
-
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const ZExtInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -2706,6 +2718,10 @@ public:
/// @brief This class represents a sign extension of integer types.
class SExtInst : public CastInst {
+protected:
+ /// @brief Clone an identical SExtInst
+ virtual SExtInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
SExtInst(
@@ -2723,9 +2739,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// @brief Clone an identical SExtInst
- virtual SExtInst *clone() const;
-
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const SExtInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -2742,6 +2755,10 @@ public:
/// @brief This class represents a truncation of floating point types.
class FPTruncInst : public CastInst {
+protected:
+ /// @brief Clone an identical FPTruncInst
+ virtual FPTruncInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
FPTruncInst(
@@ -2759,9 +2776,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// @brief Clone an identical FPTruncInst
- virtual FPTruncInst *clone() const;
-
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const FPTruncInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -2778,6 +2792,10 @@ public:
/// @brief This class represents an extension of floating point types.
class FPExtInst : public CastInst {
+protected:
+ /// @brief Clone an identical FPExtInst
+ virtual FPExtInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
FPExtInst(
@@ -2795,9 +2813,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// @brief Clone an identical FPExtInst
- virtual FPExtInst *clone() const;
-
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const FPExtInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -2814,6 +2829,10 @@ public:
/// @brief This class represents a cast unsigned integer to floating point.
class UIToFPInst : public CastInst {
+protected:
+ /// @brief Clone an identical UIToFPInst
+ virtual UIToFPInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
UIToFPInst(
@@ -2831,9 +2850,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// @brief Clone an identical UIToFPInst
- virtual UIToFPInst *clone() const;
-
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const UIToFPInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -2850,6 +2866,10 @@ public:
/// @brief This class represents a cast from signed integer to floating point.
class SIToFPInst : public CastInst {
+protected:
+ /// @brief Clone an identical SIToFPInst
+ virtual SIToFPInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
SIToFPInst(
@@ -2867,9 +2887,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// @brief Clone an identical SIToFPInst
- virtual SIToFPInst *clone() const;
-
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const SIToFPInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -2886,6 +2903,10 @@ public:
/// @brief This class represents a cast from floating point to unsigned integer
class FPToUIInst : public CastInst {
+protected:
+ /// @brief Clone an identical FPToUIInst
+ virtual FPToUIInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
FPToUIInst(
@@ -2903,9 +2924,6 @@ public:
BasicBlock *InsertAtEnd ///< Where to insert the new instruction
);
- /// @brief Clone an identical FPToUIInst
- virtual FPToUIInst *clone() const;
-
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const FPToUIInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -2922,6 +2940,10 @@ public:
/// @brief This class represents a cast from floating point to signed integer.
class FPToSIInst : public CastInst {
+protected:
+ /// @brief Clone an identical FPToSIInst
+ virtual FPToSIInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
FPToSIInst(
@@ -2939,9 +2961,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// @brief Clone an identical FPToSIInst
- virtual FPToSIInst *clone() const;
-
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const FPToSIInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -2976,7 +2995,7 @@ public:
);
/// @brief Clone an identical IntToPtrInst
- virtual IntToPtrInst *clone() const;
+ virtual IntToPtrInst *clone_impl() const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const IntToPtrInst *) { return true; }
@@ -2994,6 +3013,10 @@ public:
/// @brief This class represents a cast from a pointer to an integer
class PtrToIntInst : public CastInst {
+protected:
+ /// @brief Clone an identical PtrToIntInst
+ virtual PtrToIntInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
PtrToIntInst(
@@ -3011,9 +3034,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// @brief Clone an identical PtrToIntInst
- virtual PtrToIntInst *clone() const;
-
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const PtrToIntInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -3030,6 +3050,10 @@ public:
/// @brief This class represents a no-op cast from one type to another.
class BitCastInst : public CastInst {
+protected:
+ /// @brief Clone an identical BitCastInst
+ virtual BitCastInst *clone_impl() const;
+
public:
/// @brief Constructor with insert-before-instruction semantics
BitCastInst(
@@ -3047,9 +3071,6 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// @brief Clone an identical BitCastInst
- virtual BitCastInst *clone() const;
-
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const BitCastInst *) { return true; }
static inline bool classof(const Instruction *I) {
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index e6d8007..c0cf00e 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -259,6 +259,11 @@ def int_longjmp : Intrinsic<[llvm_void_ty], [llvm_ptr_ty, llvm_i32_ty]>;
def int_sigsetjmp : Intrinsic<[llvm_i32_ty] , [llvm_ptr_ty, llvm_i32_ty]>;
def int_siglongjmp : Intrinsic<[llvm_void_ty], [llvm_ptr_ty, llvm_i32_ty]>;
+// Internal interface for object size checking
+def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrReadArgMem]>,
+ GCCBuiltin<"__builtin_object_size">;
+
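For orientation, the GCC builtin named above can be exercised directly from C or C++; a front end lowers such calls to this intrinsic. A small sketch (the printed values are typical, not guaranteed):

```cpp
#include <stdio.h>

// Sketch: __builtin_object_size folds to the number of bytes remaining in
// the object, or (size_t)-1 when the compiler cannot tell. Mode 0 asks for
// an upper bound.
int main(void) {
  char buf[32];
  printf("%zu\n", __builtin_object_size(buf, 0));      /* typically 32 */
  printf("%zu\n", __builtin_object_size(buf + 8, 0));  /* typically 24 */
  return 0;
}
```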
//===-------------------- Bit Manipulation Intrinsics ---------------------===//
//
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 3342ea8..bcb98c1 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -91,7 +91,6 @@ namespace {
(void) llvm::createLoopUnswitchPass();
(void) llvm::createLoopRotatePass();
(void) llvm::createLoopIndexSplitPass();
- (void) llvm::createLowerAllocationsPass();
(void) llvm::createLowerInvokePass();
(void) llvm::createLowerSetJmpPass();
(void) llvm::createLowerSwitchPass();
@@ -107,7 +106,6 @@ namespace {
(void) llvm::createPostDomPrinterPass();
(void) llvm::createPostDomOnlyViewerPass();
(void) llvm::createPostDomViewerPass();
- (void) llvm::createRaiseAllocationsPass();
(void) llvm::createReassociatePass();
(void) llvm::createSCCPPass();
(void) llvm::createScalarReplAggregatesPass();
@@ -125,7 +123,6 @@ namespace {
(void) llvm::createNullProfilerRSPass();
(void) llvm::createRSProfilingPass();
(void) llvm::createInstCountPass();
- (void) llvm::createCodeGenLICMPass();
(void) llvm::createCodeGenPreparePass();
(void) llvm::createGVNPass();
(void) llvm::createMemCpyOptPass();
@@ -142,6 +139,9 @@ namespace {
(void) llvm::createPartialInliningPass();
(void) llvm::createSSIPass();
(void) llvm::createSSIEverythingPass();
+ (void) llvm::createGEPSplitterPass();
+ (void) llvm::createSCCVNPass();
+ (void) llvm::createABCDPass();
(void)new llvm::IntervalPartition();
(void)new llvm::FindUsedTypes();
diff --git a/include/llvm/MC/MCAsmLexer.h b/include/llvm/MC/MCAsmLexer.h
index e66425a..e369e30 100644
--- a/include/llvm/MC/MCAsmLexer.h
+++ b/include/llvm/MC/MCAsmLexer.h
@@ -11,7 +11,7 @@
#define LLVM_MC_MCASMLEXER_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class MCAsmLexer;
diff --git a/include/llvm/MC/MCAsmParser.h b/include/llvm/MC/MCAsmParser.h
index c1b5d13..d530093 100644
--- a/include/llvm/MC/MCAsmParser.h
+++ b/include/llvm/MC/MCAsmParser.h
@@ -10,7 +10,7 @@
#ifndef LLVM_MC_MCASMPARSER_H
#define LLVM_MC_MCASMPARSER_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class MCAsmLexer;
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 76ed3df..8656927 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -14,7 +14,7 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <vector> // FIXME: Shouldn't be needed.
namespace llvm {
diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h
index ef10b80..ffa0e41 100644
--- a/include/llvm/MC/MCDisassembler.h
+++ b/include/llvm/MC/MCDisassembler.h
@@ -9,7 +9,7 @@
#ifndef MCDISASSEMBLER_H
#define MCDISASSEMBLER_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 7a2a0d5..4318628 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -11,7 +11,7 @@
#define LLVM_MC_MCEXPR_H
#include "llvm/Support/Casting.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class MCAsmInfo;
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index 0fc4d18..29b38dd 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -17,7 +17,7 @@
#define LLVM_MC_MCINST_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class raw_ostream;
@@ -43,7 +43,6 @@ class MCOperand {
public:
MCOperand() : Kind(kInvalid) {}
- MCOperand(const MCOperand &RHS) { *this = RHS; }
bool isValid() const { return Kind != kInvalid; }
bool isReg() const { return Kind == kRegister; }
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 248e6b0..4d72f32 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -14,7 +14,7 @@
#ifndef LLVM_MC_MCSTREAMER_H
#define LLVM_MC_MCSTREAMER_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class MCAsmInfo;
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index d08f0e5..c6efe72 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -16,7 +16,7 @@
#include <string>
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class MCAsmInfo;
diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h
index 62aca6e..4f5ab31 100644
--- a/include/llvm/MC/MCValue.h
+++ b/include/llvm/MC/MCValue.h
@@ -14,7 +14,7 @@
#ifndef LLVM_MC_MCVALUE_H
#define LLVM_MC_MCVALUE_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/MC/MCSymbol.h"
#include <cassert>
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index c983015..ed87d42 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -63,7 +63,7 @@ public:
StringRef getString() const { return Str; }
- unsigned getLength() const { return Str.size(); }
+ unsigned getLength() const { return (unsigned)Str.size(); }
typedef StringRef::iterator iterator;
@@ -191,7 +191,7 @@ public:
/// getNumElements - Return number of NamedMDNode elements.
unsigned getNumElements() const {
- return Node.size();
+ return (unsigned)Node.size();
}
/// addElement - Add metadata element.
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index 501625d..70eba68 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -19,7 +19,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/GlobalAlias.h"
#include "llvm/Metadata.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <vector>
namespace llvm {
@@ -252,6 +252,7 @@ public:
AttrListPtr AttributeList,
const Type *RetTy, ...) END_WITH_NULL;
+ /// getOrInsertFunction - Same as above, but without the attributes.
Constant *getOrInsertFunction(const StringRef &Name, const Type *RetTy, ...)
END_WITH_NULL;
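A sketch of the attribute-less overload in use; the callee name and context variable are illustrative, and the trailing null pointer is required because the declaration is marked END_WITH_NULL:

```cpp
#include "llvm/Module.h"

using namespace llvm;

// Sketch: declare (or fetch, if already present) an i32(i32) function.
static Constant *getHypotheticalCallee(Module &M, LLVMContext &Ctx) {
  return M.getOrInsertFunction("hypothetical_callee",
                               Type::getInt32Ty(Ctx),  // return type
                               Type::getInt32Ty(Ctx),  // one i32 parameter
                               (Type *)0);             // END_WITH_NULL sentinel
}
```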
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 2791936..6bef2e7 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -29,7 +29,7 @@
#ifndef LLVM_PASS_H
#define LLVM_PASS_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cassert>
#include <utility>
#include <vector>
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index 4c84878..b0ed33d 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -15,7 +15,7 @@
#define LLVM_SUPPORT_ALLOCATOR_H
#include "llvm/Support/AlignOf.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cassert>
#include <cstdlib>
diff --git a/include/llvm/Support/ConstantRange.h b/include/llvm/Support/ConstantRange.h
index e9c8c7c..3846d46 100644
--- a/include/llvm/Support/ConstantRange.h
+++ b/include/llvm/Support/ConstantRange.h
@@ -33,7 +33,7 @@
#define LLVM_SUPPORT_CONSTANT_RANGE_H
#include "llvm/ADT/APInt.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h
index 6f82ea7..afa828c 100644
--- a/include/llvm/Support/Debug.h
+++ b/include/llvm/Support/Debug.h
@@ -28,39 +28,47 @@
namespace llvm {
-// DebugFlag - This boolean is set to true if the '-debug' command line option
-// is specified. This should probably not be referenced directly, instead, use
-// the DEBUG macro below.
-//
-#ifndef NDEBUG
-extern bool DebugFlag;
+/// DEBUG_TYPE macro - Files can specify a DEBUG_TYPE as a string, which causes
+/// all of their DEBUG statements to be activatable with -debug-only=thatstring.
+#ifndef DEBUG_TYPE
+#define DEBUG_TYPE ""
#endif
-
-// isCurrentDebugType - Return true if the specified string is the debug type
-// specified on the command line, or if none was specified on the command line
-// with the -debug-only=X option.
-//
+
#ifndef NDEBUG
+/// DebugFlag - This boolean is set to true if the '-debug' command line option
+/// is specified. This should probably not be referenced directly, instead, use
+/// the DEBUG macro below.
+///
+extern bool DebugFlag;
+
+/// isCurrentDebugType - Return true if the specified string is the debug type
+/// specified on the command line, or if none was specified on the command line
+/// with the -debug-only=X option.
+///
bool isCurrentDebugType(const char *Type);
-#else
-#define isCurrentDebugType(X) (false)
-#endif
-
-// DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug
-// information. In the '-debug' option is specified on the commandline, and if
-// this is a debug build, then the code specified as the option to the macro
-// will be executed. Otherwise it will not be. Example:
-//
-// DEBUG_WITH_TYPE("bitset", errs() << "Bitset contains: " << Bitset << "\n");
-//
-// This will emit the debug information if -debug is present, and -debug-only is
-// not specified, or is specified as "bitset".
-#ifdef NDEBUG
-#define DEBUG_WITH_TYPE(TYPE, X) do { } while (0)
-#else
+/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X
+/// option were specified. Note that DebugFlag also needs to be set to true for
+/// debug output to be produced.
+///
+void SetCurrentDebugType(const char *Type);
+
+/// DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug
+/// information. In the '-debug' option is specified on the commandline, and if
+/// this is a debug build, then the code specified as the option to the macro
+/// will be executed. Otherwise it will not be. Example:
+///
+/// DEBUG_WITH_TYPE("bitset", errs() << "Bitset contains: " << Bitset << "\n");
+///
+/// This will emit the debug information if -debug is present, and -debug-only
+/// is not specified, or is specified as "bitset".
#define DEBUG_WITH_TYPE(TYPE, X) \
do { if (DebugFlag && isCurrentDebugType(TYPE)) { X; } } while (0)
+
+#else
+#define isCurrentDebugType(X) (false)
+#define SetCurrentDebugType(X)
+#define DEBUG_WITH_TYPE(TYPE, X) do { } while (0)
#endif
// DEBUG macro - This macro should be used by passes to emit debug information.
@@ -70,11 +78,6 @@ bool isCurrentDebugType(const char *Type);
//
// DEBUG(errs() << "Bitset contains: " << Bitset << "\n");
//
-
-#ifndef DEBUG_TYPE
-#define DEBUG_TYPE ""
-#endif
-
#define DEBUG(X) DEBUG_WITH_TYPE(DEBUG_TYPE, X)
} // End llvm namespace
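Typical usage of the reorganized macros, as a sketch; "mypass" is an illustrative debug type, and output appears only in builds with assertions enabled:

```cpp
// Sketch: DEBUG_TYPE must be defined before Debug.h is included so the
// DEBUG macro below picks it up.
#define DEBUG_TYPE "mypass"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void noteVisit(unsigned N) {
  // Printed under -debug, or under -debug-only=mypass.
  DEBUG(errs() << "visiting node " << N << "\n");
  // An explicit type overrides DEBUG_TYPE for a single statement.
  DEBUG_WITH_TYPE("mypass-verbose", errs() << "details for " << N << "\n");
}
```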
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index aa27946..e747c7a 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -21,7 +21,7 @@
#ifndef LLVM_SUPPORT_ELF_H
#define LLVM_SUPPORT_ELF_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cstring>
namespace llvm {
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index 83df9ed..4652e8f 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -253,6 +253,13 @@ public:
return Insert(SwitchInst::Create(V, Dest, NumCases));
}
+ /// CreateIndirectBr - Create an indirect branch instruction with the
+ /// specified address operand, with an optional hint for the number of
+ /// destinations that will be added (for efficient allocation).
+ IndirectBrInst *CreateIndirectBr(Value *Addr, unsigned NumDests = 10) {
+ return Insert(IndirectBrInst::Create(Addr, NumDests));
+ }
+
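The builder spelling of the same idea, as a sketch; the address value and destination blocks are assumed to exist:

```cpp
#include "llvm/Support/IRBuilder.h"

using namespace llvm;

// Sketch: append an indirectbr through Addr (a pointer-typed value, e.g. a
// loaded blockaddress) to the end of Src, then register both targets.
static void emitViaBuilder(BasicBlock *Src, Value *Addr,
                           BasicBlock *Dest1, BasicBlock *Dest2) {
  IRBuilder<> Builder(Src);
  IndirectBrInst *IBI = Builder.CreateIndirectBr(Addr, /*NumDests=*/2);
  IBI->addDestination(Dest1);
  IBI->addDestination(Dest2);
}
```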
/// CreateInvoke - Create an invoke instruction.
template<typename InputIterator>
InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
@@ -383,15 +390,21 @@ public:
return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
}
Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS)) {
+ if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isAllOnesValue())
+ return LHS; // LHS & -1 -> LHS
+ if (Constant *LC = dyn_cast<Constant>(LHS))
return Folder.CreateAnd(LC, RC);
+ }
return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name);
}
Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS)) {
+ if (RC->isNullValue())
+ return LHS; // LHS | 0 -> LHS
+ if (Constant *LC = dyn_cast<Constant>(LHS))
return Folder.CreateOr(LC, RC);
+ }
return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
}
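The two new short-circuits in effect, as a sketch: masking with all-ones or or-ing with zero hands back the left operand without emitting an instruction.

```cpp
#include <cassert>
#include "llvm/Constants.h"
#include "llvm/Support/IRBuilder.h"

using namespace llvm;

// Sketch: X is assumed to have integer type.
static void demoIdentityFolds(BasicBlock *BB, Value *X) {
  IRBuilder<> B(BB);
  const Type *Ty = X->getType();
  Value *AndRes = B.CreateAnd(X, Constant::getAllOnesValue(Ty)); // X & -1
  Value *OrRes  = B.CreateOr(X, Constant::getNullValue(Ty));     // X | 0
  assert(AndRes == X && OrRes == X && "both fold away to the LHS");
  (void)AndRes; (void)OrRes;
}
```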
Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
@@ -433,9 +446,6 @@ public:
const Twine &Name = "") {
return Insert(new AllocaInst(Ty, ArraySize), Name);
}
- FreeInst *CreateFree(Value *Ptr) {
- return Insert(new FreeInst(Ptr));
- }
// Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of
// converting the string to 'bool' for the isVolatile parameter.
LoadInst *CreateLoad(Value *Ptr, const char *Name) {
diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/Support/InstVisitor.h
index 440657c..b2e5d58 100644
--- a/include/llvm/Support/InstVisitor.h
+++ b/include/llvm/Support/InstVisitor.h
@@ -160,13 +160,13 @@ public:
RetTy visitReturnInst(ReturnInst &I) { DELEGATE(TerminatorInst);}
RetTy visitBranchInst(BranchInst &I) { DELEGATE(TerminatorInst);}
RetTy visitSwitchInst(SwitchInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);}
RetTy visitInvokeInst(InvokeInst &I) { DELEGATE(TerminatorInst);}
RetTy visitUnwindInst(UnwindInst &I) { DELEGATE(TerminatorInst);}
RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);}
RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);}
RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);}
- RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(AllocationInst);}
- RetTy visitFreeInst(FreeInst &I) { DELEGATE(Instruction); }
+ RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(Instruction); }
RetTy visitLoadInst(LoadInst &I) { DELEGATE(Instruction); }
RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction); }
RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction); }
@@ -198,7 +198,6 @@ public:
//
RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction); }
RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction); }
- RetTy visitAllocationInst(AllocationInst &I) { DELEGATE(Instruction); }
RetTy visitCmpInst(CmpInst &I) { DELEGATE(Instruction); }
RetTy visitCastInst(CastInst &I) { DELEGATE(Instruction); }
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 6fa618e..438b021e 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -14,7 +14,7 @@
#ifndef LLVM_SUPPORT_MATHEXTRAS_H
#define LLVM_SUPPORT_MATHEXTRAS_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index eb4784c..f9a4d6d 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -15,7 +15,7 @@
#define LLVM_SUPPORT_MEMORYBUFFER_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <string>
namespace llvm {
diff --git a/include/llvm/Support/MemoryObject.h b/include/llvm/Support/MemoryObject.h
index dec0f13..e193ca2 100644
--- a/include/llvm/Support/MemoryObject.h
+++ b/include/llvm/Support/MemoryObject.h
@@ -10,7 +10,7 @@
#ifndef MEMORYOBJECT_H
#define MEMORYOBJECT_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
diff --git a/include/llvm/Support/PointerLikeTypeTraits.h b/include/llvm/Support/PointerLikeTypeTraits.h
index d64993f..b851404 100644
--- a/include/llvm/Support/PointerLikeTypeTraits.h
+++ b/include/llvm/Support/PointerLikeTypeTraits.h
@@ -15,7 +15,7 @@
#ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
#define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
@@ -38,7 +38,7 @@ public:
return static_cast<T*>(P);
}
- /// Note, we assume here that malloc returns objects at least 8-byte aligned.
+ /// Note, we assume here that malloc returns objects at least 4-byte aligned.
/// However, this may be wrong, or pointers may be from something other than
/// malloc. In this case, you should specialize this template to reduce this.
///
diff --git a/include/llvm/Support/SlowOperationInformer.h b/include/llvm/Support/SlowOperationInformer.h
index b30aa98..524049c 100644
--- a/include/llvm/Support/SlowOperationInformer.h
+++ b/include/llvm/Support/SlowOperationInformer.h
@@ -31,7 +31,7 @@
#include <string>
#include <cassert>
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class SlowOperationInformer {
diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h
index c71e6b9..1a6d06b 100644
--- a/include/llvm/Support/StandardPasses.h
+++ b/include/llvm/Support/StandardPasses.h
@@ -96,41 +96,43 @@ namespace llvm {
return;
}
- if (UnitAtATime)
- PM->add(createRaiseAllocationsPass()); // call %malloc -> malloc inst
- PM->add(createCFGSimplificationPass()); // Clean up disgusting code
if (UnitAtATime) {
PM->add(createGlobalOptimizerPass()); // Optimize out global vars
- PM->add(createGlobalDCEPass()); // Remove unused fns and globs
- // IP Constant Propagation
- PM->add(createIPConstantPropagationPass());
+
+ PM->add(createIPSCCPPass()); // IP SCCP
PM->add(createDeadArgEliminationPass()); // Dead argument elimination
}
PM->add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
PM->add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
- if (UnitAtATime) {
- if (HaveExceptions)
- PM->add(createPruneEHPass()); // Remove dead EH info
- PM->add(createFunctionAttrsPass()); // Set readonly/readnone attrs
- }
+
+ // Start of CallGraph SCC passes.
+ if (UnitAtATime && HaveExceptions)
+ PM->add(createPruneEHPass()); // Remove dead EH info
if (InliningPass)
PM->add(InliningPass);
+ if (UnitAtATime)
+ PM->add(createFunctionAttrsPass()); // Set readonly/readnone attrs
if (OptimizationLevel > 2)
PM->add(createArgumentPromotionPass()); // Scalarize uninlined fn args
+
+ // Start of function pass.
+
+ PM->add(createScalarReplAggregatesPass()); // Break up aggregate allocas
if (SimplifyLibCalls)
PM->add(createSimplifyLibCallsPass()); // Library Call Optimizations
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
PM->add(createJumpThreadingPass()); // Thread jumps.
PM->add(createCFGSimplificationPass()); // Merge & remove BBs
- PM->add(createScalarReplAggregatesPass()); // Break up aggregate allocas
PM->add(createInstructionCombiningPass()); // Combine silly seq's
+
+ // FIXME: CondProp breaks critical edges, which is slow.
PM->add(createCondPropagationPass()); // Propagate conditionals
PM->add(createTailCallEliminationPass()); // Eliminate tail calls
PM->add(createCFGSimplificationPass()); // Merge & remove BBs
PM->add(createReassociatePass()); // Reassociate expressions
PM->add(createLoopRotatePass()); // Rotate Loop
PM->add(createLICMPass()); // Hoist loop invariants
- PM->add(createLoopUnswitchPass(OptimizeSize));
+ PM->add(createLoopUnswitchPass(OptimizeSize || OptimizationLevel < 3));
PM->add(createInstructionCombiningPass());
PM->add(createIndVarSimplifyPass()); // Canonicalize indvars
PM->add(createLoopDeletionPass()); // Delete dead loops
@@ -152,10 +154,15 @@ namespace llvm {
if (UnitAtATime) {
PM->add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
PM->add(createDeadTypeEliminationPass()); // Eliminate dead types
- }
- if (OptimizationLevel > 1 && UnitAtATime)
- PM->add(createConstantMergePass()); // Merge dup global constants
+ // GlobalOpt already deletes dead functions and globals, at -O3 try a
+ // late pass of GlobalDCE. It is capable of deleting dead cycles.
+ if (OptimizationLevel > 2)
+ PM->add(createGlobalDCEPass()); // Remove dead fns and globals.
+
+ if (OptimizationLevel > 1)
+ PM->add(createConstantMergePass()); // Merge dup global constants
+ }
}
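For context, a sketch of how a driver might invoke the helper shown above for a -O2 style compile; the parameter list is assumed from this header, and createFunctionInliningPass stands in for whatever inliner the client prefers:

```cpp
#include "llvm/PassManager.h"
#include "llvm/Support/StandardPasses.h"
#include "llvm/Transforms/IPO.h"

using namespace llvm;

// Sketch: populate PM with the standard -O2 module pipeline.
static void buildO2Pipeline(PassManager &PM) {
  createStandardModulePasses(&PM, /*OptimizationLevel=*/2,
                             /*OptimizeSize=*/false,
                             /*UnitAtATime=*/true,
                             /*UnrollLoops=*/true,
                             /*SimplifyLibCalls=*/true,
                             /*HaveExceptions=*/true,
                             createFunctionInliningPass());
}
```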
static inline void addOnePass(PassManager *PM, Pass *P, bool AndVerify) {
diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h
index 8e28632..46ad9b6 100644
--- a/include/llvm/Support/TargetFolder.h
+++ b/include/llvm/Support/TargetFolder.h
@@ -179,6 +179,16 @@ public:
Constant *CreatePtrToInt(Constant *C, const Type *DestTy) const {
return CreateCast(Instruction::PtrToInt, C, DestTy);
}
+ Constant *CreateZExtOrBitCast(Constant *C, const Type *DestTy) const {
+ if (C->getType() == DestTy)
+ return C; // avoid calling Fold
+ return Fold(ConstantExpr::getZExtOrBitCast(C, DestTy));
+ }
+ Constant *CreateSExtOrBitCast(Constant *C, const Type *DestTy) const {
+ if (C->getType() == DestTy)
+ return C; // avoid calling Fold
+ return Fold(ConstantExpr::getSExtOrBitCast(C, DestTy));
+ }
Constant *CreateTruncOrBitCast(Constant *C, const Type *DestTy) const {
if (C->getType() == DestTy)
return C; // avoid calling Fold
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index 54f1da9..8a0f55d 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -15,7 +15,7 @@
#ifndef LLVM_SUPPORT_TIMER_H
#define LLVM_SUPPORT_TIMER_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/System/Mutex.h"
#include <string>
#include <vector>
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index e67ff85..66d6aaa 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -15,7 +15,7 @@
#define LLVM_SUPPORT_RAW_OSTREAM_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class format_object_base;
diff --git a/include/llvm/System/AIXDataTypesFix.h b/include/llvm/System/AIXDataTypesFix.h
new file mode 100644
index 0000000..8dbf02f
--- /dev/null
+++ b/include/llvm/System/AIXDataTypesFix.h
@@ -0,0 +1,25 @@
+//===-- llvm/System/AIXDataTypesFix.h - Fix datatype defs ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file overrides default system-defined types and limits which cannot be
+// done in DataTypes.h.in because it is processed by autoheader first, which
+// comments out any #undef statement
+//
+//===----------------------------------------------------------------------===//
+
+// No include guards desired!
+
+#ifndef SUPPORT_DATATYPES_H
+#error "AIXDataTypesFix.h must only be included via DataTypes.h!"
+#endif
+
+// GCC is strict about defining large constants: they must have LL modifier.
+// These will be defined properly at the end of DataTypes.h
+#undef INT64_MAX
+#undef INT64_MIN
diff --git a/include/llvm/System/Atomic.h b/include/llvm/System/Atomic.h
index 4ec117b..0c05d69 100644
--- a/include/llvm/System/Atomic.h
+++ b/include/llvm/System/Atomic.h
@@ -14,7 +14,7 @@
#ifndef LLVM_SYSTEM_ATOMIC_H
#define LLVM_SYSTEM_ATOMIC_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
namespace sys {
diff --git a/include/llvm/System/DataTypes.h.cmake b/include/llvm/System/DataTypes.h.cmake
new file mode 100644
index 0000000..180c86c
--- /dev/null
+++ b/include/llvm/System/DataTypes.h.cmake
@@ -0,0 +1,152 @@
+/*===-- include/System/DataTypes.h - Define fixed size types -----*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file contains definitions to figure out the size of _HOST_ data types.*|
+|* This file is important because different host OS's define different macros,*|
+|* which makes portability tough. This file exports the following *|
+|* definitions: *|
+|* *|
+|* [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
+|* [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values. *|
+|* *|
+|* No library is required when using these functions.  *|
+|* *|
+|*===----------------------------------------------------------------------===*/
+
+/* Please leave this file C-compatible. */
+
+#ifndef SUPPORT_DATATYPES_H
+#define SUPPORT_DATATYPES_H
+
+#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
+#cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H}
+#cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H}
+#cmakedefine HAVE_UINT64_T ${HAVE_UINT64_T}
+#cmakedefine HAVE_U_INT64_T ${HAVE_U_INT64_T}
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+#ifndef _MSC_VER
+
+/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
+ being defined. We would define it here, but in order to prevent Bad Things
+ happening when system headers or C++ STL headers include stdint.h before we
+ define it here, we define it on the g++ command line (in Makefile.rules). */
+#if !defined(__STDC_LIMIT_MACROS)
+# error "Must #define __STDC_LIMIT_MACROS before #including System/DataTypes.h"
+#endif
+
+#if !defined(__STDC_CONSTANT_MACROS)
+# error "Must #define __STDC_CONSTANT_MACROS before " \
+ "#including System/DataTypes.h"
+#endif
+
+/* Note that <inttypes.h> includes <stdint.h>, if this is a C99 system. */
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+#ifdef _AIX
+#include "llvm/System/AIXDataTypesFix.h"
+#endif
+
+/* Handle incorrect definition of uint64_t as u_int64_t */
+#ifndef HAVE_UINT64_T
+#ifdef HAVE_U_INT64_T
+typedef u_int64_t uint64_t;
+#else
+# error "Don't have a definition for uint64_t on this platform"
+#endif
+#endif
+
+#ifdef _OpenBSD_
+#define INT8_MAX 127
+#define INT8_MIN -128
+#define UINT8_MAX 255
+#define INT16_MAX 32767
+#define INT16_MIN -32768
+#define UINT16_MAX 65535
+#define INT32_MAX 2147483647
+#define INT32_MIN -2147483648
+#define UINT32_MAX 4294967295U
+#endif
+
+#else /* _MSC_VER */
+/* Visual C++ doesn't provide standard integer headers, but it does provide
+ built-in data types. */
+#include <stdlib.h>
+#include <stddef.h>
+#include <sys/types.h>
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef short int16_t;
+typedef unsigned short uint16_t;
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed int ssize_t;
+#define INT8_MAX 127
+#define INT8_MIN -128
+#define UINT8_MAX 255
+#define INT16_MAX 32767
+#define INT16_MIN -32768
+#define UINT16_MAX 65535
+#define INT32_MAX 2147483647
+#define INT32_MIN -2147483648
+#define UINT32_MAX 4294967295U
+#define INT8_C(C) C
+#define UINT8_C(C) C
+#define INT16_C(C) C
+#define UINT16_C(C) C
+#define INT32_C(C) C
+#define UINT32_C(C) C ## U
+#define INT64_C(C) ((int64_t) C ## LL)
+#define UINT64_C(C) ((uint64_t) C ## ULL)
+#endif /* _MSC_VER */
+
+/* Set defaults for constants which we cannot find. */
+#if !defined(INT64_MAX)
+# define INT64_MAX 9223372036854775807LL
+#endif
+#if !defined(INT64_MIN)
+# define INT64_MIN ((-INT64_MAX)-1)
+#endif
+#if !defined(UINT64_MAX)
+# define UINT64_MAX 0xffffffffffffffffULL
+#endif
+
+#if __GNUC__ > 3
+#define END_WITH_NULL __attribute__((sentinel))
+#else
+#define END_WITH_NULL
+#endif
+
+#ifndef HUGE_VALF
+#define HUGE_VALF (float)HUGE_VAL
+#endif
+
+#endif /* SUPPORT_DATATYPES_H */
diff --git a/include/llvm/System/DataTypes.h.in b/include/llvm/System/DataTypes.h.in
new file mode 100644
index 0000000..d574910
--- /dev/null
+++ b/include/llvm/System/DataTypes.h.in
@@ -0,0 +1,147 @@
+/*===-- include/System/DataTypes.h - Define fixed size types -----*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file contains definitions to figure out the size of _HOST_ data types.*|
+|* This file is important because different host OS's define different macros,*|
+|* which makes portability tough. This file exports the following *|
+|* definitions: *|
+|* *|
+|* [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
+|* [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values. *|
+|* *|
+|* No library is required when using these functions.  *|
+|* *|
+|*===----------------------------------------------------------------------===*/
+
+/* Please leave this file C-compatible. */
+
+#ifndef SUPPORT_DATATYPES_H
+#define SUPPORT_DATATYPES_H
+
+#undef HAVE_SYS_TYPES_H
+#undef HAVE_INTTYPES_H
+#undef HAVE_STDINT_H
+#undef HAVE_UINT64_T
+#undef HAVE_U_INT64_T
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+#ifndef _MSC_VER
+
+/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
+ being defined. We would define it here, but in order to prevent Bad Things
+ happening when system headers or C++ STL headers include stdint.h before we
+ define it here, we define it on the g++ command line (in Makefile.rules). */
+#if !defined(__STDC_LIMIT_MACROS)
+# error "Must #define __STDC_LIMIT_MACROS before #including System/DataTypes.h"
+#endif
+
+#if !defined(__STDC_CONSTANT_MACROS)
+# error "Must #define __STDC_CONSTANT_MACROS before " \
+ "#including System/DataTypes.h"
+#endif
+
+/* Note that <inttypes.h> includes <stdint.h>, if this is a C99 system. */
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+#ifdef _AIX
+#include "llvm/System/AIXDataTypesFix.h"
+#endif
+
+/* Handle incorrect definition of uint64_t as u_int64_t */
+#ifndef HAVE_UINT64_T
+#ifdef HAVE_U_INT64_T
+typedef u_int64_t uint64_t;
+#else
+# error "Don't have a definition for uint64_t on this platform"
+#endif
+#endif
+
+#ifdef _OpenBSD_
+#define INT8_MAX 127
+#define INT8_MIN -128
+#define UINT8_MAX 255
+#define INT16_MAX 32767
+#define INT16_MIN -32768
+#define UINT16_MAX 65535
+#define INT32_MAX 2147483647
+#define INT32_MIN -2147483648
+#define UINT32_MAX 4294967295U
+#endif
+
+#else /* _MSC_VER */
+/* Visual C++ doesn't provide standard integer headers, but it does provide
+ built-in data types. */
+#include <stdlib.h>
+#include <stddef.h>
+#include <sys/types.h>
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef short int16_t;
+typedef unsigned short uint16_t;
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed int ssize_t;
+#define INT8_MAX 127
+#define INT8_MIN -128
+#define UINT8_MAX 255
+#define INT16_MAX 32767
+#define INT16_MIN -32768
+#define UINT16_MAX 65535
+#define INT32_MAX 2147483647
+#define INT32_MIN -2147483648
+#define UINT32_MAX 4294967295U
+#define INT8_C(C) C
+#define UINT8_C(C) C
+#define INT16_C(C) C
+#define UINT16_C(C) C
+#define INT32_C(C) C
+#define UINT32_C(C) C ## U
+#define INT64_C(C) ((int64_t) C ## LL)
+#define UINT64_C(C) ((uint64_t) C ## ULL)
+#endif /* _MSC_VER */
+
+/* Set defaults for constants which we cannot find. */
+#if !defined(INT64_MAX)
+# define INT64_MAX 9223372036854775807LL
+#endif
+#if !defined(INT64_MIN)
+# define INT64_MIN ((-INT64_MAX)-1)
+#endif
+#if !defined(UINT64_MAX)
+# define UINT64_MAX 0xffffffffffffffffULL
+#endif
+
+#if __GNUC__ > 3
+#define END_WITH_NULL __attribute__((sentinel))
+#else
+#define END_WITH_NULL
+#endif
+
+#ifndef HUGE_VALF
+#define HUGE_VALF (float)HUGE_VAL
+#endif
+
+#endif /* SUPPORT_DATATYPES_H */
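The __STDC_* checks above translate into the following pattern for standalone code that includes the header directly; LLVM's own build injects the defines from Makefile.rules instead:

```cpp
/* Sketch: the defines must precede any direct or indirect include of
   stdint.h, so they come first in the translation unit. */
#define __STDC_LIMIT_MACROS
#define __STDC_CONSTANT_MACROS
#include "llvm/System/DataTypes.h"

#include <stdio.h>

int main(void) {
  printf("int64_t spans [%lld, %lld]\n",
         (long long)INT64_MIN, (long long)INT64_MAX);
  return 0;
}
```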
diff --git a/include/llvm/System/Disassembler.h b/include/llvm/System/Disassembler.h
index 6d1cc0f..e11e792 100644
--- a/include/llvm/System/Disassembler.h
+++ b/include/llvm/System/Disassembler.h
@@ -15,7 +15,7 @@
#ifndef LLVM_SYSTEM_DISASSEMBLER_H
#define LLVM_SYSTEM_DISASSEMBLER_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <string>
namespace llvm {
diff --git a/include/llvm/System/Memory.h b/include/llvm/System/Memory.h
index d6300db..69251dd 100644
--- a/include/llvm/System/Memory.h
+++ b/include/llvm/System/Memory.h
@@ -14,7 +14,7 @@
#ifndef LLVM_SYSTEM_MEMORY_H
#define LLVM_SYSTEM_MEMORY_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <string>
namespace llvm {
diff --git a/include/llvm/System/TimeValue.h b/include/llvm/System/TimeValue.h
index 10997304..168e2a7 100644
--- a/include/llvm/System/TimeValue.h
+++ b/include/llvm/System/TimeValue.h
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <string>
#ifndef LLVM_SYSTEM_TIMEVALUE_H
diff --git a/include/llvm/Target/SubtargetFeature.h b/include/llvm/Target/SubtargetFeature.h
index 58333e2..a709f52 100644
--- a/include/llvm/Target/SubtargetFeature.h
+++ b/include/llvm/Target/SubtargetFeature.h
@@ -21,7 +21,7 @@
#include <string>
#include <vector>
#include <cstring>
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class raw_ostream;
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 4d65b19..edb8582 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -199,7 +199,7 @@ class Instruction {
bit isReMaterializable = 0; // Is this instruction re-materializable?
bit isPredicable = 0; // Is this instruction predicable?
bit hasDelaySlot = 0; // Does this instruction have a delay slot?
- bit usesCustomDAGSchedInserter = 0; // Pseudo instr needing special help.
+ bit usesCustomInserter = 0; // Pseudo instr needing special help.
bit hasCtrlDep = 0; // Does this instruction r/w ctrl-flow chains?
bit isNotDuplicable = 0; // Is it unsafe to duplicate this instruction?
bit isAsCheapAsAMove = 0; // As cheap (or cheaper) than a move instruction.
diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h
index f8ea64b..af85f73 100644
--- a/include/llvm/Target/TargetData.h
+++ b/include/llvm/Target/TargetData.h
@@ -21,7 +21,7 @@
#define LLVM_TARGET_TARGETDATA_H
#include "llvm/Pass.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/SmallVector.h"
#include <string>
diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h
index d828a23..b0ed0bf 100644
--- a/include/llvm/Target/TargetInstrDesc.h
+++ b/include/llvm/Target/TargetInstrDesc.h
@@ -109,7 +109,7 @@ namespace TID {
UnmodeledSideEffects,
Commutable,
ConvertibleTo3Addr,
- UsesCustomDAGSchedInserter,
+ UsesCustomInserter,
Rematerializable,
CheapAsAMove,
ExtraSrcRegAllocReq,
@@ -416,7 +416,7 @@ public:
return Flags & (1 << TID::ConvertibleTo3Addr);
}
- /// usesCustomDAGSchedInsertionHook - Return true if this instruction requires
+ /// usesCustomInsertionHook - Return true if this instruction requires
/// custom insertion support when the DAG scheduler is inserting it into a
/// machine basic block. If this is true for the instruction, it basically
/// means that it is a pseudo instruction used at SelectionDAG time that is
@@ -424,8 +424,8 @@ public:
///
/// If this is true, the TargetLoweringInfo::InsertAtEndOfBasicBlock method
/// is used to insert this into the MachineBasicBlock.
- bool usesCustomDAGSchedInsertionHook() const {
- return Flags & (1 << TID::UsesCustomDAGSchedInserter);
+ bool usesCustomInsertionHook() const {
+ return Flags & (1 << TID::UsesCustomInserter);
}
/// isRematerializable - Returns true if this instruction is a candidate for
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 919bef1..1d42c32 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -133,13 +133,34 @@ private:
AliasAnalysis *AA) const;
public:
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
+ /// isMoveInstr - Return true if the instruction is a register to register
+ /// move and return the source and dest operands and their sub-register
+ /// indices by reference.
virtual bool isMoveInstr(const MachineInstr& MI,
unsigned& SrcReg, unsigned& DstReg,
unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
return false;
}
+
+ /// isIdentityCopy - Return true if the instruction is a copy (or
+ /// extract_subreg, insert_subreg, subreg_to_reg) where the source and
+ /// destination registers are the same.
+ bool isIdentityCopy(const MachineInstr &MI) const {
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ SrcReg == DstReg)
+ return true;
+
+ if (MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg())
+ return true;
+
+ if ((MI.getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ MI.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) &&
+ MI.getOperand(0).getReg() == MI.getOperand(2).getReg())
+ return true;
+ return false;
+ }
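A sketch of the kind of cleanup the new helper enables in a machine-level pass; the pass boilerplate is elided:

```cpp
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

// Sketch: delete copies (including the subreg forms) that isIdentityCopy
// proves are no-ops. The iterator is advanced before erasing.
static void deleteIdentityCopies(MachineBasicBlock &MBB,
                                 const TargetInstrInfo *TII) {
  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
    MachineInstr &MI = *I++;
    if (TII->isIdentityCopy(MI))
      MI.eraseFromParent();
  }
}
```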
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
@@ -384,9 +405,12 @@ public:
/// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new
/// instruction after load / store are unfolded from an instruction of the
/// specified opcode. It returns zero if the specified unfolding is not
- /// possible.
+ /// possible. If LoadRegIndex is non-null, it is filled in with the index of
+ /// the operand that will receive the register holding the loaded value.
virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore) const {
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const {
return 0;
}
diff --git a/include/llvm/Target/TargetJITInfo.h b/include/llvm/Target/TargetJITInfo.h
index 9545689..809f183 100644
--- a/include/llvm/Target/TargetJITInfo.h
+++ b/include/llvm/Target/TargetJITInfo.h
@@ -18,7 +18,7 @@
#define LLVM_TARGET_TARGETJITINFO_H
#include <cassert>
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class Function;
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 4f567b0..8bc39d0 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -325,12 +325,11 @@ public:
/// scalarizing vs using the wider vector type.
virtual EVT getWidenVectorType(EVT VT) const;
- typedef std::vector<APFloat>::const_iterator legal_fpimm_iterator;
- legal_fpimm_iterator legal_fpimm_begin() const {
- return LegalFPImmediates.begin();
- }
- legal_fpimm_iterator legal_fpimm_end() const {
- return LegalFPImmediates.end();
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will materialize
+ /// the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ return false;
}
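How a target might override the new hook, as a sketch inside its TargetLowering subclass (class boilerplate elided); the policy shown, accepting only +0.0 f32 immediates, is invented for illustration:

```cpp
// Sketch: claim only +0.0 f32 as natively materializable; the legalizer
// sends every other FP immediate to the constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const {
  return VT == MVT::f32 && Imm.isPosZero();
}
```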
/// isShuffleMaskLegal - Targets can use this to indicate that they only
@@ -1051,12 +1050,6 @@ protected:
PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
}
- /// addLegalFPImmediate - Indicate that this target can instruction select
- /// the specified FP immediate natively.
- void addLegalFPImmediate(const APFloat& Imm) {
- LegalFPImmediates.push_back(Imm);
- }
-
/// setTargetDAGCombine - Targets should invoke this method for each target
/// independent node that they want to provide a custom DAG combiner for by
/// implementing the PerformDAGCombine virtual method.
@@ -1432,14 +1425,15 @@ public:
SelectionDAG &DAG) const;
//===--------------------------------------------------------------------===//
- // Scheduler hooks
+ // Instruction Emitting Hooks
//
// EmitInstrWithCustomInserter - This method should be implemented by targets
- // that mark instructions with the 'usesCustomDAGSchedInserter' flag. These
+ // that mark instructions with the 'usesCustomInserter' flag. These
// instructions are special in various ways, which require special support to
// insert. The specified MachineInstr is created but not inserted into any
- // basic blocks, and the scheduler passes ownership of it to this method.
+ // basic blocks, and this method is called to expand it into a sequence of
+ // instructions, potentially also creating new basic blocks and control flow.
// When new basic blocks are inserted and the edges from MBB to its successors
// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
// DenseMap.
@@ -1696,8 +1690,6 @@ private:
ValueTypeActionImpl ValueTypeActions;
- std::vector<APFloat> LegalFPImmediates;
-
std::vector<std::pair<EVT, TargetRegisterClass*> > AvailableRegClasses;
/// TargetDAGCombineArray - Targets can specify ISD nodes that they would
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 700c64c..f123d66 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -269,6 +269,10 @@ def externalsym : SDNode<"ISD::ExternalSymbol", SDTPtrLeaf, [],
"ExternalSymbolSDNode">;
def texternalsym: SDNode<"ISD::TargetExternalSymbol", SDTPtrLeaf, [],
"ExternalSymbolSDNode">;
+def blockaddress : SDNode<"ISD::BlockAddress", SDTPtrLeaf, [],
+ "BlockAddressSDNode">;
+def tblockaddress: SDNode<"ISD::TargetBlockAddress", SDTPtrLeaf, [],
+ "BlockAddressSDNode">;
def add : SDNode<"ISD::ADD" , SDTIntBinOp ,
[SDNPCommutative, SDNPAssociative]>;
@@ -325,6 +329,8 @@ def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>;
def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>;
def fsin : SDNode<"ISD::FSIN" , SDTFPUnaryOp>;
def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>;
+def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>;
+def flog2 : SDNode<"ISD::FLOG2" , SDTFPUnaryOp>;
def frint : SDNode<"ISD::FRINT" , SDTFPUnaryOp>;
def ftrunc : SDNode<"ISD::FTRUNC" , SDTFPUnaryOp>;
def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>;
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 9189c43..5e17904 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -69,13 +69,6 @@ ModulePass *createGlobalOptimizerPass();
//===----------------------------------------------------------------------===//
-/// createRaiseAllocationsPass - Return a new pass that transforms malloc and
-/// free function calls into malloc and free instructions.
-///
-ModulePass *createRaiseAllocationsPass();
-
-
-//===----------------------------------------------------------------------===//
/// createDeadTypeEliminationPass - Return a new pass that eliminates symbol
/// table entries for types that are never used.
///
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index fee4e65..523a8f4 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -225,15 +225,6 @@ extern const PassInfo *const LoopSimplifyID;
//===----------------------------------------------------------------------===//
//
-// LowerAllocations - Turn free instructions into @free calls.
-//
-// AU.addRequiredID(LowerAllocationsID);
-//
-Pass *createLowerAllocationsPass();
-extern const PassInfo *const LowerAllocationsID;
-
-//===----------------------------------------------------------------------===//
-//
// TailCallElimination - This pass eliminates call instructions to the current
// function which occur immediately before return instructions.
//
@@ -280,7 +271,7 @@ extern const PassInfo *const LCSSAID;
// GVN - This pass performs global value numbering and redundant load
// elimination cotemporaneously.
//
-FunctionPass *createGVNPass();
+FunctionPass *createGVNPass(bool NoPRE = false);
//===----------------------------------------------------------------------===//
//
@@ -316,12 +307,6 @@ FunctionPass *createCodeGenPreparePass(const TargetLowering *TLI = 0);
//===----------------------------------------------------------------------===//
//
-// CodeGenLICM - This pass performs late LICM; hoisting constants out of loops.
-//
-Pass *createCodeGenLICMPass();
-
-//===----------------------------------------------------------------------===//
-//
// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
//
FunctionPass *createInstructionNamerPass();
@@ -341,6 +326,24 @@ FunctionPass *createSSIPass();
//
FunctionPass *createSSIEverythingPass();
+//===----------------------------------------------------------------------===//
+//
+// GEPSplitter - Split complex GEPs into simple ones
+//
+FunctionPass *createGEPSplitterPass();
+
+//===----------------------------------------------------------------------===//
+//
+// SCCVN - Aggressively eliminate redundant scalar values
+//
+FunctionPass *createSCCVNPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ABCD - Elimination of Array Bounds Checks on Demand
+//
+FunctionPass *createABCDPass();
+
} // End llvm namespace
#endif
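Taken together, the new factory functions slot into the usual legacy pass pipeline; a minimal sketch (header names as of this revision):

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    static void addScalarOpts(PassManager &PM) {
      PM.add(createGVNPass(/*NoPRE=*/true)); // GVN with PRE disabled via the new flag
      PM.add(createSCCVNPass());             // aggressive scalar value numbering
      PM.add(createGEPSplitterPass());       // split complex GEPs into simple ones
    }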
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index e766d72..8172114 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -116,8 +116,8 @@ bool isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
/// split the critical edge. This will update DominatorTree and
/// DominatorFrontier information if it is available, thus calling this pass
-/// will not invalidate either of them. This returns true if the edge was split,
-/// false otherwise.
+/// will not invalidate either of them. This returns the new block if the edge
+/// was split, null otherwise.
///
/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
/// specified successor will be merged into the same critical edge block.
@@ -126,10 +126,16 @@ bool isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
/// dest go to one block instead of each going to a different block, but isn't
/// the standard definition of a "critical edge".
///
+/// It is invalid to call this function on a critical edge that starts at an
+/// IndirectBrInst. Splitting these edges will almost always create an invalid
+/// program because the address of the new block won't be the one that is jumped
+/// to.
+///
BasicBlock *SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
Pass *P = 0, bool MergeIdenticalEdges = false);
-inline BasicBlock *SplitCriticalEdge(BasicBlock *BB, succ_iterator SI, Pass *P = 0) {
+inline BasicBlock *SplitCriticalEdge(BasicBlock *BB, succ_iterator SI,
+ Pass *P = 0) {
return SplitCriticalEdge(BB->getTerminator(), SI.getSuccessorIndex(), P);
}
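With the return type now BasicBlock* instead of bool, callers can act on the split block directly (sketch; TI, SuccNum, and P are whatever the caller already has in hand):

    if (BasicBlock *NewBB = SplitCriticalEdge(TI, SuccNum, P)) {
      // The edge was critical and has been split; NewBB now sits on the
      // edge, so code (e.g. spills or copies) can be sunk into it here.
    }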
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index ad99c74..2364330 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -50,7 +50,7 @@ class SSAUpdater {
public:
/// SSAUpdater constructor. If InsertedPHIs is specified, it will be filled
/// in with all PHI Nodes created by rewriting.
- SSAUpdater(SmallVectorImpl<PHINode*> *InsertedPHIs = 0);
+ explicit SSAUpdater(SmallVectorImpl<PHINode*> *InsertedPHIs = 0);
~SSAUpdater();
/// Initialize - Reset this object to get ready for a new set of SSA
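The constructor is now explicit, so the PHI-collection vector must be passed deliberately rather than through an implicit conversion; typical use looks roughly like this (method names per this revision's SSAUpdater, so treat them as assumptions; OrigValue, DefBB, NewDef, and UseBB are hypothetical):

    SmallVector<PHINode*, 8> InsertedPHIs;
    SSAUpdater SSA(&InsertedPHIs);    // explicit: no accidental conversions
    SSA.Initialize(OrigValue);        // prototype value being rewritten
    SSA.AddAvailableValue(DefBB, NewDef);
    Value *V = SSA.GetValueInMiddleOfBlock(UseBB);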
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index d31edab..ed33413 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -20,10 +20,9 @@
namespace llvm {
class Value;
class Instruction;
- class LLVMContext;
typedef DenseMap<const Value *, Value *> ValueMapTy;
- Value *MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context);
+ Value *MapValue(const Value *V, ValueMapTy &VM);
void RemapInstruction(Instruction *I, ValueMapTy &VM);
} // End llvm namespace
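After this change the context is recovered from the values themselves, so call sites simply drop the third argument (sketch; OldV, NewV, V, and Clone are hypothetical):

    ValueMapTy VM;                    // maps original values to replacements
    VM[OldV] = NewV;
    Value *Mapped = MapValue(V, VM);  // no LLVMContext parameter anymore
    RemapInstruction(Clone, VM);      // rewrites Clone's operands through VM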
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index 4a470af..752635c 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -12,9 +12,8 @@
#define LLVM_TYPE_H
#include "llvm/AbstractTypeUser.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/System/Atomic.h"
#include "llvm/ADT/GraphTraits.h"
#include <string>
@@ -28,6 +27,7 @@ class IntegerType;
class TypeMapBase;
class raw_ostream;
class Module;
+class LLVMContext;
/// This file contains the declaration of the Type class. For more "Type" type
/// stuff, look in DerivedTypes.h.
diff --git a/include/llvm/TypeSymbolTable.h b/include/llvm/TypeSymbolTable.h
index 4dd3a4a..d84196f 100644
--- a/include/llvm/TypeSymbolTable.h
+++ b/include/llvm/TypeSymbolTable.h
@@ -15,6 +15,7 @@
#define LLVM_TYPE_SYMBOL_TABLE_H
#include "llvm/Type.h"
+#include "llvm/ADT/StringRef.h"
#include <map>
namespace llvm {
@@ -69,12 +70,16 @@ public:
/// Lookup the type associated with name.
/// @returns end() if the name is not found, or an iterator at the entry for
/// Type.
- iterator find(const StringRef &name);
+ iterator find(const StringRef &Name) {
+ return tmap.find(Name);
+ }
/// Lookup the type associated with name.
/// @returns end() if the name is not found, or an iterator at the entry for
/// Type.
- const_iterator find(const StringRef &name) const;
+ const_iterator find(const StringRef &Name) const {
+ return tmap.find(Name);
+ }
/// @returns true iff the symbol table is empty.
/// @brief Determine if the symbol table is empty
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index c09fdfb..b485524 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -210,6 +210,7 @@ public:
GlobalAliasVal, // This is an instance of GlobalAlias
GlobalVariableVal, // This is an instance of GlobalVariable
UndefValueVal, // This is an instance of UndefValue
+ BlockAddressVal, // This is an instance of BlockAddress
ConstantExprVal, // This is an instance of ConstantExpr
ConstantAggregateZeroVal, // This is an instance of ConstantAggregateNull
ConstantIntVal, // This is an instance of ConstantInt
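The new enumerator identifies the blockaddress constants introduced in this revision (the BlockAddress class itself lands in Constants.h); a sketch, with BB and V hypothetical:

    BlockAddress *BA = BlockAddress::get(BB); // address of BB in its function
    if (isa<BlockAddress>(V)) {
      // equivalently: V->getValueID() == Value::BlockAddressVal
    }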
diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/ValueSymbolTable.h
index 4f8ebe8..b147c1e 100644
--- a/include/llvm/ValueSymbolTable.h
+++ b/include/llvm/ValueSymbolTable.h
@@ -16,7 +16,7 @@
#include "llvm/Value.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
template<typename ValueSubClass, typename ItemParentClass>
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index c456990..0234965 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -239,7 +239,7 @@ bool llvm::isNoAliasCall(const Value *V) {
/// NoAlias returns
///
bool llvm::isIdentifiedObject(const Value *V) {
- if (isa<AllocationInst>(V) || isNoAliasCall(V))
+ if (isa<AllocaInst>(V) || isNoAliasCall(V))
return true;
if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
return true;
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 272c871..030bcd2 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -17,7 +17,6 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -28,8 +27,7 @@ static cl::opt<bool>
PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
namespace {
- class VISIBILITY_HIDDEN AliasAnalysisCounter
- : public ModulePass, public AliasAnalysis {
+ class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
unsigned No, May, Must;
unsigned NoMR, JustRef, JustMod, MR;
const char *Name;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index bb95c01..6a2564c 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -28,7 +28,6 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SetVector.h"
using namespace llvm;
@@ -45,7 +44,7 @@ static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
namespace {
- class VISIBILITY_HIDDEN AAEval : public FunctionPass {
+ class AAEval : public FunctionPass {
unsigned NoAlias, MayAlias, MustAlias;
unsigned NoModRef, Mod, Ref, ModRef;
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index 1e82621..cf4727f 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -23,14 +23,12 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Support/Compiler.h"
#include <set>
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN AliasDebugger
- : public ModulePass, public AliasAnalysis {
+ class AliasDebugger : public ModulePass, public AliasAnalysis {
//What we do is simple. Keep track of every value the AA could
//know about, and verify that queries are one of those.
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index b056d00..c037c8d 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -19,7 +19,6 @@
#include "llvm/Type.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/Format.h"
@@ -297,12 +296,6 @@ bool AliasSetTracker::add(StoreInst *SI) {
return NewPtr;
}
-bool AliasSetTracker::add(FreeInst *FI) {
- bool NewPtr;
- addPointer(FI->getOperand(0), ~0, AliasSet::Mods, NewPtr);
- return NewPtr;
-}
-
bool AliasSetTracker::add(VAArgInst *VAAI) {
bool NewPtr;
addPointer(VAAI->getOperand(0), ~0, AliasSet::ModRef, NewPtr);
@@ -338,8 +331,6 @@ bool AliasSetTracker::add(Instruction *I) {
return add(CI);
else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
return add(II);
- else if (FreeInst *FI = dyn_cast<FreeInst>(I))
- return add(FI);
else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
return add(VAAI);
return true;
@@ -428,13 +419,6 @@ bool AliasSetTracker::remove(StoreInst *SI) {
return true;
}
-bool AliasSetTracker::remove(FreeInst *FI) {
- AliasSet *AS = findAliasSetForPointer(FI->getOperand(0), ~0);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
bool AliasSetTracker::remove(VAArgInst *VAAI) {
AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0), ~0);
if (!AS) return false;
@@ -460,8 +444,6 @@ bool AliasSetTracker::remove(Instruction *I) {
return remove(SI);
else if (CallInst *CI = dyn_cast<CallInst>(I))
return remove(CI);
- else if (FreeInst *FI = dyn_cast<FreeInst>(I))
- return remove(FI);
else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
return remove(VAAI);
return true;
@@ -599,7 +581,7 @@ AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
//===----------------------------------------------------------------------===//
namespace {
- class VISIBILITY_HIDDEN AliasSetPrinter : public FunctionPass {
+ class AliasSetPrinter : public FunctionPass {
AliasSetTracker *Tracker;
public:
static char ID; // Pass identification, replacement for typeid
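Since free is no longer its own instruction, the FreeInst overloads can go: a call to free now reaches the tracker through the generic path and is handled as an ordinary CallInst. Usage is unchanged (sketch; F and AA are hypothetical):

    AliasSetTracker AST(AA);  // AA: the AliasAnalysis implementation to consult
    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
      AST.add(&*I);           // calls to free are folded in via add(CallInst*)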
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 756ffea..c81190b 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -15,7 +15,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -30,7 +30,6 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include <algorithm>
@@ -80,7 +79,7 @@ static bool isKnownNonNull(const Value *V) {
/// object that never escapes from the function.
static bool isNonEscapingLocalObject(const Value *V) {
// If this is a local allocation, check to see if it escapes.
- if (isa<AllocationInst>(V) || isNoAliasCall(V))
+ if (isa<AllocaInst>(V) || isNoAliasCall(V))
return !PointerMayBeCaptured(V, false);
// If this is an argument that corresponds to a byval or noalias argument,
@@ -104,7 +103,7 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size,
const Type *AccessTy;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
AccessTy = GV->getType()->getElementType();
- } else if (const AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
if (!AI->isArrayAllocation())
AccessTy = AI->getType()->getElementType();
else
@@ -139,7 +138,7 @@ namespace {
/// implementations, in that it does not chain to a previous analysis. As
/// such it doesn't follow many of the rules that other alias analyses must.
///
- struct VISIBILITY_HIDDEN NoAA : public ImmutablePass, public AliasAnalysis {
+ struct NoAA : public ImmutablePass, public AliasAnalysis {
static char ID; // Class identification, replacement for typeinfo
NoAA() : ImmutablePass(&ID) {}
explicit NoAA(void *PID) : ImmutablePass(PID) { }
@@ -194,7 +193,7 @@ namespace {
/// BasicAliasAnalysis - This is the default alias analysis implementation.
/// Because it doesn't chain to a previous alias analysis (like -no-aa), it
/// derives from the NoAA class.
- struct VISIBILITY_HIDDEN BasicAliasAnalysis : public NoAA {
+ struct BasicAliasAnalysis : public NoAA {
static char ID; // Class identification, replacement for typeinfo
BasicAliasAnalysis() : NoAA(&ID) {}
AliasResult alias(const Value *V1, unsigned V1Size,
@@ -218,7 +217,7 @@ namespace {
private:
// VisitedPHIs - Track PHI nodes visited by an aliasCheck() call.
- SmallPtrSet<const PHINode*, 16> VisitedPHIs;
+ SmallPtrSet<const Value*, 16> VisitedPHIs;
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
// against another.
@@ -230,6 +229,10 @@ namespace {
AliasResult aliasPHI(const PHINode *PN, unsigned PNSize,
const Value *V2, unsigned V2Size);
+ /// aliasSelect - Disambiguate a Select instruction against another value.
+ AliasResult aliasSelect(const SelectInst *SI, unsigned SISize,
+ const Value *V2, unsigned V2Size);
+
AliasResult aliasCheck(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size);
@@ -520,6 +523,41 @@ BasicAliasAnalysis::aliasGEP(const Value *V1, unsigned V1Size,
return MayAlias;
}
+// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select instruction
+// against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
+ const Value *V2, unsigned V2Size) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for aliases between the values on corresponding arms.
+ if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
+ if (SI->getCondition() == SI2->getCondition()) {
+ AliasResult Alias =
+ aliasCheck(SI->getTrueValue(), SISize,
+ SI2->getTrueValue(), V2Size);
+ if (Alias == MayAlias)
+ return MayAlias;
+ AliasResult ThisAlias =
+ aliasCheck(SI->getFalseValue(), SISize,
+ SI2->getFalseValue(), V2Size);
+ if (ThisAlias != Alias)
+ return MayAlias;
+ return Alias;
+ }
+
+ // If both arms of the Select alias V2 the same way (NoAlias or MustAlias),
+ // return that result. Otherwise, return MayAlias.
+ AliasResult Alias =
+ aliasCheck(SI->getTrueValue(), SISize, V2, V2Size);
+ if (Alias == MayAlias)
+ return MayAlias;
+ AliasResult ThisAlias =
+ aliasCheck(SI->getFalseValue(), SISize, V2, V2Size);
+ if (ThisAlias != Alias)
+ return MayAlias;
+ return Alias;
+}
+
// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
// against another.
AliasAnalysis::AliasResult
@@ -529,6 +567,28 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
if (!VisitedPHIs.insert(PN))
return MayAlias;
+ // If the values are PHIs in the same block, we can do a more precise
+ // as well as efficient check: just check for aliases between the values
+ // on corresponding edges.
+ if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
+ if (PN2->getParent() == PN->getParent()) {
+ AliasResult Alias =
+ aliasCheck(PN->getIncomingValue(0), PNSize,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
+ V2Size);
+ if (Alias == MayAlias)
+ return MayAlias;
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ AliasResult ThisAlias =
+ aliasCheck(PN->getIncomingValue(i), PNSize,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+ V2Size);
+ if (ThisAlias != Alias)
+ return MayAlias;
+ }
+ return Alias;
+ }
+
SmallPtrSet<Value*, 4> UniqueSrc;
SmallVector<Value*, 4> V1Srcs;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -543,7 +603,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
V1Srcs.push_back(PV1);
}
- AliasResult Alias = aliasCheck(V1Srcs[0], PNSize, V2, V2Size);
+ AliasResult Alias = aliasCheck(V2, V2Size, V1Srcs[0], PNSize);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
if (Alias == MayAlias)
@@ -553,6 +613,12 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
// NoAlias / MustAlias. Otherwise, returns MayAlias.
for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
Value *V = V1Srcs[i];
+
+ // If V2 is a PHI, the recursive case will have been caught in the
+ // above aliasCheck call, so these subsequent calls to aliasCheck
+ // don't need to assume that V2 is being visited recursively.
+ VisitedPHIs.erase(V2);
+
AliasResult ThisAlias = aliasCheck(V2, V2Size, V, PNSize);
if (ThisAlias != Alias || ThisAlias == MayAlias)
return MayAlias;
@@ -587,8 +653,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
return NoAlias;
// Arguments can't alias with local allocations or noalias calls.
- if ((isa<Argument>(O1) && (isa<AllocationInst>(O2) || isNoAliasCall(O2))) ||
- (isa<Argument>(O2) && (isa<AllocationInst>(O1) || isNoAliasCall(O1))))
+ if ((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
+ (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1))))
return NoAlias;
// Most objects can't alias null.
@@ -629,6 +695,13 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
if (const PHINode *PN = dyn_cast<PHINode>(V1))
return aliasPHI(PN, V1Size, V2, V2Size);
+ if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (const SelectInst *S1 = dyn_cast<SelectInst>(V1))
+ return aliasSelect(S1, V1Size, V2, V2Size);
+
return MayAlias;
}
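The new select and same-block-PHI logic is visible only through the usual query interface; clients keep asking the same question (sketch; AA and the two pointers are hypothetical, sizes in bytes per this interface):

    AliasAnalysis::AliasResult R = AA.alias(P1, Size1, P2, Size2);
    if (R == AliasAnalysis::NoAlias) {
      // e.g. selects over the same condition with NoAlias arms land here now
    }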
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 08f070c..e06704b 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -20,11 +20,10 @@
#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
using namespace llvm;
namespace {
- struct VISIBILITY_HIDDEN CFGViewer : public FunctionPass {
+ struct CFGViewer : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
CFGViewer() : FunctionPass(&ID) {}
@@ -46,7 +45,7 @@ static RegisterPass<CFGViewer>
V0("view-cfg", "View CFG of function", false, true);
namespace {
- struct VISIBILITY_HIDDEN CFGOnlyViewer : public FunctionPass {
+ struct CFGOnlyViewer : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
CFGOnlyViewer() : FunctionPass(&ID) {}
@@ -69,7 +68,7 @@ V1("view-cfg-only",
"View CFG of function (with no function bodies)", false, true);
namespace {
- struct VISIBILITY_HIDDEN CFGPrinter : public FunctionPass {
+ struct CFGPrinter : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
CFGPrinter() : FunctionPass(&ID) {}
explicit CFGPrinter(void *pid) : FunctionPass(pid) {}
@@ -102,7 +101,7 @@ static RegisterPass<CFGPrinter>
P1("dot-cfg", "Print CFG of function to 'dot' file", false, true);
namespace {
- struct VISIBILITY_HIDDEN CFGOnlyPrinter : public FunctionPass {
+ struct CFGOnlyPrinter : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
CFGOnlyPrinter() : FunctionPass(&ID) {}
explicit CFGOnlyPrinter(void *pid) : FunctionPass(pid) {}
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index d4be986..f21fd54 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -23,7 +23,7 @@ add_llvm_library(LLVMAnalysis
LoopDependenceAnalysis.cpp
LoopInfo.cpp
LoopPass.cpp
- MallocHelper.cpp
+ MemoryBuiltins.cpp
MemoryDependenceAnalysis.cpp
PointerTracking.cpp
PostDominators.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index b30ac71..f615881 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -73,9 +73,6 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) {
// captured.
break;
}
- case Instruction::Free:
- // Freeing a pointer does not cause it to be captured.
- break;
case Instruction::Load:
// Loading from a pointer does not cause it to be captured.
break;
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 214caeb..33a5792 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -39,6 +39,138 @@ using namespace llvm;
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//
+/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
+/// TargetData. This always returns a non-null constant, but it may be a
+/// ConstantExpr if unfoldable.
+static Constant *FoldBitCast(Constant *C, const Type *DestTy,
+ const TargetData &TD) {
+
+ // This only handles casts to vectors currently.
+ const VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
+ if (DestVTy == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
+ // vector so the code below can handle it uniformly.
+ if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
+ Constant *Ops = C; // don't take the address of C!
+ return FoldBitCast(ConstantVector::get(&Ops, 1), DestTy, TD);
+ }
+
+ // If this is a bitcast from constant vector -> vector, fold it.
+ ConstantVector *CV = dyn_cast<ConstantVector>(C);
+ if (CV == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If the element types match, VMCore can fold it.
+ unsigned NumDstElt = DestVTy->getNumElements();
+ unsigned NumSrcElt = CV->getNumOperands();
+ if (NumDstElt == NumSrcElt)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ const Type *SrcEltTy = CV->getType()->getElementType();
+ const Type *DstEltTy = DestVTy->getElementType();
+
+ // Otherwise, we're changing the number of elements in a vector, which
+ // requires endianness information to do the right thing. For example,
+ // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ // folds to (little endian):
+ // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+ // and to (big endian):
+ // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+ // First thing is first. We only want to think about integer here, so if
+ // we have something in FP form, recast it as integer.
+ if (DstEltTy->isFloatingPoint()) {
+ // Fold to a vector of integers with the same size as our FP type.
+ unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+ const Type *DestIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
+ // Recursively handle this integer conversion, if possible.
+ C = FoldBitCast(C, DestIVTy, TD);
+ if (!C) return ConstantExpr::getBitCast(C, DestTy);
+
+ // Finally, VMCore can handle this now that #elts line up.
+ return ConstantExpr::getBitCast(C, DestTy);
+ }
+
+ // Okay, we know the destination is integer, if the input is FP, convert
+ // it to integer first.
+ if (SrcEltTy->isFloatingPoint()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ const Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
+ // Ask VMCore to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ CV = dyn_cast<ConstantVector>(C);
+ if (!CV) // If VMCore wasn't able to fold it, bail out.
+ return C;
+ }
+
+ // Now we know that the input and output vectors are both integer vectors
+ // of the same size, and that their #elements is not the same. Do the
+ // conversion here, which depends on whether the input or output has
+ // more elements.
+ bool isLittleEndian = TD.isLittleEndian();
+
+ SmallVector<Constant*, 32> Result;
+ if (NumDstElt < NumSrcElt) {
+ // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
+ Constant *Zero = Constant::getNullValue(DstEltTy);
+ unsigned Ratio = NumSrcElt/NumDstElt;
+ unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+ unsigned SrcElt = 0;
+ for (unsigned i = 0; i != NumDstElt; ++i) {
+ // Build each element of the result.
+ Constant *Elt = Zero;
+ unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // Zero extend the element to the right size.
+ Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+ // Shift it to the right place, depending on endianness.
+ Src = ConstantExpr::getShl(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+ // Mix it in.
+ Elt = ConstantExpr::getOr(Elt, Src);
+ }
+ Result.push_back(Elt);
+ }
+ } else {
+ // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ unsigned Ratio = NumDstElt/NumSrcElt;
+ unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+
+ // Loop over each source value, expanding into multiple results.
+ for (unsigned i = 0; i != NumSrcElt; ++i) {
+ Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ // Shift the piece of the value into the right place, depending on
+ // endianness.
+ Constant *Elt = ConstantExpr::getLShr(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+
+ // Truncate and remember this piece.
+ Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+ }
+ }
+ }
+
+ return ConstantVector::get(Result.data(), Result.size());
+}
+
+
/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset
/// from a global, return the global and the constant. Because of
/// constantexprs, this function is recursive.
@@ -103,6 +235,8 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) &&
"Out of range access");
+ // If this element is zero or undefined, we can just return since *CurPtr is
+ // zero initialized.
if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
return true;
@@ -115,7 +249,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
unsigned IntBytes = unsigned(CI->getBitWidth()/8);
for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
- CurPtr[i] = (unsigned char)(Val >> ByteOffset * 8);
+ CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8));
++ByteOffset;
}
return true;
@@ -123,13 +257,14 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
if (CFP->getType()->isDoubleTy()) {
- C = ConstantExpr::getBitCast(C, Type::getInt64Ty(C->getContext()));
+ C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD);
return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
}
if (CFP->getType()->isFloatTy()){
- C = ConstantExpr::getBitCast(C, Type::getInt32Ty(C->getContext()));
+ C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
}
+ return false;
}
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
@@ -161,6 +296,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
return true;
// Move to the next element of the struct.
+ CurPtr += NextEltOffset-CurEltOffset-ByteOffset;
BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset;
ByteOffset = 0;
CurEltOffset = NextEltOffset;
@@ -231,9 +367,9 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
} else
return 0;
- C = ConstantExpr::getBitCast(C, MapTy);
+ C = FoldBitCast(C, MapTy, TD);
if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
- return ConstantExpr::getBitCast(Res, LoadTy);
+ return FoldBitCast(Res, LoadTy, TD);
return 0;
}
@@ -246,8 +382,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
return 0;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
- if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
- !GV->hasDefinitiveInitializer() ||
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
!GV->getInitializer()->getType()->isSized())
return 0;
@@ -476,126 +611,11 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
if (Ty != cast<PointerType>(ResultTy)->getElementType())
- C = ConstantExpr::getBitCast(C, ResultTy);
+ C = FoldBitCast(C, ResultTy, *TD);
return C;
}
-/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
-/// targetdata. Return 0 if unfoldable.
-static Constant *FoldBitCast(Constant *C, const Type *DestTy,
- const TargetData &TD, LLVMContext &Context) {
- // If this is a bitcast from constant vector -> vector, fold it.
- if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
- if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
- // If the element types match, VMCore can fold it.
- unsigned NumDstElt = DestVTy->getNumElements();
- unsigned NumSrcElt = CV->getNumOperands();
- if (NumDstElt == NumSrcElt)
- return 0;
-
- const Type *SrcEltTy = CV->getType()->getElementType();
- const Type *DstEltTy = DestVTy->getElementType();
-
- // Otherwise, we're changing the number of elements in a vector, which
- // requires endianness information to do the right thing. For example,
- // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
- // folds to (little endian):
- // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
- // and to (big endian):
- // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
-
- // First thing is first. We only want to think about integer here, so if
- // we have something in FP form, recast it as integer.
- if (DstEltTy->isFloatingPoint()) {
- // Fold to an vector of integers with same size as our FP type.
- unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
- const Type *DestIVTy = VectorType::get(
- IntegerType::get(Context, FPWidth), NumDstElt);
- // Recursively handle this integer conversion, if possible.
- C = FoldBitCast(C, DestIVTy, TD, Context);
- if (!C) return 0;
-
- // Finally, VMCore can handle this now that #elts line up.
- return ConstantExpr::getBitCast(C, DestTy);
- }
-
- // Okay, we know the destination is integer, if the input is FP, convert
- // it to integer first.
- if (SrcEltTy->isFloatingPoint()) {
- unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
- const Type *SrcIVTy = VectorType::get(
- IntegerType::get(Context, FPWidth), NumSrcElt);
- // Ask VMCore to do the conversion now that #elts line up.
- C = ConstantExpr::getBitCast(C, SrcIVTy);
- CV = dyn_cast<ConstantVector>(C);
- if (!CV) return 0; // If VMCore wasn't able to fold it, bail out.
- }
-
- // Now we know that the input and output vectors are both integer vectors
- // of the same size, and that their #elements is not the same. Do the
- // conversion here, which depends on whether the input or output has
- // more elements.
- bool isLittleEndian = TD.isLittleEndian();
-
- SmallVector<Constant*, 32> Result;
- if (NumDstElt < NumSrcElt) {
- // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
- Constant *Zero = Constant::getNullValue(DstEltTy);
- unsigned Ratio = NumSrcElt/NumDstElt;
- unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
- unsigned SrcElt = 0;
- for (unsigned i = 0; i != NumDstElt; ++i) {
- // Build each element of the result.
- Constant *Elt = Zero;
- unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
- for (unsigned j = 0; j != Ratio; ++j) {
- Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++));
- if (!Src) return 0; // Reject constantexpr elements.
-
- // Zero extend the element to the right size.
- Src = ConstantExpr::getZExt(Src, Elt->getType());
-
- // Shift it to the right place, depending on endianness.
- Src = ConstantExpr::getShl(Src,
- ConstantInt::get(Src->getType(), ShiftAmt));
- ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
-
- // Mix it in.
- Elt = ConstantExpr::getOr(Elt, Src);
- }
- Result.push_back(Elt);
- }
- } else {
- // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
- unsigned Ratio = NumDstElt/NumSrcElt;
- unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
-
- // Loop over each source value, expanding into multiple results.
- for (unsigned i = 0; i != NumSrcElt; ++i) {
- Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i));
- if (!Src) return 0; // Reject constantexpr elements.
-
- unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
- for (unsigned j = 0; j != Ratio; ++j) {
- // Shift the piece of the value into the right place, depending on
- // endianness.
- Constant *Elt = ConstantExpr::getLShr(Src,
- ConstantInt::get(Src->getType(), ShiftAmt));
- ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
-
- // Truncate and remember this piece.
- Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
- }
- }
- }
-
- return ConstantVector::get(Result.data(), Result.size());
- }
- }
-
- return 0;
-}
//===----------------------------------------------------------------------===//
@@ -721,11 +741,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
if (TD &&
TD->getPointerSizeInBits() <=
CE->getType()->getScalarSizeInBits()) {
- if (CE->getOpcode() == Instruction::PtrToInt) {
- Constant *Input = CE->getOperand(0);
- Constant *C = FoldBitCast(Input, DestTy, *TD, Context);
- return C ? C : ConstantExpr::getBitCast(Input, DestTy);
- }
+ if (CE->getOpcode() == Instruction::PtrToInt)
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+
// If there's a constant offset added to the integer value before
// it is casted back to a pointer, see if the expression can be
// converted into a GEP.
@@ -771,8 +789,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::BitCast:
if (TD)
- if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD, Context))
- return C;
+ return FoldBitCast(Ops[0], DestTy, *TD);
return ConstantExpr::getBitCast(Ops[0], DestTy);
case Instruction::Select:
return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
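The endianness example in FoldBitCast's comment can be checked with a plain host-side reinterpretation; this standalone snippet mirrors bitcasting <2 x i64> <i64 0, i64 1> to <4 x i32>:

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    int main() {
      uint64_t Src[2] = {0, 1};          // <2 x i64> <i64 0, i64 1>
      uint32_t Dst[4];
      std::memcpy(Dst, Src, sizeof Dst); // reinterpret the same 16 bytes
      // Little-endian hosts print 0 0 1 0; big-endian hosts print 0 0 0 1,
      // matching the two foldings described in the comment.
      std::printf("%u %u %u %u\n", Dst[0], Dst[1], Dst[2], Dst[3]);
      return 0;
    }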
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index 2bbe2e0..ab92e3f 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -35,7 +35,7 @@ PrintDirectory("print-fullpath",
cl::Hidden);
namespace {
- class VISIBILITY_HIDDEN PrintDbgInfo : public FunctionPass {
+ class PrintDbgInfo : public FunctionPass {
raw_ostream &Out;
void printStopPoint(const DbgStopPointInst *DSI);
void printFuncStart(const DbgFuncStartInst *FS);
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 7bb7e9b..7bff11e 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -84,8 +84,11 @@ DIDescriptor::getStringField(unsigned Elt) const {
return NULL;
if (Elt < DbgNode->getNumElements())
- if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getElement(Elt)))
+ if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getElement(Elt))) {
+ if (MDS->getLength() == 0)
+ return NULL;
return MDS->getString().data();
+ }
return NULL;
}
@@ -398,10 +401,10 @@ bool DIVariable::Verify() const {
/// getOriginalTypeSize - If this type is derived from a base type then
/// return base type size.
uint64_t DIDerivedType::getOriginalTypeSize() const {
- if (getTag() != dwarf::DW_TAG_member)
- return getSizeInBits();
DIType BT = getTypeDerivedFrom();
- if (BT.getTag() != dwarf::DW_TAG_base_type)
+ if (!BT.isNull() && BT.isDerivedType())
+ return DIDerivedType(BT.getNode()).getOriginalTypeSize();
+ if (BT.isNull())
return getSizeInBits();
return BT.getSizeInBits();
}
@@ -695,6 +698,32 @@ DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
}
+
+/// CreateBasicType - Create a basic type like int, float, etc.
+DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
+ StringRef Name,
+ DICompileUnit CompileUnit,
+ unsigned LineNumber,
+ Constant *SizeInBits,
+ Constant *AlignInBits,
+ Constant *OffsetInBits, unsigned Flags,
+ unsigned Encoding) {
+ Value *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_base_type),
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ SizeInBits,
+ AlignInBits,
+ OffsetInBits,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+ };
+ return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
+}
+
+
/// CreateDerivedType - Create a derived type like const qualified type,
/// pointer, typedef, etc.
DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
@@ -722,6 +751,35 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
}
+
+/// CreateDerivedType - Create a derived type like const qualified type,
+/// pointer, typedef, etc.
+DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
+ DIDescriptor Context,
+ StringRef Name,
+ DICompileUnit CompileUnit,
+ unsigned LineNumber,
+ Constant *SizeInBits,
+ Constant *AlignInBits,
+ Constant *OffsetInBits,
+ unsigned Flags,
+ DIType DerivedFrom) {
+ Value *Elts[] = {
+ GetTagConstant(Tag),
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ SizeInBits,
+ AlignInBits,
+ OffsetInBits,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom.getNode(),
+ };
+ return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
+}
+
+
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
DIDescriptor Context,
@@ -754,6 +812,38 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
}
+/// CreateCompositeType - Create a composite type like array, struct, etc.
+DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
+ DIDescriptor Context,
+ StringRef Name,
+ DICompileUnit CompileUnit,
+ unsigned LineNumber,
+ Constant *SizeInBits,
+ Constant *AlignInBits,
+ Constant *OffsetInBits,
+ unsigned Flags,
+ DIType DerivedFrom,
+ DIArray Elements,
+ unsigned RuntimeLang) {
+
+ Value *Elts[] = {
+ GetTagConstant(Tag),
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ SizeInBits,
+ AlignInBits,
+ OffsetInBits,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom.getNode(),
+ Elements.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+ };
+ return DICompositeType(MDNode::get(VMContext, &Elts[0], 12));
+}
+
+
/// CreateSubprogram - Create a new descriptor for the specified subprogram.
/// See comments in DISubprogram for descriptions of these fields. This
/// method does not unique the generated descriptors.
@@ -1217,9 +1307,10 @@ namespace llvm {
// Look for the bitcast.
for (Value::use_const_iterator I = V->use_begin(), E =V->use_end();
I != E; ++I)
- if (isa<BitCastInst>(I))
- return findDbgDeclare(*I, false);
-
+ if (isa<BitCastInst>(I)) {
+ const DbgDeclareInst *DDI = findDbgDeclare(*I, false);
+ if (DDI) return DDI;
+ }
return 0;
}
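The point of the new *Ex variants is that size, alignment, and offset arrive as Constant* rather than uint64_t, so they can stay symbolic until layout is known; a rough sketch (DIF, Ctx, CU, Ty, Align, and Offset are hypothetical):

    Constant *Size = ConstantExpr::getSizeOf(Ty); // folds once TargetData is known
    DIBasicType BT =
      DIF.CreateBasicTypeEx(Ctx, "int", CU, /*LineNumber=*/0,
                            Size, Align, Offset, /*Flags=*/0,
                            dwarf::DW_ATE_signed);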
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index 1c9159d..17f304c 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -59,12 +59,11 @@
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/System/Atomic.h"
@@ -126,8 +125,8 @@ namespace {
static bool isPod() { return true; }
};
- class VISIBILITY_HIDDEN Andersens : public ModulePass, public AliasAnalysis,
- private InstVisitor<Andersens> {
+ class Andersens : public ModulePass, public AliasAnalysis,
+ private InstVisitor<Andersens> {
struct Node;
/// Constraint - Objects of this structure are used to represent the various
@@ -594,11 +593,12 @@ namespace {
void visitReturnInst(ReturnInst &RI);
void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); }
void visitCallInst(CallInst &CI) {
- if (isMalloc(&CI)) visitAllocationInst(CI);
+ if (isMalloc(&CI)) visitAlloc(CI);
else visitCallSite(CallSite(&CI));
}
void visitCallSite(CallSite CS);
- void visitAllocationInst(Instruction &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitAlloc(Instruction &I);
void visitLoadInst(LoadInst &LI);
void visitStoreInst(StoreInst &SI);
void visitGetElementPtrInst(GetElementPtrInst &GEP);
@@ -792,7 +792,7 @@ void Andersens::IdentifyObjects(Module &M) {
// object.
if (isa<PointerType>(II->getType())) {
ValueNodes[&*II] = NumObjects++;
- if (AllocationInst *AI = dyn_cast<AllocationInst>(&*II))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&*II))
ObjectNodes[AI] = NumObjects++;
else if (isMalloc(&*II))
ObjectNodes[&*II] = NumObjects++;
@@ -1016,6 +1016,8 @@ bool Andersens::AnalyzeUsesOfFunction(Value *V) {
}
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
if (AnalyzeUsesOfFunction(GEP)) return true;
+ } else if (isFreeCall(*UI)) {
+ return false;
} else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
@@ -1037,8 +1039,6 @@ bool Andersens::AnalyzeUsesOfFunction(Value *V) {
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return true; // Allow comparison against null.
- } else if (isa<FreeInst>(*UI)) {
- return false;
} else {
return true;
}
@@ -1156,7 +1156,6 @@ void Andersens::visitInstruction(Instruction &I) {
case Instruction::Switch:
case Instruction::Unwind:
case Instruction::Unreachable:
- case Instruction::Free:
case Instruction::ICmp:
case Instruction::FCmp:
return;
@@ -1167,7 +1166,11 @@ void Andersens::visitInstruction(Instruction &I) {
}
}
-void Andersens::visitAllocationInst(Instruction &I) {
+void Andersens::visitAllocaInst(AllocaInst &I) {
+ visitAlloc(I);
+}
+
+void Andersens::visitAlloc(Instruction &I) {
unsigned ObjectIndex = getObject(&I);
GraphNodes[ObjectIndex].setValue(&I);
Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(I),
@@ -2819,7 +2822,7 @@ void Andersens::PrintNode(const Node *N) const {
else
errs() << "(unnamed)";
- if (isa<GlobalValue>(V) || isa<AllocationInst>(V) || isMalloc(V))
+ if (isa<GlobalValue>(V) || isa<AllocaInst>(V) || isMalloc(V))
if (N == &GraphNodes[getObject(V)])
errs() << "<mem>";
}
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index e2b288d..9cd8bb8 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -17,7 +17,6 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -26,7 +25,7 @@ namespace {
//===----------------------------------------------------------------------===//
// BasicCallGraph class definition
//
-class VISIBILITY_HIDDEN BasicCallGraph : public CallGraph, public ModulePass {
+class BasicCallGraph : public CallGraph, public ModulePass {
// Root is root of the call graph, or the external node if a 'main' function
// couldn't be found.
//
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 7949288..ddd6ff9 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -23,8 +23,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/MallocHelper.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -44,7 +43,7 @@ namespace {
/// function in the program. Later, the entries for these functions are
/// removed if the function is found to call an external function (in which
/// case we know nothing about it).
- struct VISIBILITY_HIDDEN FunctionRecord {
+ struct FunctionRecord {
/// GlobalInfo - Maintain mod/ref info for all of the globals without
/// addresses taken that are read or written (transitively) by this
/// function.
@@ -69,8 +68,7 @@ namespace {
};
/// GlobalsModRef - The actual analysis pass.
- class VISIBILITY_HIDDEN GlobalsModRef
- : public ModulePass, public AliasAnalysis {
+ class GlobalsModRef : public ModulePass, public AliasAnalysis {
/// NonAddressTakenGlobals - The globals that do not have their addresses
/// taken.
std::set<GlobalValue*> NonAddressTakenGlobals;
@@ -240,6 +238,8 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
} else if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
return true;
+ } else if (isFreeCall(*UI)) {
+ Writers.push_back(cast<Instruction>(*UI)->getParent()->getParent());
} else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
@@ -261,8 +261,6 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return true; // Allow comparison against null.
- } else if (FreeInst *F = dyn_cast<FreeInst>(*UI)) {
- Writers.push_back(F->getParent()->getParent());
} else {
return true;
}
@@ -439,7 +437,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
if (cast<StoreInst>(*II).isVolatile())
// Treat volatile stores as reading memory somewhere.
FunctionEffect |= Ref;
- } else if (isMalloc(&cast<Instruction>(*II)) || isa<FreeInst>(*II)) {
+ } else if (isMalloc(&cast<Instruction>(*II)) ||
+ isFreeCall(&cast<Instruction>(*II))) {
FunctionEffect |= ModRef;
}
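The renamed header's predicates replace the old per-instruction checks throughout; classifying heap traffic now looks roughly like this (sketch; isFreeCall is used in boolean context exactly as in this patch, and F is hypothetical):

    for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
      if (isMalloc(&*II))
        ++NumMallocs;       // a malloc call, or a bitcast of its result
      else if (isFreeCall(&*II))
        ++NumFrees;         // a call to free, formerly a FreeInst
    }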
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index b833baa..bd9377b 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -31,6 +31,9 @@ unsigned InlineCostAnalyzer::FunctionInfo::
// Eliminating a switch is a big win, proportional to the number of edges
// deleted.
Reduction += (SI->getNumSuccessors()-1) * 40;
+ else if (isa<IndirectBrInst>(*UI))
+ // Eliminating an indirect branch is a big win.
+ Reduction += 200;
else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
// Turning an indirect call into a direct call is a BIG win
Reduction += CI->getCalledValue() == V ? 500 : 0;
@@ -50,7 +53,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
// Unfortunately, we don't know the pointer that may get propagated here,
// so we can't make this decision.
if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
- isa<AllocationInst>(Inst))
+ isa<AllocaInst>(Inst))
continue;
bool AllOperandsConstant = true;
@@ -130,10 +133,6 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
NumInsts += InlineConstants::CallPenalty;
}
- // These, too, are calls.
- if (isa<FreeInst>(II))
- NumInsts += InlineConstants::CallPenalty;
-
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (!AI->isStaticAlloca())
this->usesDynamicAlloca = true;
@@ -147,19 +146,26 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
isa<PtrToIntInst>(CI))
continue;
- } else if (const GetElementPtrInst *GEPI =
- dyn_cast<GetElementPtrInst>(II)) {
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
// If a GEP has all constant indices, it will probably be folded with
// a load/store.
if (GEPI->hasAllConstantIndices())
continue;
}
- if (isa<ReturnInst>(II))
- ++NumRets;
-
++NumInsts;
}
+
+ if (isa<ReturnInst>(BB->getTerminator()))
+ ++NumRets;
+
+ // We never want to inline functions that contain an indirectbr. Inlining
+ // one would be incorrect because all the blockaddresses (in static global
+ // initializers, for example) would still refer to the original function,
+ // so the indirect jump would jump from the inlined copy of the function
+ // into the original function, which is extremely undefined behavior.
+ if (isa<IndirectBrInst>(BB->getTerminator()))
+ NeverInline = true;
}
/// analyzeFunction - Fill in the current structure with information gleaned
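To see why the comment above forbids inlining, consider how such a jump is built (sketch; the indirectbr/blockaddress APIs are new around this revision, so the exact signatures here are assumptions, and Addr, BB, and TargetBB are hypothetical):

    BlockAddress *Dest = BlockAddress::get(TargetBB); // names TargetBB's function
    IndirectBrInst *IBI = IndirectBrInst::Create(Addr, /*NumDests=*/1, BB);
    IBI->addDestination(TargetBB);
    // A blockaddress names a block of one specific function. Inlining clones
    // the blocks but not the blockaddress constants, so the cloned indirectbr
    // would jump back into the original function's blocks.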
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 4cde793..a4b041f 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -15,7 +15,6 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Pass.h"
#include "llvm/Function.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/raw_ostream.h"
@@ -34,8 +33,7 @@ STATISTIC(TotalMemInst, "Number of memory instructions");
namespace {
- class VISIBILITY_HIDDEN InstCount
- : public FunctionPass, public InstVisitor<InstCount> {
+ class InstCount : public FunctionPass, public InstVisitor<InstCount> {
friend class InstVisitor<InstCount>;
void visitFunction (Function &F) { ++TotalFuncs; }
@@ -76,11 +74,11 @@ FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
bool InstCount::runOnFunction(Function &F) {
unsigned StartMemInsts =
NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
- NumInvokeInst + NumAllocaInst + NumFreeInst;
+ NumInvokeInst + NumAllocaInst;
visit(F);
unsigned EndMemInsts =
NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
- NumInvokeInst + NumAllocaInst + NumFreeInst;
+ NumInvokeInst + NumAllocaInst;
TotalMemInst += EndMemInsts-StartMemInsts;
return false;
}
diff --git a/lib/Analysis/MallocHelper.cpp b/lib/Analysis/MallocHelper.cpp
deleted file mode 100644
index e7bb41e..0000000
--- a/lib/Analysis/MallocHelper.cpp
+++ /dev/null
@@ -1,265 +0,0 @@
-//===-- MallocHelper.cpp - Functions to identify malloc calls -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This family of functions identifies calls to malloc, bitcasts of malloc
-// calls, and the types and array sizes associated with them.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/MallocHelper.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/ConstantFolding.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// malloc Call Utility Functions.
-//
-
-/// isMalloc - Returns true if the value is either a malloc call or a
-/// bitcast of the result of a malloc call.
-bool llvm::isMalloc(const Value* I) {
- return extractMallocCall(I) || extractMallocCallFromBitCast(I);
-}
-
-static bool isMallocCall(const CallInst *CI) {
- if (!CI)
- return false;
-
- const Module* M = CI->getParent()->getParent()->getParent();
- Function *MallocFunc = M->getFunction("malloc");
-
- if (CI->getOperand(0) != MallocFunc)
- return false;
-
- // Check malloc prototype.
- // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
- // attribute will exist.
- const FunctionType *FTy = MallocFunc->getFunctionType();
- if (FTy->getNumParams() != 1)
- return false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
- if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
- return false;
- return true;
- }
-
- return false;
-}
-
-/// extractMallocCall - Returns the corresponding CallInst if the instruction
-/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
-/// ignore InvokeInst here.
-const CallInst* llvm::extractMallocCall(const Value* I) {
- const CallInst *CI = dyn_cast<CallInst>(I);
- return (isMallocCall(CI)) ? CI : NULL;
-}
-
-CallInst* llvm::extractMallocCall(Value* I) {
- CallInst *CI = dyn_cast<CallInst>(I);
- return (isMallocCall(CI)) ? CI : NULL;
-}
-
-static bool isBitCastOfMallocCall(const BitCastInst* BCI) {
- if (!BCI)
- return false;
-
- return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0)));
-}
-
-/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
-/// instruction is a bitcast of the result of a malloc call.
-CallInst* llvm::extractMallocCallFromBitCast(Value* I) {
- BitCastInst *BCI = dyn_cast<BitCastInst>(I);
- return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
- : NULL;
-}
-
-const CallInst* llvm::extractMallocCallFromBitCast(const Value* I) {
- const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
- return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
- : NULL;
-}
-
-static bool isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
- const TargetData* TD) {
- if (!CI)
- return false;
-
- const Type* T = getMallocAllocatedType(CI);
-
- // We can only identify an array malloc if we know the type of the malloc
- // call.
- if (!T) return false;
-
- Value* MallocArg = CI->getOperand(1);
- Constant *ElementSize = ConstantExpr::getSizeOf(T);
- ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
- MallocArg->getType());
- Constant *FoldedElementSize = ConstantFoldConstantExpression(
- cast<ConstantExpr>(ElementSize),
- Context, TD);
-
-
- if (isa<ConstantExpr>(MallocArg))
- return (MallocArg != ElementSize);
-
- BinaryOperator *BI = dyn_cast<BinaryOperator>(MallocArg);
- if (!BI)
- return false;
-
- if (BI->getOpcode() == Instruction::Mul)
- // ArraySize * ElementSize
- if (BI->getOperand(1) == ElementSize ||
- (FoldedElementSize && BI->getOperand(1) == FoldedElementSize))
- return true;
-
- // TODO: Detect case where MallocArg mul has been transformed to shl.
-
- return false;
-}
-
-/// isArrayMalloc - Returns the corresponding CallInst if the instruction
-/// matches the malloc call IR generated by CallInst::CreateMalloc(). This
-/// means that it is a malloc call with one bitcast use AND the malloc call's
-/// size argument is:
-/// 1. a constant not equal to the size of the malloced type
-/// or
-/// 2. the result of a multiplication by the size of the malloced type
-/// Otherwise it returns NULL.
-/// The unique bitcast is needed to determine the type/size of the array
-/// allocation.
-CallInst* llvm::isArrayMalloc(Value* I, LLVMContext &Context,
- const TargetData* TD) {
- CallInst *CI = extractMallocCall(I);
- return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL;
-}
-
-const CallInst* llvm::isArrayMalloc(const Value* I, LLVMContext &Context,
- const TargetData* TD) {
- const CallInst *CI = extractMallocCall(I);
- return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL;
-}
-
-/// getMallocType - Returns the PointerType resulting from the malloc call.
-/// This PointerType is the result type of the call's only bitcast use.
-/// If there is no unique bitcast use, then return NULL.
-const PointerType* llvm::getMallocType(const CallInst* CI) {
- assert(isMalloc(CI) && "GetMallocType and not malloc call");
-
- const BitCastInst* BCI = NULL;
-
- // Determine if CallInst has a bitcast use.
- for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end();
- UI != E; )
- if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
- break;
-
- // Malloc call has 1 bitcast use and no other uses, so type is the bitcast's
- // destination type.
- if (BCI && CI->hasOneUse())
- return cast<PointerType>(BCI->getDestTy());
-
- // Malloc call was not bitcast, so type is the malloc function's return type.
- if (!BCI)
- return cast<PointerType>(CI->getType());
-
- // Type could not be determined.
- return NULL;
-}
-
-/// getMallocAllocatedType - Returns the Type allocated by malloc call. This
-/// Type is the result type of the call's only bitcast use. If there is no
-/// unique bitcast use, then return NULL.
-const Type* llvm::getMallocAllocatedType(const CallInst* CI) {
- const PointerType* PT = getMallocType(CI);
- return PT ? PT->getElementType() : NULL;
-}
-
-/// isSafeToGetMallocArraySize - Returns true if the array size of a malloc can
-/// be determined. It can be determined in these 3 cases of malloc codegen:
-/// 1. non-array malloc: The malloc's size argument is a constant and equals the
-/// size of the type being malloced.
-/// 2. array malloc: This is a malloc call with one bitcast use AND the malloc
-/// call's size argument is a constant multiple of the size of the malloced
-/// type.
-/// 3. array malloc: This is a malloc call with one bitcast use AND the malloc
-/// call's size argument is the result of a multiplication by the size of the
-/// malloced type.
-/// Otherwise returns false.
-static bool isSafeToGetMallocArraySize(const CallInst *CI,
- LLVMContext &Context,
- const TargetData* TD) {
- if (!CI)
- return false;
-
- // Type must be known to determine array size.
- const Type* T = getMallocAllocatedType(CI);
- if (!T) return false;
-
- Value* MallocArg = CI->getOperand(1);
- Constant *ElementSize = ConstantExpr::getSizeOf(T);
- ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
- MallocArg->getType());
-
- // First, check if it is a non-array malloc.
- if (isa<ConstantExpr>(MallocArg) && (MallocArg == ElementSize))
- return true;
-
- // Second, check if it can be determined that this is an array malloc.
- return isArrayMallocHelper(CI, Context, TD);
-}
-
-/// isConstantOne - Return true only if val is constant int 1.
-static bool isConstantOne(Value *val) {
- return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
-}
-
-/// getMallocArraySize - Returns the array size of a malloc call. For array
-/// mallocs, the size is computated in 1 of 3 ways:
-/// 1. If the element type is of size 1, then array size is the argument to
-/// malloc.
-/// 2. Else if the malloc's argument is a constant, the array size is that
-/// argument divided by the element type's size.
-/// 3. Else the malloc argument must be a multiplication and the array size is
-/// the first operand of the multiplication.
-/// For non-array mallocs, the computed size is constant 1.
-/// This function returns NULL for all mallocs whose array size cannot be
-/// determined.
-Value* llvm::getMallocArraySize(CallInst* CI, LLVMContext &Context,
- const TargetData* TD) {
- if (!isSafeToGetMallocArraySize(CI, Context, TD))
- return NULL;
-
- // Match CreateMalloc's use of constant 1 array-size for non-array mallocs.
- if (!isArrayMalloc(CI, Context, TD))
- return ConstantInt::get(CI->getOperand(1)->getType(), 1);
-
- Value* MallocArg = CI->getOperand(1);
- assert(getMallocAllocatedType(CI) && "getMallocArraySize and no type");
- Constant *ElementSize = ConstantExpr::getSizeOf(getMallocAllocatedType(CI));
- ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
- MallocArg->getType());
-
- Constant* CO = dyn_cast<Constant>(MallocArg);
- BinaryOperator* BO = dyn_cast<BinaryOperator>(MallocArg);
- assert((isConstantOne(ElementSize) || CO || BO) &&
- "getMallocArraySize and malformed malloc IR");
-
- if (isConstantOne(ElementSize))
- return MallocArg;
-
- if (CO)
- return CO->getOperand(0);
-
- // TODO: Detect case where MallocArg mul has been transformed to shl.
-
- assert(BO && "getMallocArraySize not constant but not multiplication either");
- return BO->getOperand(0);
-}
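
The TODO above, a malloc size that reaches the analysis as a shift rather than a multiply, is exactly what the rewritten helper in the new MemoryBuiltins.cpp below closes. A minimal standalone sketch of the power-of-two reconstruction it relies on, using the same 2009-era APInt calls as the new code; the width and shift amount here are illustrative:

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      // 'mul i64 %n, 8' is commonly canonicalized to 'shl i64 %n, 3', so
      // the element size must be rebuilt from the shift amount: 1 << 3 == 8.
      APInt ShiftAmt(64, 3);                  // the shl's constant operand
      uint64_t BitToSet = ShiftAmt.getLimitedValue(ShiftAmt.getBitWidth() - 1);
      APInt ElementSize = APInt(64, 0).set(BitToSet);
      assert(ElementSize == APInt(64, 8));
      return 0;
    }
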
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
new file mode 100644
index 0000000..e710350
--- /dev/null
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -0,0 +1,277 @@
+//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to builtin functions that allocate
+// or free memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ConstantFolding.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call.
+bool llvm::isMalloc(const Value *I) {
+ return extractMallocCall(I) || extractMallocCallFromBitCast(I);
+}
+
+static bool isMallocCall(const CallInst *CI) {
+ if (!CI)
+ return false;
+
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "malloc")
+ return false;
+
+ // Check malloc prototype.
+ // FIXME: workaround for PR5130; this will be obsolete once a nobuiltin
+ // attribute exists.
+ const FunctionType *FTy = Callee->getFunctionType();
+ if (FTy->getNumParams() != 1)
+ return false;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
+ if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst *llvm::extractMallocCall(const Value *I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+CallInst *llvm::extractMallocCall(Value *I) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+static bool isBitCastOfMallocCall(const BitCastInst *BCI) {
+ if (!BCI)
+ return false;
+
+ return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0)));
+}
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+CallInst *llvm::extractMallocCallFromBitCast(Value *I) {
+ BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) {
+ const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+/// isConstantOne - Return true only if val is constant int 1.
+static bool isConstantOne(Value *val) {
+ return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
+}
+
+static Value *isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
+ const TargetData *TD) {
+ if (!CI)
+ return NULL;
+
+ // Type must be known to determine array size.
+ const Type *T = getMallocAllocatedType(CI);
+ if (!T)
+ return NULL;
+
+ Value *MallocArg = CI->getOperand(1);
+ ConstantExpr *CO = dyn_cast<ConstantExpr>(MallocArg);
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(MallocArg);
+
+ Constant *ElementSize = ConstantExpr::getSizeOf(T);
+ ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
+ MallocArg->getType());
+ Constant *FoldedElementSize =
+ ConstantFoldConstantExpression(cast<ConstantExpr>(ElementSize), Context, TD);
+
+ // First, check if CI is a non-array malloc.
+ if (CO && ((CO == ElementSize) ||
+ (FoldedElementSize && (CO == FoldedElementSize))))
+ // Match CreateMalloc's use of constant 1 array-size for non-array mallocs.
+ return ConstantInt::get(MallocArg->getType(), 1);
+
+ // Second, check if CI is an array malloc whose array size can be determined.
+ if (isConstantOne(ElementSize) ||
+ (FoldedElementSize && isConstantOne(FoldedElementSize)))
+ return MallocArg;
+
+ if (!CO && !BO)
+ return NULL;
+
+ Value *Op0 = NULL;
+ Value *Op1 = NULL;
+ unsigned Opcode = 0;
+ if (CO && ((CO->getOpcode() == Instruction::Mul) ||
+ (CO->getOpcode() == Instruction::Shl))) {
+ Op0 = CO->getOperand(0);
+ Op1 = CO->getOperand(1);
+ Opcode = CO->getOpcode();
+ }
+ if (BO && ((BO->getOpcode() == Instruction::Mul) ||
+ (BO->getOpcode() == Instruction::Shl))) {
+ Op0 = BO->getOperand(0);
+ Op1 = BO->getOperand(1);
+ Opcode = BO->getOpcode();
+ }
+
+ // Determine array size if malloc's argument is the product of a mul or shl.
+ if (Op0) {
+ if (Opcode == Instruction::Mul) {
+ if ((Op1 == ElementSize) ||
+ (FoldedElementSize && (Op1 == FoldedElementSize)))
+ // ArraySize * ElementSize
+ return Op0;
+ if ((Op0 == ElementSize) ||
+ (FoldedElementSize && (Op0 == FoldedElementSize)))
+ // ElementSize * ArraySize
+ return Op1;
+ }
+ if (Opcode == Instruction::Shl) {
+ ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
+ if (!Op1CI) return NULL;
+
+ APInt Op1Int = Op1CI->getValue();
+ uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
+ Value *Op1Pow = ConstantInt::get(Context,
+ APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
+ if (Op0 == ElementSize || (FoldedElementSize && Op0 == FoldedElementSize))
+ // ArraySize << log2(ElementSize)
+ return Op1Pow;
+ if (Op1Pow == ElementSize ||
+ (FoldedElementSize && Op1Pow == FoldedElementSize))
+ // ElementSize << log2(ArraySize)
+ return Op0;
+ }
+ }
+
+ // We could not determine the malloc array size from MallocArg.
+ return NULL;
+}
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// is a call to malloc whose array size can be determined and the array size
+/// is not constant 1. Otherwise, return NULL.
+CallInst *llvm::isArrayMalloc(Value *I, LLVMContext &Context,
+ const TargetData *TD) {
+ CallInst *CI = extractMallocCall(I);
+ Value *ArraySize = isArrayMallocHelper(CI, Context, TD);
+
+ if (ArraySize &&
+ ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1))
+ return CI;
+
+ // CI is a non-array malloc or we can't figure out that it is an array malloc.
+ return NULL;
+}
+
+const CallInst *llvm::isArrayMalloc(const Value *I, LLVMContext &Context,
+ const TargetData *TD) {
+ const CallInst *CI = extractMallocCall(I);
+ Value *ArraySize = isArrayMallocHelper(CI, Context, TD);
+
+ if (ArraySize &&
+ ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1))
+ return CI;
+
+ // CI is a non-array malloc or we can't figure out that it is an array malloc.
+ return NULL;
+}
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// This PointerType is the result type of the call's only bitcast use.
+/// If there is no unique bitcast use, then return NULL.
+const PointerType *llvm::getMallocType(const CallInst *CI) {
+ assert(isMalloc(CI) && "GetMallocType and not malloc call");
+
+ const BitCastInst *BCI = NULL;
+
+ // Determine if CallInst has a bitcast use.
+ for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; )
+ if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
+ break;
+
+ // Malloc call has 1 bitcast use and no other uses, so type is the bitcast's
+ // destination type.
+ if (BCI && CI->hasOneUse())
+ return cast<PointerType>(BCI->getDestTy());
+
+ // Malloc call was not bitcast, so type is the malloc function's return type.
+ if (!BCI)
+ return cast<PointerType>(CI->getType());
+
+ // Type could not be determined.
+ return NULL;
+}
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call. This
+/// Type is the result type of the call's only bitcast use. If there is no
+/// unique bitcast use, then return NULL.
+const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
+ const PointerType *PT = getMallocType(CI);
+ return PT ? PT->getElementType() : NULL;
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call. If the
+/// argument passed to malloc is a multiple of the size of the malloced type,
+/// then return the number of elements, i.e. that multiplier. For non-array
+/// mallocs, the array size is constant 1. Otherwise, return NULL for mallocs
+/// whose array size cannot be determined.
+Value *llvm::getMallocArraySize(CallInst *CI, LLVMContext &Context,
+ const TargetData *TD) {
+ return isArrayMallocHelper(CI, Context, TD);
+}
+
+//===----------------------------------------------------------------------===//
+// free Call Utility Functions.
+//
+
+/// isFreeCall - Returns true if the value is a call to the builtin free().
+bool llvm::isFreeCall(const Value *I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI)
+ return false;
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "free")
+ return false;
+
+ // Check free prototype.
+ // FIXME: workaround for PR5130; this will be obsolete once a nobuiltin
+ // attribute exists.
+ const FunctionType *FTy = Callee->getFunctionType();
+ if (!FTy->getReturnType()->isVoidTy())
+ return false;
+ if (FTy->getNumParams() != 1)
+ return false;
+ if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext()))
+ return false;
+
+ return true;
+}
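
For orientation, a hypothetical client of the interface defined above; the signatures are the ones from this file, while the surrounding pass plumbing is omitted:

    #include "llvm/Analysis/MemoryBuiltins.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Classify one instruction using the helpers above. TD may be null;
    // the helpers then skip constant-folding of the element size.
    static void classifyMemCall(Instruction *I, LLVMContext &Context,
                                const TargetData *TD) {
      if (CallInst *CI = extractMallocCall(I)) {
        // Constant 1 for a non-array malloc, the element count for an
        // array malloc, or NULL when the size cannot be determined.
        if (Value *ArraySize = getMallocArraySize(CI, Context, TD))
          errs() << "malloc of " << *ArraySize << " element(s)\n";
      } else if (isFreeCall(I)) {
        errs() << "free of " << *I->getOperand(1) << "\n"; // operand 0 is the callee
      }
    }
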
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index d640075..0ec0e74 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -20,7 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/PredIteratorCache.h"
@@ -113,10 +113,13 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
} else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
Pointer = V->getOperand(0);
PointerSize = AA->getTypeStoreSize(V->getType());
- } else if (FreeInst *F = dyn_cast<FreeInst>(Inst)) {
- Pointer = F->getPointerOperand();
-
- // FreeInsts erase the entire structure
+ } else if (isFreeCall(Inst)) {
+ Pointer = Inst->getOperand(1);
+ // calls to free() erase the entire structure
+ PointerSize = ~0ULL;
} else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
// Debug intrinsics don't cause dependences.
@@ -168,13 +171,54 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
/// location depends. If isLoad is true, this routine ignore may-aliases with
/// read-only operations.
MemDepResult MemoryDependenceAnalysis::
-getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
+getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ Value* invariantTag = 0;
+
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
Instruction *Inst = --ScanIt;
+ // If we're in an invariant region, no dependencies can be found before
+ // we pass an invariant-begin marker.
+ if (invariantTag == Inst) {
+ invariantTag = 0;
+ continue;
+ } else if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) {
+ // If we pass an invariant-end marker, then we've just entered an
+ // invariant region and can start ignoring dependencies.
+ if (II->getIntrinsicID() == Intrinsic::invariant_end) {
+ uint64_t invariantSize = ~0ULL;
+ if (ConstantInt* CI = dyn_cast<ConstantInt>(II->getOperand(2)))
+ invariantSize = CI->getZExtValue();
+
+ AliasAnalysis::AliasResult R =
+ AA->alias(II->getOperand(3), invariantSize, MemPtr, MemSize);
+ if (R == AliasAnalysis::MustAlias) {
+ invariantTag = II->getOperand(1);
+ continue;
+ }
+
+ // If we reach a lifetime begin or end marker, then the query ends here
+ // because the value is undefined.
+ } else if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ uint64_t invariantSize = ~0ULL;
+ if (ConstantInt* CI = dyn_cast<ConstantInt>(II->getOperand(1)))
+ invariantSize = CI->getZExtValue();
+
+ AliasAnalysis::AliasResult R =
+ AA->alias(II->getOperand(2), invariantSize, MemPtr, MemSize);
+ if (R == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(II);
+ }
+ }
+
+ // If we're querying on a load and we're in an invariant region, we're done
+ // at this point. Nothing a load depends on can live in an invariant region.
+ if (isLoad && invariantTag) continue;
+
// Debug intrinsics don't cause dependences.
if (isa<DbgInfoIntrinsic>(Inst)) continue;
@@ -199,6 +243,10 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
}
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // There can't be stores to the value we care about inside an
+ // invariant region.
+ if (invariantTag) continue;
+
// If alias analysis can tell that this store is guaranteed to not modify
// the query pointer, ignore it. Use getModRefInfo to handle cases where
// the query pointer points to constant memory etc.
@@ -229,7 +277,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// a subsequent bitcast of the malloc call result. There can be stores to
// the malloced memory between the malloc call and its bitcast uses, and we
// need to continue scanning until the malloc call.
- if (isa<AllocationInst>(Inst) || extractMallocCall(Inst)) {
+ if (isa<AllocaInst>(Inst) || extractMallocCall(Inst)) {
Value *AccessPtr = MemPtr->getUnderlyingObject();
if (AccessPtr == Inst ||
@@ -243,12 +291,16 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
case AliasAnalysis::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
+ case AliasAnalysis::Mod:
+ // If we're in an invariant region, we can ignore calls that ONLY
+ // modify the pointer.
+ if (invariantTag) continue;
+ return MemDepResult::getClobber(Inst);
case AliasAnalysis::Ref:
// If the call is known to never store to the pointer, and if this is a
// load query, we can safely ignore it (scan past it).
if (isLoad)
continue;
- // FALL THROUGH.
default:
// Otherwise, there is a potential dependence. Return a clobber.
return MemDepResult::getClobber(Inst);
@@ -314,15 +366,15 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
MemPtr = LI->getPointerOperand();
MemSize = AA->getTypeStoreSize(LI->getType());
}
+ } else if (isFreeCall(QueryInst)) {
+ MemPtr = QueryInst->getOperand(1);
+ // calls to free() erase the entire structure, not just a field.
+ MemSize = ~0UL;
} else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
CallSite QueryCS = CallSite::get(QueryInst);
bool isReadOnly = AA->onlyReadsMemory(QueryCS);
LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
QueryParent);
- } else if (FreeInst *FI = dyn_cast<FreeInst>(QueryInst)) {
- MemPtr = FI->getPointerOperand();
- // FreeInsts erase the entire structure, not just a field.
- MemSize = ~0UL;
} else {
// Non-memory instruction.
LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
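
A reading aid for the hunks above: with the 2009-era call representation, operand 0 of a CallInst is the callee, so llvm.invariant.end carries its start token, size, and pointer at operands 1, 2, and 3, while llvm.lifetime.start/end carry size and pointer at operands 1 and 2. Restated as a self-contained sketch, with the must-alias test being the one the scan performs:

    #include "llvm/Constants.h"
    #include "llvm/IntrinsicInst.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    // IR shape being matched, per the LangRef of this revision:
    //   %tok = call {}* @llvm.invariant.start(i64 8, i8* %p)
    //   ...                        ; no stores to %p's bytes in here
    //   call void @llvm.invariant.end({}* %tok, i64 8, i8* %p)

    // Returns true if a lifetime marker must-aliases the queried location,
    // i.e. the query can stop because the value is undefined at the marker.
    static bool lifetimeMarkerDefines(IntrinsicInst *II, AliasAnalysis *AA,
                                      Value *MemPtr, uint64_t MemSize) {
      uint64_t Size = ~0ULL;                       // unknown unless constant
      if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(1)))
        Size = CI->getZExtValue();                 // operand 1: byte size
      // operand 2: the pointer whose lifetime begins or ends
      return AA->alias(II->getOperand(2), Size, MemPtr, MemSize) ==
             AliasAnalysis::MustAlias;
    }
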
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
index 2309fbc..2251b62 100644
--- a/lib/Analysis/PointerTracking.cpp
+++ b/lib/Analysis/PointerTracking.cpp
@@ -13,7 +13,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PointerTracking.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -93,7 +93,7 @@ bool PointerTracking::doInitialization(Module &M) {
const SCEV *PointerTracking::computeAllocationCount(Value *P,
const Type *&Ty) const {
Value *V = P->stripPointerCasts();
- if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
Value *arraySize = AI->getArraySize();
Ty = AI->getAllocatedType();
// arraySize elements of type Ty.
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index c585c1d..e767891 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -30,8 +30,7 @@ LoopWeight(
);
namespace {
- class VISIBILITY_HIDDEN ProfileEstimatorPass :
- public FunctionPass, public ProfileInfo {
+ class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
double ExecCount;
LoopInfo *LI;
std::set<BasicBlock*> BBToVisit;
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 9efdd23..7f24f5a 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -16,7 +16,6 @@
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Format.h"
@@ -178,8 +177,7 @@ raw_ostream& llvm::operator<<(raw_ostream &O, ProfileInfo::Edge E) {
//
namespace {
- struct VISIBILITY_HIDDEN NoProfileInfo
- : public ImmutablePass, public ProfileInfo {
+ struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
static char ID; // Class identification, replacement for typeinfo
NoProfileInfo() : ImmutablePass(&ID) {}
};
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 89d90bc..9e1dfb6 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -20,7 +20,6 @@
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ProfileInfoLoader.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -38,7 +37,7 @@ ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
cl::desc("Profile file loaded by -profile-loader"));
namespace {
- class VISIBILITY_HIDDEN LoaderPass : public ModulePass, public ProfileInfo {
+ class LoaderPass : public ModulePass, public ProfileInfo {
std::string Filename;
std::set<Edge> SpanningTree;
std::set<const BasicBlock*> BBisUnvisited;
@@ -61,7 +60,7 @@ namespace {
// recurseBasicBlock() - Calculates the edge weights for as many basic
// blocks as possible.
virtual void recurseBasicBlock(const BasicBlock *BB);
- virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, unsigned &);
+ virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
/// run - Load the profile information from the specified file.
@@ -85,7 +84,7 @@ Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
}
void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
- unsigned &uncalc, unsigned &count) {
+ unsigned &uncalc, double &count) {
double w;
if ((w = getEdgeWeight(edge)) == MissingValue) {
tocalc = edge;
@@ -118,7 +117,7 @@ void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
// collect weights of all incoming and outgoing edges, remember edges that
// have no value
- unsigned incount = 0;
+ double incount = 0;
SmallSet<const BasicBlock*,8> pred_visited;
pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
if (bbi==bbe) {
@@ -130,7 +129,7 @@ void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
}
}
- unsigned outcount = 0;
+ double outcount = 0;
SmallSet<const BasicBlock*,8> succ_visited;
succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
if (sbbi==sbbe) {
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 9766da5..5f36294 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -30,7 +30,7 @@ ProfileVerifierDisableAssertions("profile-verifier-noassert",
cl::desc("Disable assertions"));
namespace {
- class VISIBILITY_HIDDEN ProfileVerifierPass : public FunctionPass {
+ class ProfileVerifierPass : public FunctionPass {
struct DetailedBlockInfo {
const BasicBlock *BB;
@@ -229,7 +229,8 @@ void ProfileVerifierPass::recurseBasicBlock(const BasicBlock *BB) {
// to debug printers.
DetailedBlockInfo DI;
DI.BB = BB;
- DI.outCount = DI.inCount = DI.inWeight = DI.outWeight = 0;
+ DI.outCount = DI.inCount = 0;
+ DI.inWeight = DI.outWeight = 0.0;
// Read predecessors.
std::set<const BasicBlock*> ProcessedPreds;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 62f3aa1..3e87ca2 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -74,7 +74,6 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
@@ -401,7 +400,7 @@ namespace {
/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
/// than the complexity of the RHS. This comparator is used to canonicalize
/// expressions.
- class VISIBILITY_HIDDEN SCEVComplexityCompare {
+ class SCEVComplexityCompare {
LoopInfo *LI;
public:
explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {}
@@ -3266,9 +3265,8 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Now that we know more about the trip count for this loop, forget any
// existing SCEV values for PHI nodes in this loop since they are only
// conservative estimates made without the benefit of trip count
- // information. This is similar to the code in
- // forgetLoopBackedgeTakenCount, except that it handles SCEVUnknown PHI
- // nodes specially.
+ // information. This is similar to the code in forgetLoop, except that
+ // it handles SCEVUnknown PHI nodes specially.
if (ItCount.hasAnyInfo()) {
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
@@ -3302,13 +3300,14 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
return Pair.first->second;
}
-/// forgetLoopBackedgeTakenCount - This method should be called by the
-/// client when it has changed a loop in a way that may effect
-/// ScalarEvolution's ability to compute a trip count, or if the loop
-/// is deleted.
-void ScalarEvolution::forgetLoopBackedgeTakenCount(const Loop *L) {
+/// forgetLoop - This method should be called by the client when it has
+/// changed a loop in a way that may affect ScalarEvolution's ability to
+/// compute a trip count, or if the loop is deleted.
+void ScalarEvolution::forgetLoop(const Loop *L) {
+ // Drop any stored trip count value.
BackedgeTakenCounts.erase(L);
+ // Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
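
The rename also nudges the contract: forgetLoop drops both the cached trip count and any SCEVs based on the loop-header PHIs, as the two comments added above spell out. A sketch of the expected call site; the transform itself is hypothetical:

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    using namespace llvm;

    // After mutating a loop (say, rewriting its exit condition), cached
    // ScalarEvolution results for it are stale and must be discarded.
    static void afterLoopMutation(ScalarEvolution &SE, Loop *L) {
      SE.forgetLoop(L); // replaces the removed forgetLoopBackedgeTakenCount(L)
    }
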
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index cc79e6c..ef0e97b 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -19,14 +19,13 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
using namespace llvm;
namespace {
/// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
/// implementation that uses ScalarEvolution to answer queries.
- class VISIBILITY_HIDDEN ScalarEvolutionAliasAnalysis : public FunctionPass,
- public AliasAnalysis {
+ class ScalarEvolutionAliasAnalysis : public FunctionPass,
+ public AliasAnalysis {
ScalarEvolution *SE;
public:
@@ -39,7 +38,7 @@ namespace {
virtual AliasResult alias(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size);
- Value *GetUnderlyingIdentifiedObject(const SCEV *S);
+ Value *GetBaseValue(const SCEV *S);
};
} // End of anonymous namespace
@@ -69,25 +68,22 @@ ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
return false;
}
-/// GetUnderlyingIdentifiedObject - Given an expression, try to find an
-/// "identified object" (see AliasAnalysis::isIdentifiedObject) base
-/// value. Return null is none was found.
+/// GetBaseValue - Given an expression, try to find a
+/// base value. Return null if none was found.
Value *
-ScalarEvolutionAliasAnalysis::GetUnderlyingIdentifiedObject(const SCEV *S) {
+ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// In an addrec, assume that the base will be in the start, rather
// than the step.
- return GetUnderlyingIdentifiedObject(AR->getStart());
+ return GetBaseValue(AR->getStart());
} else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
// If there's a pointer operand, it'll be sorted at the end of the list.
const SCEV *Last = A->getOperand(A->getNumOperands()-1);
if (isa<PointerType>(Last->getType()))
- return GetUnderlyingIdentifiedObject(Last);
+ return GetBaseValue(Last);
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
- // Determine if we've found an Identified object.
- Value *V = U->getValue();
- if (isIdentifiedObject(V))
- return V;
+ // This is a leaf node.
+ return U->getValue();
}
// No Identified object found.
return 0;
@@ -121,8 +117,8 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
// If ScalarEvolution can find an underlying object, form a new query.
// The correctness of this depends on ScalarEvolution not recognizing
// inttoptr and ptrtoint operators.
- Value *AO = GetUnderlyingIdentifiedObject(AS);
- Value *BO = GetUnderlyingIdentifiedObject(BS);
+ Value *AO = GetBaseValue(AS);
+ Value *BO = GetBaseValue(BS);
if ((AO && AO != A) || (BO && BO != B))
if (alias(AO ? AO : A, AO ? ~0u : ASize,
BO ? BO : B, BO ? ~0u : BSize) == NoAlias)
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index b7844f0..d7bcac2 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -166,6 +166,11 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
}
+ if (isa<IndirectBrInst>(TI)) {
+ Succs.assign(Succs.size(), true);
+ return;
+ }
+
SwitchInst &SI = cast<SwitchInst>(TI);
LatticeVal SCValue;
if (AggressiveUndef)
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index dc0d489..5672510 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -470,7 +470,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
case Instruction::Alloca: {
- AllocationInst *AI = cast<AllocationInst>(V);
+ AllocaInst *AI = cast<AllocaInst>(V);
unsigned Align = AI->getAlignment();
if (Align == 0 && TD)
Align = TD->getABITypeAlignment(AI->getType()->getElementType());
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index f6cea88..1b7c9c6 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -576,6 +576,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
KEYWORD(x);
+ KEYWORD(blockaddress);
#undef KEYWORD
// Keywords for types.
@@ -606,6 +607,10 @@ lltok::Kind LLLexer::LexIdentifier() {
// FIXME: Remove in LLVM 3.0.
// Autoupgrade malloc instruction.
return lltok::kw_malloc;
+ } else if (Len == 4 && !memcmp(StartChar, "free", 4)) {
+ // FIXME: Remove in LLVM 3.0.
+ // Autoupgrade free instruction.
+ return lltok::kw_free;
}
// Keywords for instructions.
@@ -641,12 +646,12 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(ret, Ret);
INSTKEYWORD(br, Br);
INSTKEYWORD(switch, Switch);
+ INSTKEYWORD(indirectbr, IndirectBr);
INSTKEYWORD(invoke, Invoke);
INSTKEYWORD(unwind, Unwind);
INSTKEYWORD(unreachable, Unreachable);
INSTKEYWORD(alloca, Alloca);
- INSTKEYWORD(free, Free);
INSTKEYWORD(load, Load);
INSTKEYWORD(store, Store);
INSTKEYWORD(getelementptr, GetElementPtr);
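
With free demoted from an instruction to a keyword, the lexer keeps accepting legacy IR and the parser performs the upgrade; the LLParser change below shows the target. In isolation, and with the wrapper function being illustrative:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // What a legacy 'free %Ptr' now becomes: a call to the builtin free,
    // appended to basic block BB (CallInst::CreateFree as used below).
    static void upgradeFree(Value *Ptr, BasicBlock *BB) {
      CallInst::CreateFree(Ptr, BB);
    }
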
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 271567b..0da0f4a 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -29,34 +29,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace llvm {
- /// ValID - Represents a reference of a definition of some sort with no type.
- /// There are several cases where we have to parse the value but where the
- /// type can depend on later context. This may either be a numeric reference
- /// or a symbolic (%var) reference. This is just a discriminated union.
- struct ValID {
- enum {
- t_LocalID, t_GlobalID, // ID in UIntVal.
- t_LocalName, t_GlobalName, // Name in StrVal.
- t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal.
- t_Null, t_Undef, t_Zero, // No value.
- t_EmptyArray, // No value: []
- t_Constant, // Value in ConstantVal.
- t_InlineAsm, // Value in StrVal/StrVal2/UIntVal.
- t_Metadata // Value in MetadataVal.
- } Kind;
-
- LLParser::LocTy Loc;
- unsigned UIntVal;
- std::string StrVal, StrVal2;
- APSInt APSIntVal;
- APFloat APFloatVal;
- Constant *ConstantVal;
- MetadataBase *MetadataVal;
- ValID() : APFloatVal(0.0) {}
- };
-}
-
/// Run: module ::= toplevelentity*
bool LLParser::Run() {
// Prime the lexer.
@@ -77,7 +49,7 @@ bool LLParser::ValidateEndOfModule() {
// declaration of "malloc". In that case, iterate over all calls to MallocF
// and get them to call the declared "malloc" instead.
if (MallocF->getName() != "malloc") {
- Constant* RealMallocF = M->getFunction("malloc");
+ Constant *RealMallocF = M->getFunction("malloc");
if (RealMallocF->getType() != MallocF->getType())
RealMallocF = ConstantExpr::getBitCast(RealMallocF, MallocF->getType());
MallocF->replaceAllUsesWith(RealMallocF);
@@ -85,7 +57,32 @@ bool LLParser::ValidateEndOfModule() {
MallocF = NULL;
}
}
-
+
+
+ // If there are entries in ForwardRefBlockAddresses at this point, they are
+ // references after the function was defined. Resolve those now.
+ while (!ForwardRefBlockAddresses.empty()) {
+ // Okay, we are referencing an already-parsed function, resolve them now.
+ Function *TheFn = 0;
+ const ValID &Fn = ForwardRefBlockAddresses.begin()->first;
+ if (Fn.Kind == ValID::t_GlobalName)
+ TheFn = M->getFunction(Fn.StrVal);
+ else if (Fn.UIntVal < NumberedVals.size())
+ TheFn = dyn_cast<Function>(NumberedVals[Fn.UIntVal]);
+
+ if (TheFn == 0)
+ return Error(Fn.Loc, "unknown function referenced by blockaddress");
+
+ // Resolve all these references.
+ if (ResolveForwardRefBlockAddresses(TheFn,
+ ForwardRefBlockAddresses.begin()->second,
+ 0))
+ return true;
+
+ ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
+ }
+
+
if (!ForwardRefTypes.empty())
return Error(ForwardRefTypes.begin()->second.second,
"use of undefined type named '" +
@@ -120,6 +117,38 @@ bool LLParser::ValidateEndOfModule() {
return false;
}
+bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
+ std::vector<std::pair<ValID, GlobalValue*> > &Refs,
+ PerFunctionState *PFS) {
+ // Loop over all the references, resolving them.
+ for (unsigned i = 0, e = Refs.size(); i != e; ++i) {
+ BasicBlock *Res;
+ if (PFS) {
+ if (Refs[i].first.Kind == ValID::t_LocalName)
+ Res = PFS->GetBB(Refs[i].first.StrVal, Refs[i].first.Loc);
+ else
+ Res = PFS->GetBB(Refs[i].first.UIntVal, Refs[i].first.Loc);
+ } else if (Refs[i].first.Kind == ValID::t_LocalID) {
+ return Error(Refs[i].first.Loc,
+ "cannot take address of numeric label after the function is defined");
+ } else {
+ Res = dyn_cast_or_null<BasicBlock>(
+ TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal));
+ }
+
+ if (Res == 0)
+ return Error(Refs[i].first.Loc,
+ "referenced value is not a basic block");
+
+ // Get the BlockAddress for this and update references to use it.
+ BlockAddress *BA = BlockAddress::get(TheFn, Res);
+ Refs[i].second->replaceAllUsesWith(BA);
+ Refs[i].second->eraseFromParent();
+ }
+ return false;
+}
+
+
//===----------------------------------------------------------------------===//
// Top-Level Entities
//===----------------------------------------------------------------------===//
@@ -603,8 +632,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
// See if this value already exists in the symbol table. If so, it is either
// a redefinition or a definition of a forward reference.
- if (GlobalValue *Val =
- cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(Name))) {
+ if (GlobalValue *Val = M->getNamedValue(Name)) {
// See if this was a redefinition. If so, there is no entry in
// ForwardRefVals.
std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
@@ -671,9 +699,11 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
// See if the global was forward referenced, if so, use the global.
if (!Name.empty()) {
- if ((GV = M->getGlobalVariable(Name, true)) &&
- !ForwardRefVals.erase(Name))
- return Error(NameLoc, "redefinition of global '@" + Name + "'");
+ if (GlobalValue *GVal = M->getNamedValue(Name)) {
+ if (!ForwardRefVals.erase(Name) || !isa<GlobalValue>(GVal))
+ return Error(NameLoc, "redefinition of global '@" + Name + "'");
+ GV = cast<GlobalVariable>(GVal);
+ }
} else {
std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
I = ForwardRefValIDs.find(NumberedVals.size());
@@ -1107,6 +1137,8 @@ bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
return TokError("expected ',' as start of index list");
while (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::NamedOrCustomMD)
+ break;
unsigned Idx;
if (ParseUInt32(Idx)) return true;
Indices.push_back(Idx);
@@ -1574,8 +1606,9 @@ bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
// Function Semantic Analysis.
//===----------------------------------------------------------------------===//
-LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f)
- : P(p), F(f) {
+LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f,
+ int functionNumber)
+ : P(p), F(f), FunctionNumber(functionNumber) {
// Insert unnamed arguments into the NumberedVals list.
for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
@@ -1605,7 +1638,29 @@ LLParser::PerFunctionState::~PerFunctionState() {
}
}
-bool LLParser::PerFunctionState::VerifyFunctionComplete() {
+bool LLParser::PerFunctionState::FinishFunction() {
+ // Check to see if someone took the address of labels in this block.
+ if (!P.ForwardRefBlockAddresses.empty()) {
+ ValID FunctionID;
+ if (!F.getName().empty()) {
+ FunctionID.Kind = ValID::t_GlobalName;
+ FunctionID.StrVal = F.getName();
+ } else {
+ FunctionID.Kind = ValID::t_GlobalID;
+ FunctionID.UIntVal = FunctionNumber;
+ }
+
+ std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >::iterator
+ FRBAI = P.ForwardRefBlockAddresses.find(FunctionID);
+ if (FRBAI != P.ForwardRefBlockAddresses.end()) {
+ // Resolve all these references.
+ if (P.ResolveForwardRefBlockAddresses(&F, FRBAI->second, this))
+ return true;
+
+ P.ForwardRefBlockAddresses.erase(FRBAI);
+ }
+ }
+
if (!ForwardRefVals.empty())
return P.Error(ForwardRefVals.begin()->second.second,
"use of undefined value '%" + ForwardRefVals.begin()->first +
@@ -1989,6 +2044,35 @@ bool LLParser::ParseValID(ValID &ID) {
return false;
}
+ case lltok::kw_blockaddress: {
+ // ValID ::= 'blockaddress' '(' @foo ',' %bar ')'
+ Lex.Lex();
+
+ ValID Fn, Label;
+ LocTy FnLoc, LabelLoc;
+
+ if (ParseToken(lltok::lparen, "expected '(' in block address expression") ||
+ ParseValID(Fn) ||
+ ParseToken(lltok::comma, "expected comma in block address expression")||
+ ParseValID(Label) ||
+ ParseToken(lltok::rparen, "expected ')' in block address expression"))
+ return true;
+
+ if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName)
+ return Error(Fn.Loc, "expected function name in blockaddress");
+ if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName)
+ return Error(Label.Loc, "expected basic block name in blockaddress");
+
+ // Make a global variable as a placeholder for this reference.
+ GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context),
+ false, GlobalValue::InternalLinkage,
+ 0, "");
+ ForwardRefBlockAddresses[Fn].push_back(std::make_pair(Label, FwdRef));
+ ID.ConstantVal = FwdRef;
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
case lltok::kw_trunc:
case lltok::kw_zext:
case lltok::kw_sext:
@@ -2029,6 +2113,9 @@ bool LLParser::ParseValID(ValID &ID) {
ParseIndexList(Indices) ||
ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr"))
return true;
+ if (Lex.getKind() == lltok::NamedOrCustomMD)
+ if (ParseOptionalCustomMetadata()) return true;
+
if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
return Error(ID.Loc, "extractvalue operand must be array or struct");
if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
@@ -2050,6 +2137,8 @@ bool LLParser::ParseValID(ValID &ID) {
ParseIndexList(Indices) ||
ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
return true;
+ if (Lex.getKind() == lltok::NamedOrCustomMD)
+ if (ParseOptionalCustomMetadata()) return true;
if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType()))
return Error(ID.Loc, "extractvalue operand must be array or struct");
if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
@@ -2411,6 +2500,18 @@ bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
ParseValue(T, V, PFS);
}
+bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+ PerFunctionState &PFS) {
+ Value *V;
+ Loc = Lex.getLoc();
+ if (ParseTypeAndValue(V, PFS)) return true;
+ if (!isa<BasicBlock>(V))
+ return Error(Loc, "expected a basic block");
+ BB = cast<BasicBlock>(V);
+ return false;
+}
+
+
/// FunctionHeader
/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
/// Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
@@ -2563,6 +2664,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
AI != AE; ++AI)
AI->setName("");
}
+ } else if (M->getNamedValue(FunctionName)) {
+ return Error(NameLoc, "redefinition of function '@" + FunctionName + "'");
}
} else {
@@ -2622,7 +2725,10 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
return TokError("expected '{' in function body");
Lex.Lex(); // eat the {.
- PerFunctionState PFS(*this, Fn);
+ int FunctionNumber = -1;
+ if (!Fn.hasName()) FunctionNumber = NumberedVals.size()-1;
+
+ PerFunctionState PFS(*this, Fn, FunctionNumber);
while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end)
if (ParseBasicBlock(PFS)) return true;
@@ -2631,7 +2737,7 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
Lex.Lex();
// Verify function is ok.
- return PFS.VerifyFunctionComplete();
+ return PFS.FinishFunction();
}
/// ParseBasicBlock
@@ -2716,6 +2822,7 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_ret: return ParseRet(Inst, BB, PFS);
case lltok::kw_br: return ParseBr(Inst, PFS);
case lltok::kw_switch: return ParseSwitch(Inst, PFS);
+ case lltok::kw_indirectbr: return ParseIndirectBr(Inst, PFS);
case lltok::kw_invoke: return ParseInvoke(Inst, PFS);
// Binary Operators.
case lltok::kw_add:
@@ -2800,7 +2907,7 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
// Memory.
case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
case lltok::kw_malloc: return ParseAlloc(Inst, PFS, BB, false);
- case lltok::kw_free: return ParseFree(Inst, PFS);
+ case lltok::kw_free: return ParseFree(Inst, PFS, BB);
case lltok::kw_load: return ParseLoad(Inst, PFS, false);
case lltok::kw_store: return ParseStore(Inst, PFS, false);
case lltok::kw_volatile:
@@ -2919,7 +3026,8 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
/// ::= 'br' TypeAndValue ',' TypeAndValue ',' TypeAndValue
bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
LocTy Loc, Loc2;
- Value *Op0, *Op1, *Op2;
+ Value *Op0;
+ BasicBlock *Op1, *Op2;
if (ParseTypeAndValue(Op0, Loc, PFS)) return true;
if (BasicBlock *BB = dyn_cast<BasicBlock>(Op0)) {
@@ -2931,17 +3039,12 @@ bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
return Error(Loc, "branch condition must have 'i1' type");
if (ParseToken(lltok::comma, "expected ',' after branch condition") ||
- ParseTypeAndValue(Op1, Loc, PFS) ||
+ ParseTypeAndBasicBlock(Op1, Loc, PFS) ||
ParseToken(lltok::comma, "expected ',' after true destination") ||
- ParseTypeAndValue(Op2, Loc2, PFS))
+ ParseTypeAndBasicBlock(Op2, Loc2, PFS))
return true;
- if (!isa<BasicBlock>(Op1))
- return Error(Loc, "true destination of branch must be a basic block");
- if (!isa<BasicBlock>(Op2))
- return Error(Loc2, "true destination of branch must be a basic block");
-
- Inst = BranchInst::Create(cast<BasicBlock>(Op1), cast<BasicBlock>(Op2), Op0);
+ Inst = BranchInst::Create(Op1, Op2, Op0);
return false;
}
@@ -2952,50 +3055,87 @@ bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= (TypeAndValue ',' TypeAndValue)*
bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CondLoc, BBLoc;
- Value *Cond, *DefaultBB;
+ Value *Cond;
+ BasicBlock *DefaultBB;
if (ParseTypeAndValue(Cond, CondLoc, PFS) ||
ParseToken(lltok::comma, "expected ',' after switch condition") ||
- ParseTypeAndValue(DefaultBB, BBLoc, PFS) ||
+ ParseTypeAndBasicBlock(DefaultBB, BBLoc, PFS) ||
ParseToken(lltok::lsquare, "expected '[' with switch table"))
return true;
if (!isa<IntegerType>(Cond->getType()))
return Error(CondLoc, "switch condition must have integer type");
- if (!isa<BasicBlock>(DefaultBB))
- return Error(BBLoc, "default destination must be a basic block");
// Parse the jump table pairs.
SmallPtrSet<Value*, 32> SeenCases;
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 32> Table;
while (Lex.getKind() != lltok::rsquare) {
- Value *Constant, *DestBB;
+ Value *Constant;
+ BasicBlock *DestBB;
if (ParseTypeAndValue(Constant, CondLoc, PFS) ||
ParseToken(lltok::comma, "expected ',' after case value") ||
- ParseTypeAndValue(DestBB, BBLoc, PFS))
+ ParseTypeAndBasicBlock(DestBB, PFS))
return true;
-
+
if (!SeenCases.insert(Constant))
return Error(CondLoc, "duplicate case value in switch");
if (!isa<ConstantInt>(Constant))
return Error(CondLoc, "case value is not a constant integer");
- if (!isa<BasicBlock>(DestBB))
- return Error(BBLoc, "case destination is not a basic block");
- Table.push_back(std::make_pair(cast<ConstantInt>(Constant),
- cast<BasicBlock>(DestBB)));
+ Table.push_back(std::make_pair(cast<ConstantInt>(Constant), DestBB));
}
Lex.Lex(); // Eat the ']'.
- SwitchInst *SI = SwitchInst::Create(Cond, cast<BasicBlock>(DefaultBB),
- Table.size());
+ SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, Table.size());
for (unsigned i = 0, e = Table.size(); i != e; ++i)
SI->addCase(Table[i].first, Table[i].second);
Inst = SI;
return false;
}
+/// ParseIndirectBr
+/// Instruction
+/// ::= 'indirectbr' TypeAndValue ',' '[' LabelList ']'
+bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy AddrLoc;
+ Value *Address;
+ if (ParseTypeAndValue(Address, AddrLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after indirectbr address") ||
+ ParseToken(lltok::lsquare, "expected '[' with indirectbr"))
+ return true;
+
+ if (!isa<PointerType>(Address->getType()))
+ return Error(AddrLoc, "indirectbr address must have pointer type");
+
+ // Parse the destination list.
+ SmallVector<BasicBlock*, 16> DestList;
+
+ if (Lex.getKind() != lltok::rsquare) {
+ BasicBlock *DestBB;
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ DestList.push_back(DestBB);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ DestList.push_back(DestBB);
+ }
+ }
+
+ if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
+ return true;
+
+ IndirectBrInst *IBI = IndirectBrInst::Create(Address, DestList.size());
+ for (unsigned i = 0, e = DestList.size(); i != e; ++i)
+ IBI->addDestination(DestList[i]);
+ Inst = IBI;
+ return false;
+}
+
+
/// ParseInvoke
/// ::= 'invoke' OptionalCallingConv OptionalAttrs Type Value ParamList
/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
@@ -3008,7 +3148,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
ValID CalleeID;
SmallVector<ParamInfo, 16> ArgList;
- Value *NormalBB, *UnwindBB;
+ BasicBlock *NormalBB, *UnwindBB;
if (ParseOptionalCallingConv(CC) ||
ParseOptionalAttrs(RetAttrs, 1) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
@@ -3016,16 +3156,11 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
ParseParameterList(ArgList, PFS) ||
ParseOptionalAttrs(FnAttrs, 2) ||
ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
- ParseTypeAndValue(NormalBB, PFS) ||
+ ParseTypeAndBasicBlock(NormalBB, PFS) ||
ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
- ParseTypeAndValue(UnwindBB, PFS))
+ ParseTypeAndBasicBlock(UnwindBB, PFS))
return true;
- if (!isa<BasicBlock>(NormalBB))
- return Error(CallLoc, "normal destination is not a basic block");
- if (!isa<BasicBlock>(UnwindBB))
- return Error(CallLoc, "unwind destination is not a basic block");
-
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
@@ -3093,8 +3228,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Finish off the Attributes and check them
AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
- InvokeInst *II = InvokeInst::Create(Callee, cast<BasicBlock>(NormalBB),
- cast<BasicBlock>(UnwindBB),
+ InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB,
Args.begin(), Args.end());
II->setCallingConv(CC);
II->setAttributes(PAL);
@@ -3496,12 +3630,13 @@ bool LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
/// ParseFree
/// ::= 'free' TypeAndValue
-bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS) {
+bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS,
+ BasicBlock* BB) {
Value *Val; LocTy Loc;
if (ParseTypeAndValue(Val, Loc, PFS)) return true;
if (!isa<PointerType>(Val->getType()))
return Error(Loc, "operand to free must be a pointer");
- Inst = new FreeInst(Val);
+ Inst = CallInst::CreateFree(Val, BB);
return false;
}
@@ -3609,6 +3744,8 @@ bool LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
if (ParseTypeAndValue(Val, Loc, PFS) ||
ParseIndexList(Indices))
return true;
+ if (Lex.getKind() == lltok::NamedOrCustomMD)
+ if (ParseOptionalCustomMetadata()) return true;
if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
return Error(Loc, "extractvalue operand must be array or struct");
@@ -3630,6 +3767,8 @@ bool LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
ParseTypeAndValue(Val1, Loc1, PFS) ||
ParseIndexList(Indices))
return true;
+ if (Lex.getKind() == lltok::NamedOrCustomMD)
+ if (ParseOptionalCustomMetadata()) return true;
if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType()))
return Error(Loc0, "extractvalue operand must be array or struct");
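
In isolation, the terminator that ParseIndirectBr above constructs; the Create and addDestination calls are exactly the ones in the hunk, while the two-destination wrapper is illustrative:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Build 'indirectbr i8* %Addr, [label %A, label %B]'. The parser
    // inserts the instruction into the current block afterwards.
    static IndirectBrInst *buildIndirectBr(Value *Addr, BasicBlock *A,
                                           BasicBlock *B) {
      IndirectBrInst *IBI = IndirectBrInst::Create(Addr, /*NumDests=*/2);
      IBI->addDestination(A);
      IBI->addDestination(B);
      return IBI;
    }
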
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 5dd6a2e..d60bcea 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -31,8 +31,41 @@ namespace llvm {
class MetadataBase;
class MDString;
class MDNode;
- struct ValID;
+ /// ValID - Represents a reference of a definition of some sort with no type.
+ /// There are several cases where we have to parse the value but where the
+ /// type can depend on later context. This may either be a numeric reference
+ /// or a symbolic (%var) reference. This is just a discriminated union.
+ struct ValID {
+ enum {
+ t_LocalID, t_GlobalID, // ID in UIntVal.
+ t_LocalName, t_GlobalName, // Name in StrVal.
+ t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal.
+ t_Null, t_Undef, t_Zero, // No value.
+ t_EmptyArray, // No value: []
+ t_Constant, // Value in ConstantVal.
+ t_InlineAsm, // Value in StrVal/StrVal2/UIntVal.
+ t_Metadata // Value in MetadataVal.
+ } Kind;
+
+ LLLexer::LocTy Loc;
+ unsigned UIntVal;
+ std::string StrVal, StrVal2;
+ APSInt APSIntVal;
+ APFloat APFloatVal;
+ Constant *ConstantVal;
+ MetadataBase *MetadataVal;
+ ValID() : APFloatVal(0.0) {}
+
+ bool operator<(const ValID &RHS) const {
+ if (Kind == t_LocalID || Kind == t_GlobalID)
+ return UIntVal < RHS.UIntVal;
+ assert((Kind == t_LocalName || Kind == t_GlobalName) &&
+ "Ordering not defined for this ValID kind yet");
+ return StrVal < RHS.StrVal;
+ }
+ };
+
class LLParser {
public:
typedef LLLexer::LocTy LocTy;
@@ -75,7 +108,13 @@ namespace llvm {
std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
std::vector<GlobalValue*> NumberedVals;
- Function* MallocF;
+
+ // References to blockaddress. The key is the function ValID, the value is
+ // a list of references to blocks in that function.
+ std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
+ ForwardRefBlockAddresses;
+
+ Function *MallocF;
public:
LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
@@ -184,13 +223,17 @@ namespace llvm {
std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
std::vector<Value*> NumberedVals;
+
+ /// FunctionNumber - If this is an unnamed function, this is the slot
+ /// number of it, otherwise it is -1.
+ int FunctionNumber;
public:
- PerFunctionState(LLParser &p, Function &f);
+ PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
~PerFunctionState();
Function &getFunction() const { return F; }
- bool VerifyFunctionComplete();
+ bool FinishFunction();
/// GetVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
@@ -230,7 +273,13 @@ namespace llvm {
Loc = Lex.getLoc();
return ParseTypeAndValue(V, PFS);
}
-
+ bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+ PerFunctionState &PFS);
+ bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
+ LocTy Loc;
+ return ParseTypeAndBasicBlock(BB, Loc, PFS);
+ }
+
struct ParamInfo {
LocTy Loc;
Value *V;
@@ -264,6 +313,7 @@ namespace llvm {
bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
@@ -280,13 +330,17 @@ namespace llvm {
bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
bool ParseAlloc(Instruction *&I, PerFunctionState &PFS,
BasicBlock *BB = 0, bool isAlloca = true);
- bool ParseFree(Instruction *&I, PerFunctionState &PFS);
+ bool ParseFree(Instruction *&I, PerFunctionState &PFS, BasicBlock *BB);
bool ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
bool ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
bool ParseGetResult(Instruction *&I, PerFunctionState &PFS);
bool ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
bool ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
bool ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
+
+ bool ResolveForwardRefBlockAddresses(Function *TheFn,
+ std::vector<std::pair<ValID, GlobalValue*> > &Refs,
+ PerFunctionState *PFS);
};
} // End llvm namespace
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index f5072fe..797c32e 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -111,12 +111,13 @@ namespace lltok {
kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast,
kw_select, kw_va_arg,
- kw_ret, kw_br, kw_switch, kw_invoke, kw_unwind, kw_unreachable,
+ kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind,
+ kw_unreachable,
kw_malloc, kw_alloca, kw_free, kw_load, kw_store, kw_getelementptr,
kw_extractelement, kw_insertelement, kw_shufflevector, kw_getresult,
- kw_extractvalue, kw_insertvalue,
+ kw_extractvalue, kw_insertvalue, kw_blockaddress,
// Unsigned Valued tokens (UIntVal).
GlobalID, // @42
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 3a385cb..68527e3 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -837,12 +837,14 @@ bool BitcodeReader::ParseMetadata() {
SmallString<8> Name;
Name.resize(RecordLength-1);
unsigned Kind = Record[0];
+ (void) Kind;
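+ // Kind is referenced only by the assert below; the (void) cast keeps
+ // unused-variable warnings quiet in NDEBUG builds (likewise for NewKind).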
for (unsigned i = 1; i != RecordLength; ++i)
Name[i-1] = Record[i];
MetadataContext &TheMetadata = Context.getMetadata();
unsigned ExistingKind = TheMetadata.getMDKind(Name.str());
if (ExistingKind == 0) {
unsigned NewKind = TheMetadata.registerMDKind(Name.str());
+ (void) NewKind;
assert (Kind == NewKind
&& "Unable to handle custom metadata mismatch!");
} else {
@@ -1190,6 +1192,22 @@ bool BitcodeReader::ParseConstants() {
AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
break;
}
+ case bitc::CST_CODE_BLOCKADDRESS:{
+ if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record");
+ const Type *FnTy = getTypeByID(Record[0]);
+ if (FnTy == 0) return Error("Invalid CE_BLOCKADDRESS record");
+ Function *Fn =
+ dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
+ if (Fn == 0) return Error("Invalid CE_BLOCKADDRESS record");
+
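+ // The referenced function's body may not have been parsed yet (e.g.
+ // '@g = global i8* blockaddress(@func, %bb)' can precede @func), so
+ // create a dummy internal i8 global as a stand-in and record it. It is
+ // RAUW'd with the real BlockAddress at the end of ParseFunctionBody.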
+ GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(),
+ Type::getInt8Ty(Context),
+ false, GlobalValue::InternalLinkage,
+ 0, "");
+ BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef));
+ V = FwdRef;
+ break;
+ }
}
ValueList.AssignValue(V, NextCstNo);
@@ -1949,7 +1967,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
break;
}
- case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, opval, n, n x ops]
+ case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...]
if (Record.size() < 3 || (Record.size() & 1) == 0)
return Error("Invalid SWITCH record");
const Type *OpTy = getTypeByID(Record[0]);
@@ -1973,7 +1991,28 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = SI;
break;
}
-
+ case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...]
+ if (Record.size() < 2)
+ return Error("Invalid INDIRECTBR record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ Value *Address = getFnValueByID(Record[1], OpTy);
+ if (OpTy == 0 || Address == 0)
+ return Error("Invalid INDIRECTBR record");
+ unsigned NumDests = Record.size()-2;
+ IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests);
+ InstructionList.push_back(IBI);
+ for (unsigned i = 0, e = NumDests; i != e; ++i) {
+ if (BasicBlock *DestBB = getBasicBlock(Record[2+i])) {
+ IBI->addDestination(DestBB);
+ } else {
+ delete IBI;
+ return Error("Invalid INDIRECTBR record!");
+ }
+ }
+ I = IBI;
+ break;
+ }
+
case bitc::FUNC_CODE_INST_INVOKE: {
// INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
if (Record.size() < 4) return Error("Invalid INVOKE record");
@@ -2073,7 +2112,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum != Record.size())
return Error("Invalid FREE record");
- I = new FreeInst(Op);
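+ // The first-class 'free' instruction is going away; lower it to a call
+ // to the free() library function, which needs a block to insert into.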
+ if (!CurBB) return Error("Invalid free instruction with no BB");
+ I = CallInst::CreateFree(Op, CurBB);
InstructionList.push_back(I);
break;
}
@@ -2224,6 +2264,27 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
}
+ // See if anything took the address of blocks in this function. If so,
+ // resolve them now.
+ DenseMap<Function*, std::vector<BlockAddrRefTy> >::iterator BAFRI =
+ BlockAddrFwdRefs.find(F);
+ if (BAFRI != BlockAddrFwdRefs.end()) {
+ std::vector<BlockAddrRefTy> &RefList = BAFRI->second;
+ for (unsigned i = 0, e = RefList.size(); i != e; ++i) {
+ unsigned BlockIdx = RefList[i].first;
+ if (BlockIdx >= FunctionBBs.size())
+ return Error("Invalid blockaddress block #");
+
+ GlobalVariable *FwdRef = RefList[i].second;
+ FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx]));
+ FwdRef->eraseFromParent();
+ }
+
+ BlockAddrFwdRefs.erase(BAFRI);
+ }
+
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
std::vector<BasicBlock*>().swap(FunctionBBs);
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index eefc7bd..7b3a1ae 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -94,7 +94,7 @@ public:
class BitcodeReaderMDValueList {
std::vector<WeakVH> MDValuePtrs;
- LLVMContext& Context;
+ LLVMContext &Context;
public:
BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {}
@@ -122,7 +122,7 @@ public:
};
class BitcodeReader : public ModuleProvider {
- LLVMContext& Context;
+ LLVMContext &Context;
MemoryBuffer *Buffer;
BitstreamReader StreamFile;
BitstreamCursor Stream;
@@ -163,6 +163,12 @@ class BitcodeReader : public ModuleProvider {
/// map contains info about where to find deferred function body (in the
/// stream) and what linkage the original function had.
DenseMap<Function*, std::pair<uint64_t, unsigned> > DeferredFunctionInfo;
+
+ /// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These
+ /// are resolved lazily when functions are loaded.
+ typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
+ DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
+
public:
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext& C)
: Context(C), Buffer(buffer), ErrorString(0), ValueList(C), MDValueList(C) {
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 037854e..af0b8ac 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -19,6 +19,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
@@ -750,10 +751,11 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
assert (0 && "Unknown FP type!");
}
} else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+ const ConstantArray *CA = cast<ConstantArray>(C);
// Emit constant strings specially.
- unsigned NumOps = C->getNumOperands();
+ unsigned NumOps = CA->getNumOperands();
// If this is a null-terminated string, use the denser CSTRING encoding.
- if (C->getOperand(NumOps-1)->isNullValue()) {
+ if (CA->getOperand(NumOps-1)->isNullValue()) {
Code = bitc::CST_CODE_CSTRING;
--NumOps; // Don't encode the null, which isn't allowed by char6.
} else {
@@ -763,7 +765,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
for (unsigned i = 0; i != NumOps; ++i) {
- unsigned char V = cast<ConstantInt>(C->getOperand(i))->getZExtValue();
+ unsigned char V = cast<ConstantInt>(CA->getOperand(i))->getZExtValue();
Record.push_back(V);
isCStr7 &= (V & 128) == 0;
if (isCStrChar6)
@@ -851,6 +853,13 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(CE->getPredicate());
break;
}
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ assert(BA->getFunction() == BA->getBasicBlock()->getParent() &&
+ "Malformed blockaddress");
+ Code = bitc::CST_CODE_BLOCKADDRESS;
+ Record.push_back(VE.getTypeID(BA->getFunction()->getType()));
+ Record.push_back(VE.getValueID(BA->getFunction()));
+ Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock()));
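+ // Note that the basic block operand is deliberately not enumerated as an
+ // ordinary value (see ValueEnumerator::EnumerateValue), so the block is
+ // identified by a function-local block ID instead.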
} else {
llvm_unreachable("Unknown constant!");
}
@@ -1000,7 +1009,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::Br:
{
Code = bitc::FUNC_CODE_INST_BR;
- BranchInst &II(cast<BranchInst>(I));
+ BranchInst &II = cast<BranchInst>(I);
Vals.push_back(VE.getValueID(II.getSuccessor(0)));
if (II.isConditional()) {
Vals.push_back(VE.getValueID(II.getSuccessor(1)));
@@ -1014,6 +1023,13 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
Vals.push_back(VE.getValueID(I.getOperand(i)));
break;
+ case Instruction::IndirectBr:
+ Code = bitc::FUNC_CODE_INST_INDIRECTBR;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i)));
+ break;
+
case Instruction::Invoke: {
const InvokeInst *II = cast<InvokeInst>(&I);
const Value *Callee(II->getCalledValue());
@@ -1054,11 +1070,6 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.push_back(VE.getValueID(I.getOperand(i)));
break;
- case Instruction::Free:
- Code = bitc::FUNC_CODE_INST_FREE;
- PushValueAndType(I.getOperand(0), InstID, Vals, VE);
- break;
-
case Instruction::Alloca:
Code = bitc::FUNC_CODE_INST_ALLOCA;
Vals.push_back(VE.getTypeID(I.getType()));
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 85aa5fa..d840d4a 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -14,6 +14,7 @@
#include "ValueEnumerator.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Module.h"
#include "llvm/TypeSymbolTable.h"
@@ -222,7 +223,9 @@ void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) {
EnumerateType(Type::getVoidTy(MD->getContext()));
}
return;
- } else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(MD)) {
+ }
+
+ if (const NamedMDNode *N = dyn_cast<NamedMDNode>(MD)) {
for(NamedMDNode::const_elem_iterator I = N->elem_begin(),
E = N->elem_end(); I != E; ++I) {
MetadataBase *M = *I;
@@ -273,7 +276,8 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
// graph that don't go through a global variable.
for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
I != E; ++I)
- EnumerateValue(*I);
+ if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
+ EnumerateValue(*I);
// Finally, add the value. Doing this could make the ValueID reference be
// dangling, don't reuse it.
@@ -319,15 +323,20 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
// This constant may have operands, make sure to enumerate the types in
// them.
- for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
- EnumerateOperandType(C->getOperand(i));
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ const User *Op = C->getOperand(i);
+
+ // Don't enumerate basic blocks here, this happens as operands to
+ // blockaddress.
+ if (isa<BasicBlock>(Op)) continue;
+
+ EnumerateOperandType(cast<Constant>(Op));
+ }
if (const MDNode *N = dyn_cast<MDNode>(V)) {
- for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) {
- Value *Elem = N->getElement(i);
- if (Elem)
+ for (unsigned i = 0, e = N->getNumElements(); i != e; ++i)
+ if (Value *Elem = N->getElement(i))
EnumerateOperandType(Elem);
- }
}
} else if (isa<MDString>(V) || isa<MDNode>(V))
EnumerateValue(V);
@@ -396,3 +405,23 @@ void ValueEnumerator::purgeFunction() {
Values.resize(NumModuleValues);
BasicBlocks.clear();
}
+
+static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
+ DenseMap<const BasicBlock*, unsigned> &IDMap) {
+ unsigned Counter = 0;
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ IDMap[BB] = ++Counter;
+}
+
+/// getGlobalBasicBlockID - This returns the function-specific ID for the
+/// specified basic block. This is relatively expensive information, so it
+/// should only be used by rare constructs such as address-of-label.
+unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
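+ // IDs are memoized biased by one so that zero means "not yet computed".
+ // On a miss, number every block in the parent function and retry.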
+ unsigned &Idx = GlobalBasicBlockIDs[BB];
+ if (Idx != 0)
+ return Idx-1;
+
+ IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
+ return getGlobalBasicBlockID(BB);
+}
+
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index da63dde..3c83e35 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -53,6 +53,10 @@ private:
AttributeMapType AttributeMap;
std::vector<AttrListPtr> Attributes;
+ /// GlobalBasicBlockIDs - This map memoizes the basic block IDs referenced by
+ /// the "getGlobalBasicBlockID" method.
+ mutable DenseMap<const BasicBlock*, unsigned> GlobalBasicBlockIDs;
+
typedef DenseMap<const Instruction*, unsigned> InstructionMapType;
InstructionMapType InstructionMap;
unsigned InstructionCount;
@@ -106,6 +110,11 @@ public:
const std::vector<AttrListPtr> &getAttributes() const {
return Attributes;
}
+
+ /// getGlobalBasicBlockID - This returns the function-specific ID for the
+ /// specified basic block. This is relatively expensive information, so it
+ /// should only be used by rare constructs such as address-of-label.
+ unsigned getGlobalBasicBlockID(const BasicBlock *BB) const;
/// incorporateFunction/purgeFunction - If you'd like to deal with a function,
/// use these two methods to get its data into the ValueEnumerator!
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
new file mode 100644
index 0000000..ffb6315
--- /dev/null
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -0,0 +1,794 @@
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AggressiveAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
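+// Maximum number of anti-dependency breaking passes per scheduling region;
+// values below one are clamped to a single pass by GetMaxTrials().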
+static cl::opt<int>
+AntiDepTrials("agg-antidep-trials",
+ cl::desc("Maximum number of anti-dependency breaking passes"),
+ cl::init(1), cl::Hidden);
+
+AggressiveAntiDepState::AggressiveAntiDepState(MachineBasicBlock *BB) :
+ GroupNodes(TargetRegisterInfo::FirstVirtualRegister, 0) {
+ // Initialize all registers to be in their own group. Initially we
+ // assign the register to the same-indexed GroupNode.
+ for (unsigned i = 0; i < TargetRegisterInfo::FirstVirtualRegister; ++i)
+ GroupNodeIndices[i] = i;
+
+ // Initialize the indices to indicate that no registers are live.
+ std::fill(KillIndices, array_endof(KillIndices), ~0u);
+ std::fill(DefIndices, array_endof(DefIndices), BB->size());
+}
+
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg)
+{
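+ // This is the find operation of the disjoint-set structure: follow
+ // parent links until reaching the self-referential root. No path
+ // compression is performed.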
+ unsigned Node = GroupNodeIndices[Reg];
+ while (GroupNodes[Node] != Node)
+ Node = GroupNodes[Node];
+
+ return Node;
+}
+
+void AggressiveAntiDepState::GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs)
+{
+ for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) {
+ if (GetGroup(Reg) == Group)
+ Regs.push_back(Reg);
+ }
+}
+
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
+{
+ assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
+ assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
+
+ // find group for each register
+ unsigned Group1 = GetGroup(Reg1);
+ unsigned Group2 = GetGroup(Reg2);
+
+ // if either group is 0, then that must become the parent
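+ // (When Group1 is nonzero, picking Group2 is correct either way: it is
+ // 0 when it must be, and an arbitrary valid parent otherwise.)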
+ unsigned Parent = (Group1 == 0) ? Group1 : Group2;
+ unsigned Other = (Parent == Group1) ? Group2 : Group1;
+ GroupNodes.at(Other) = Parent;
+ return Parent;
+}
+
+unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
+{
+ // Create a new GroupNode for Reg. Reg's existing GroupNode must
+ // stay as is because there could be other GroupNodes referring to
+ // it.
+ unsigned idx = GroupNodes.size();
+ GroupNodes.push_back(idx);
+ GroupNodeIndices[Reg] = idx;
+ return idx;
+}
+
+bool AggressiveAntiDepState::IsLive(unsigned Reg)
+{
+ // KillIndex must be defined and DefIndex not defined for a register
+ // to be live.
+ return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
+}
+
+
+
+AggressiveAntiDepBreaker::
+AggressiveAntiDepBreaker(MachineFunction& MFi) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ AllocatableSet(TRI->getAllocatableSet(MF)),
+ State(NULL), SavedState(NULL) {
+}
+
+AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
+ delete State;
+ delete SavedState;
+}
+
+unsigned AggressiveAntiDepBreaker::GetMaxTrials() {
+ if (AntiDepTrials <= 0)
+ return 1;
+ return AntiDepTrials;
+}
+
+void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ assert(State == NULL);
+ State = new AggressiveAntiDepState(BB);
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ unsigned *KillIndices = State->GetKillIndices();
+ unsigned *DefIndices = State->GetDefIndices();
+
+ // Determine the live-out physregs for this block.
+ if (IsReturnBlock) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ } else {
+ // In a non-return block, examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In non-return this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::FinishBlock() {
+ delete State;
+ State = NULL;
+ delete SavedState;
+ SavedState = NULL;
+}
+
+void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+ PrescanInstruction(MI, Count, PassthruRegs);
+ ScanInstruction(MI, Count);
+
+ DEBUG(errs() << "Observe: ");
+ DEBUG(MI->dump());
+ DEBUG(errs() << "\tRegs:");
+
+ unsigned *DefIndices = State->GetDefIndices();
+ for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled). If it is not live but was defined in the
+ // previous schedule region, then set its def index to the most
+ // conservative location (i.e. the beginning of the previous
+ // schedule region).
+ if (State->IsLive(Reg)) {
+ DEBUG(if (State->GetGroup(Reg) != 0)
+ errs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg) << "->g0(region live-out)");
+ State->UnionGroups(Reg, 0);
+ } else if ((DefIndices[Reg] < InsertPosIndex) && (DefIndices[Reg] >= Count)) {
+ DefIndices[Reg] = Count;
+ }
+ }
+ DEBUG(errs() << '\n');
+
+ // We're starting a new schedule region so forget any saved state.
+ delete SavedState;
+ SavedState = NULL;
+}
+
+bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
+ MachineOperand& MO)
+{
+ if (!MO.isReg() || !MO.isImplicit())
+ return false;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ return false;
+
+ MachineOperand *Op = NULL;
+ if (MO.isDef())
+ Op = MI->findRegisterUseOperand(Reg, true);
+ else
+ Op = MI->findRegisterDefOperand(Reg);
+
+ return((Op != NULL) && Op->isImplicit());
+}
+
+void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
+ std::set<unsigned>& PassthruRegs) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
+ IsImplicitDefUse(MI, MO)) {
+ const unsigned Reg = MO.getReg();
+ PassthruRegs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ PassthruRegs.insert(*Subreg);
+ }
+ }
+ }
+}
+
+/// AntiDepPathStep - Collect, in Edges, the anti-dependence edges of SU
+/// that are on the registers listed in Regs.
+static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs,
+ std::vector<SDep*>& Edges) {
+ AntiDepBreaker::AntiDepRegSet RegSet;
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ RegSet.insert(Regs[i]);
+
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ if (P->getKind() == SDep::Anti) {
+ unsigned Reg = P->getReg();
+ if (RegSet.count(Reg) != 0) {
+ Edges.push_back(&*P);
+ RegSet.erase(Reg);
+ }
+ }
+ }
+
+ assert(RegSet.empty() && "Expected all antidep registers to be found");
+}
+
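+/// HandleLastUse - If Reg (or any of its subregisters) is not currently
+/// live, treat KillIdx as its last use: begin a new live-range and move
+/// the register into a group of its own.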
+void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
+ const char *tag) {
+ unsigned *KillIndices = State->GetKillIndices();
+ unsigned *DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ if (!State->IsLive(Reg)) {
+ KillIndices[Reg] = KillIdx;
+ DefIndices[Reg] = ~0u;
+ RegRefs.erase(Reg);
+ State->LeaveGroup(Reg);
+ DEBUG(errs() << "->g" << State->GetGroup(Reg) << tag);
+ }
+ // Repeat for subregisters.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ if (!State->IsLive(SubregReg)) {
+ KillIndices[SubregReg] = KillIdx;
+ DefIndices[SubregReg] = ~0u;
+ RegRefs.erase(SubregReg);
+ State->LeaveGroup(SubregReg);
+ DEBUG(errs() << " " << TRI->getName(SubregReg) << "->g" <<
+ State->GetGroup(SubregReg) << tag);
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count,
+ std::set<unsigned>& PassthruRegs) {
+ unsigned *DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Handle dead defs by simulating a last-use of the register just
+ // after the def. A dead def can occur because the def is truly
+ // dead, or because only a subregister is live at the def. If we
+ // don't do this the dead def will be incorrectly merged into the
+ // previous def.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(errs() << "\tDead Def: " << TRI->getName(Reg));
+ HandleLastUse(Reg, Count + 1, "");
+ DEBUG(errs() << '\n');
+ }
+
+ DEBUG(errs() << "\tDef Groups:");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(errs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq()) {
+ DEBUG(if (State->GetGroup(Reg) != 0) errs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Any aliases that are live at this point are completely or
+ // partially defined here, so group those aliases with Reg.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (State->IsLive(AliasReg)) {
+ State->UnionGroups(Reg, AliasReg);
+ DEBUG(errs() << "->g" << State->GetGroup(Reg) << "(via " <<
+ TRI->getName(AliasReg) << ")");
+ }
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(errs() << '\n');
+
+ // Scan the register defs for this instruction and update
+ // live-ranges.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ // Ignore passthru registers for liveness...
+ if (PassthruRegs.count(Reg) != 0) continue;
+
+ // Update def for Reg and subregs.
+ DefIndices[Reg] = Count;
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ DefIndices[SubregReg] = Count;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ DEBUG(errs() << "\tUse Groups:");
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Scan the register uses for this instruction and update
+ // live-ranges, groups and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(errs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg));
+
+ // If the register wasn't previously live, this use is a kill (we
+ // scan bottom-up). Forget any previous live-range information and
+ // start a new live-range for the register.
+ HandleLastUse(Reg, Count, "(last-use)");
+
+ // If MI's uses have special allocation requirement, don't allow
+ // any use registers to be changed. Also assume all registers
+ // used in a call must not be changed (ABI).
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraSrcRegAllocReq()) {
+ DEBUG(if (State->GetGroup(Reg) != 0) errs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(errs() << '\n');
+
+ // Form a group of all defs and uses of a KILL instruction to ensure
+ // that all registers are renamed as a group.
+ if (MI->getOpcode() == TargetInstrInfo::KILL) {
+ DEBUG(errs() << "\tKill Group:");
+
+ unsigned FirstReg = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (FirstReg != 0) {
+ DEBUG(errs() << "=" << TRI->getName(Reg));
+ State->UnionGroups(FirstReg, Reg);
+ } else {
+ DEBUG(errs() << " " << TRI->getName(Reg));
+ FirstReg = Reg;
+ }
+ }
+
+ DEBUG(errs() << "->g" << State->GetGroup(FirstReg) << '\n');
+ }
+}
+
+BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
+ BitVector BV(TRI->getNumRegs(), false);
+ bool first = true;
+
+ // Check all references that need rewriting for Reg. For each, use
+ // the corresponding register class to narrow the set of registers
+ // that are appropriate for renaming.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = State->GetRegRefs().equal_range(Reg);
+ for (std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ const TargetRegisterClass *RC = Q->second.RC;
+ if (RC == NULL) continue;
+
+ BitVector RCBV = TRI->getAllocatableSet(MF, RC);
+ if (first) {
+ BV |= RCBV;
+ first = false;
+ } else {
+ BV &= RCBV;
+ }
+
+ DEBUG(errs() << " " << RC->getName());
+ }
+
+ return BV;
+}
+
+bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
+ unsigned AntiDepGroupIndex,
+ std::map<unsigned, unsigned> &RenameMap) {
+ unsigned *KillIndices = State->GetKillIndices();
+ unsigned *DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Collect all registers in the same group as AntiDepReg. These all
+ // need to be renamed together if we are to break the
+ // anti-dependence.
+ std::vector<unsigned> Regs;
+ State->GetGroupRegs(AntiDepGroupIndex, Regs);
+ assert(Regs.size() > 0 && "Empty register group!");
+ if (Regs.size() == 0)
+ return false;
+
+ // Find the "superest" register in the group. At the same time,
+ // collect the BitVector of registers that can be used to rename
+ // each register.
+ DEBUG(errs() << "\tRename Candidates for Group g" << AntiDepGroupIndex << ":\n");
+ std::map<unsigned, BitVector> RenameRegisterMap;
+ unsigned SuperReg = 0;
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
+ SuperReg = Reg;
+
+ // If Reg has any references, then collect possible rename regs
+ if (RegRefs.count(Reg) > 0) {
+ DEBUG(errs() << "\t\t" << TRI->getName(Reg) << ":");
+
+ BitVector BV = GetRenameRegisters(Reg);
+ RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV));
+
+ DEBUG(errs() << " ::");
+ DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+ errs() << " " << TRI->getName(r));
+ DEBUG(errs() << "\n");
+ }
+ }
+
+ // All group registers should be a subreg of SuperReg.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if (Reg == SuperReg) continue;
+ bool IsSub = TRI->isSubRegister(SuperReg, Reg);
+ assert(IsSub && "Expecting group subregister");
+ if (!IsSub)
+ return false;
+ }
+
+ // FIXME: for now just handle single register in group case...
+ // FIXME: check only regs that have references...
+ if (Regs.size() > 1)
+ return false;
+
+ // Check each possible rename register for SuperReg. If that register
+ // is available, and the corresponding registers are available for
+ // the other group subregisters, then we can use those registers to
+ // rename.
+ DEBUG(errs() << "\tFind Register:");
+ BitVector SuperBV = RenameRegisterMap[SuperReg];
+ for (int r = SuperBV.find_first(); r != -1; r = SuperBV.find_next(r)) {
+ const unsigned Reg = (unsigned)r;
+ // Don't replace a register with itself.
+ if (Reg == SuperReg) continue;
+
+ DEBUG(errs() << " " << TRI->getName(Reg));
+
+ // If Reg is dead and Reg's most recent def is not before
+ // SuperReg's kill, it's safe to replace SuperReg with
+ // Reg. We must also check all subregisters of Reg.
+ if (State->IsLive(Reg) || (KillIndices[SuperReg] > DefIndices[Reg])) {
+ DEBUG(errs() << "(live)");
+ continue;
+ } else {
+ bool found = false;
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ if (State->IsLive(SubregReg) || (KillIndices[SuperReg] > DefIndices[SubregReg])) {
+ DEBUG(errs() << "(subreg " << TRI->getName(SubregReg) << " live)");
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ continue;
+ }
+
+ if (Reg != 0) {
+ DEBUG(errs() << '\n');
+ RenameMap.insert(std::pair<unsigned, unsigned>(SuperReg, Reg));
+ return true;
+ }
+ }
+
+ DEBUG(errs() << '\n');
+
+ // No registers are free and available!
+ return false;
+}
+
+/// BreakAntiDependencies - Identify anti-dependencies within the
+/// ScheduleDAG and break them by renaming registers.
+///
+unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
+ std::vector<SUnit>& SUnits,
+ CandidateMap& Candidates,
+ MachineBasicBlock::iterator& Begin,
+ MachineBasicBlock::iterator& End,
+ unsigned InsertPosIndex) {
+ unsigned *KillIndices = State->GetKillIndices();
+ unsigned *DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Nothing to do if no candidates.
+ if (Candidates.empty()) {
+ DEBUG(errs() << "\n===== No anti-dependency candidates\n");
+ return 0;
+ }
+
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // Manage saved state to enable multiple passes...
+ if (AntiDepTrials > 1) {
+ if (SavedState == NULL) {
+ SavedState = new AggressiveAntiDepState(*State);
+ } else {
+ delete State;
+ State = new AggressiveAntiDepState(*SavedState);
+ }
+ }
+
+ // ...need a map from MI to SUnit.
+ std::map<MachineInstr *, SUnit *> MISUnitMap;
+
+ DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() <<
+ " anti-dependencies\n");
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU));
+ }
+
+#ifndef NDEBUG
+ {
+ DEBUG(errs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (!State->IsLive(Reg))
+ DEBUG(errs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(errs() << '\n');
+ }
+#endif
+
+ // Attempt to break anti-dependence edges. Walk the instructions
+ // from the bottom up, tracking information about liveness as we go
+ // to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ DEBUG(errs() << "Anti: ");
+ DEBUG(MI->dump());
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+
+ // Process the defs in MI...
+ PrescanInstruction(MI, Count, PassthruRegs);
+
+ std::vector<SDep*> Edges;
+ SUnit *PathSU = MISUnitMap[MI];
+ AntiDepBreaker::CandidateMap::iterator
+ citer = Candidates.find(PathSU);
+ if (citer != Candidates.end())
+ AntiDepPathStep(PathSU, citer->second, Edges);
+
+ // Ignore KILL instructions (they form a group in ScanInstruction
+ // but don't cause any anti-dependence breaking themselves)
+ if (MI->getOpcode() != TargetInstrInfo::KILL) {
+ // Attempt to break each anti-dependency...
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
+ SDep *Edge = Edges[i];
+ SUnit *NextSU = Edge->getSUnit();
+
+ if (Edge->getKind() != SDep::Anti) continue;
+
+ unsigned AntiDepReg = Edge->getReg();
+ DEBUG(errs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+
+ if (!AllocatableSet.test(AntiDepReg)) {
+ // Don't break anti-dependencies on non-allocatable registers.
+ DEBUG(errs() << " (non-allocatable)\n");
+ continue;
+ } else if (PassthruRegs.count(AntiDepReg) != 0) {
+ // If the anti-dep register liveness "passes-thru", then
+ // don't try to change it. It will be changed along with
+ // the use if required to break an earlier antidep.
+ DEBUG(errs() << " (passthru)\n");
+ continue;
+ } else {
+ // No anti-dep breaking for implicit deps
+ MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
+ assert(AntiDepOp != NULL && "Can't find index for defined register operand");
+ if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) {
+ DEBUG(errs() << " (implicit)\n");
+ continue;
+ }
+
+ // If the SUnit has other dependencies on the SUnit that
+ // it anti-depends on, don't bother breaking the
+ // anti-dependency since those edges would prevent such
+ // units from being scheduled past each other
+ // regardless.
+ for (SUnit::pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti)) {
+ DEBUG(errs() << " (real dependency)\n");
+ AntiDepReg = 0;
+ break;
+ }
+ }
+
+ if (AntiDepReg == 0) continue;
+ }
+
+ assert(AntiDepReg != 0);
+ if (AntiDepReg == 0) continue;
+
+ // Determine AntiDepReg's register group.
+ const unsigned GroupIndex = State->GetGroup(AntiDepReg);
+ if (GroupIndex == 0) {
+ DEBUG(errs() << " (zero group)\n");
+ continue;
+ }
+
+ DEBUG(errs() << '\n');
+
+ // Look for a suitable register to use to break the anti-dependence.
+ std::map<unsigned, unsigned> RenameMap;
+ if (FindSuitableFreeRegisters(GroupIndex, RenameMap)) {
+ DEBUG(errs() << "\tBreaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg) << ":");
+
+ // Handle each group register...
+ for (std::map<unsigned, unsigned>::iterator
+ S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
+ unsigned CurrReg = S->first;
+ unsigned NewReg = S->second;
+
+ DEBUG(errs() << " " << TRI->getName(CurrReg) << "->" <<
+ TRI->getName(NewReg) << "(" <<
+ RegRefs.count(CurrReg) << " refs)");
+
+ // Update the references to the old register CurrReg to
+ // refer to the new register NewReg.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = RegRefs.equal_range(CurrReg);
+ for (std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second.Operand->setReg(NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for CurrReg is now inconsistent. Set
+ // the state as if it were dead.
+ State->UnionGroups(NewReg, 0);
+ RegRefs.erase(NewReg);
+ DefIndices[NewReg] = DefIndices[CurrReg];
+ KillIndices[NewReg] = KillIndices[CurrReg];
+
+ State->UnionGroups(CurrReg, 0);
+ RegRefs.erase(CurrReg);
+ DefIndices[CurrReg] = KillIndices[CurrReg];
+ KillIndices[CurrReg] = ~0u;
+ assert(((KillIndices[CurrReg] == ~0u) !=
+ (DefIndices[CurrReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ }
+
+ ++Broken;
+ DEBUG(errs() << '\n');
+ }
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
new file mode 100644
index 0000000..5d9b40b
--- /dev/null
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -0,0 +1,176 @@
+//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+
+namespace llvm {
+ /// Class AggressiveAntiDepState
+ /// Contains all the state necessary for anti-dep breaking. We place it
+ /// in a separate class so we can conveniently save/restore it to
+ /// enable multi-pass anti-dep breaking.
+ class AggressiveAntiDepState {
+ public:
+ /// RegisterReference - Information about a register reference
+ /// within a liverange
+ typedef struct {
+ /// Operand - The register's operand
+ MachineOperand *Operand;
+ /// RC - The register class
+ const TargetRegisterClass *RC;
+ } RegisterReference;
+
+ private:
+ /// GroupNodes - Implements a disjoint-union data structure to
+ /// form register groups. A node is represented by an index into
+ /// the vector. A node can "point to" itself to indicate that it
+ /// is the parent of a group, or point to another node to indicate
+ /// that it is a member of the same group as that node.
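+ /// For example, {0, 1, 1} encodes two groups: node 0 (the special group
+ /// 0) and a group whose parent is node 1 and which node 2 also belongs to.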
+ std::vector<unsigned> GroupNodes;
+
+ /// GroupNodeIndices - For each register, the index of the GroupNode
+ /// currently representing the group that the register belongs to.
+ /// Register 0 is always represented by the 0 group, a group
+ /// composed of registers that are not eligible for anti-dependence breaking.
+ unsigned GroupNodeIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, RegisterReference> RegRefs;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// DefIndices - The index of the most recent complete def (proceeding bottom
+ /// up), or ~0u if the register is live.
+ unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ public:
+ AggressiveAntiDepState(MachineBasicBlock *BB);
+
+ /// GetKillIndices - Return the kill indices.
+ unsigned *GetKillIndices() { return KillIndices; }
+
+ /// GetDefIndices - Return the def indices.
+ unsigned *GetDefIndices() { return DefIndices; }
+
+ /// GetRegRefs - Return the RegRefs map.
+ std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+ // GetGroup - Get the group for a register. The returned value is
+ // the index of the GroupNode representing the group.
+ unsigned GetGroup(unsigned Reg);
+
+ // GetGroupRegs - Return a vector of the registers belonging to a
+ // group.
+ void GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs);
+
+ // UnionGroups - Union Reg1's and Reg2's groups to form a new
+ // group. Return the index of the GroupNode representing the
+ // group.
+ unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+ // LeaveGroup - Remove a register from its current group and place
+ // it alone in its own group. Return the index of the GroupNode
+ // representing the register's new group.
+ unsigned LeaveGroup(unsigned Reg);
+
+ /// IsLive - Return true if Reg is live
+ bool IsLive(unsigned Reg);
+ };
+
+
+ /// Class AggressiveAntiDepBreaker
+ class AggressiveAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetRegisterInfo *TRI;
+
+ /// AllocatableSet - The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// State - The state used to identify and rename anti-dependence
+ /// registers.
+ AggressiveAntiDepState *State;
+
+ /// SavedState - The state for the start of an anti-dep
+ /// region. Used to restore the state at the beginning of each
+ /// pass
+ AggressiveAntiDepState *SavedState;
+
+ public:
+ AggressiveAntiDepBreaker(MachineFunction& MFi);
+ ~AggressiveAntiDepBreaker();
+
+ /// GetMaxTrials - As anti-dependencies are broken, additional
+ /// dependencies may be exposed, so multiple passes are required.
+ unsigned GetMaxTrials();
+
+ /// NeedCandidates - This breaker requires candidates from the scheduler.
+ bool NeedCandidates() { return true; }
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identify anti-dependencies within the
+ /// ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
+ CandidateMap& Candidates,
+ MachineBasicBlock::iterator& Begin,
+ MachineBasicBlock::iterator& End,
+ unsigned InsertPosIndex);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ /// IsImplicitDefUse - Return true if MO represents a register
+ /// that is both implicitly used and defined in MI
+ bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
+
+ /// GetPassthruRegs - If MI implicitly def/uses a register, then
+ /// return that register and all subregisters.
+ void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
+
+ void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag);
+ void PrescanInstruction(MachineInstr *MI, unsigned Count,
+ std::set<unsigned>& PassthruRegs);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ BitVector GetRenameRegisters(unsigned Reg);
+ bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+ std::map<unsigned, unsigned> &RenameMap);
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
new file mode 100644
index 0000000..2775087
--- /dev/null
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -0,0 +1,73 @@
+//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+/// AntiDepBreaker - This class works in conjunction with the
+/// post-RA scheduler to rename registers to break register
+/// anti-dependencies.
+class AntiDepBreaker {
+public:
+ typedef SmallSet<unsigned, 4> AntiDepRegSet;
+ typedef SmallVector<unsigned, 4> AntiDepRegVector;
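+ /// CandidateMap - For each SUnit, the registers on its anti-dependence
+ /// edges that the scheduler nominates for breaking.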
+ typedef std::map<SUnit *, AntiDepRegVector> CandidateMap;
+
+ virtual ~AntiDepBreaker();
+
+ /// GetMaxTrials - Return the maximum number of anti-dependence
+ /// breaking attempts that will be made for a block.
+ virtual unsigned GetMaxTrials() =0;
+
+ /// NeedCandidates - Return true if the schedule must provide
+ /// candidates with BreakAntiDependencies().
+ virtual bool NeedCandidates() =0;
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ virtual void StartBlock(MachineBasicBlock *BB) =0;
+
+ /// BreakAntiDependencies - Identify anti-dependencies within a
+ /// basic-block region and break them by renaming registers. Return
+ /// the number of anti-dependencies broken.
+ ///
+ virtual unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
+ CandidateMap& Candidates,
+ MachineBasicBlock::iterator& Begin,
+ MachineBasicBlock::iterator& End,
+ unsigned InsertPosIndex) =0;
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ virtual void Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) =0;
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ virtual void FinishBlock() =0;
+};
+
+}
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 8bc5ef9..58f3aa5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -919,6 +919,8 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
default:
llvm_unreachable("Unsupported operator!");
}
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ GetBlockAddressSymbol(BA)->print(O, MAI);
} else {
llvm_unreachable("Unknown constant value!");
}
@@ -1366,6 +1368,7 @@ void AsmPrinter::processDebugLoc(const MachineInstr *MI,
unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
CurDLT.Scope);
printLabel(L);
+ O << '\n';
#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
DW->SetDbgScopeBeginLabels(MI, L);
#endif
@@ -1613,6 +1616,24 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
return true;
}
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
+ return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock());
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
+ const BasicBlock *BB) const {
+ assert(BB->hasName() &&
+ "Address of anonymous basic block not supported yet!");
+
+ // FIXME: This isn't guaranteed to produce a unique name even if the
+ // block and function have a name.
+ std::string Mangled =
+ Mang->getMangledName(F, Mang->makeNameProper(BB->getName()).c_str(),
+ /*ForcePrivate=*/true);
+
+ return OutContext.GetOrCreateSymbol(StringRef(Mangled));
+}
+
MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const {
SmallString<60> Name;
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BB"
@@ -1626,9 +1647,27 @@ MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const {
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+ // Emit an alignment directive for this block, if needed.
if (unsigned Align = MBB->getAlignment())
EmitAlignment(Log2_32(Align));
+ // If the block has its address taken, emit a special label to satisfy
+ // references to the block. This is done so that we don't need to
+ // remember the number of this label, and so that we can make
+ // forward references to labels without knowing what their numbers
+ // will be.
+ if (MBB->hasAddressTaken()) {
+ GetBlockAddressSymbol(MBB->getBasicBlock()->getParent(),
+ MBB->getBasicBlock())->print(O, MAI);
+ O << ':';
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Address Taken";
+ }
+ O << '\n';
+ }
+
+ // Print the main label for the block.
if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) {
if (VerboseAsm)
O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':';
@@ -1639,6 +1678,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
O << '\n';
}
+ // Print some comments to accompany the label.
if (VerboseAsm) {
if (const BasicBlock *BB = MBB->getBasicBlock())
if (BB->hasName()) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index d50e5e3..23752c4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -124,7 +124,7 @@ public:
//===----------------------------------------------------------------------===//
/// DbgVariable - This class is used to track local variable information.
///
-class VISIBILITY_HIDDEN DbgVariable {
+class DbgVariable {
DIVariable Var; // Variable Descriptor.
unsigned FrameIndex; // Variable frame index.
bool InlinedFnVar; // Variable for an inlined function.
@@ -142,7 +142,7 @@ public:
/// DbgScope - This class is used to track scope information.
///
class DbgConcreteScope;
-class VISIBILITY_HIDDEN DbgScope {
+class DbgScope {
DbgScope *Parent; // Parent to this scope.
DIDescriptor Desc; // Debug info descriptor for scope.
// FIXME use WeakVH for Desc.
@@ -1249,6 +1249,9 @@ CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const {
DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
// Get the descriptor.
const DIVariable &VD = DV->getVariable();
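+ // Bail out early on unnamed variables rather than emitting a DIE with
+ // no DW_AT_name.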
+ const char *Name = VD.getName();
+ if (!Name)
+ return NULL;
// Translate tag to proper Dwarf tag. The result variable is dropped for
// now.
@@ -1267,7 +1270,6 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
// Define variable debug information entry.
DIE *VariableDie = new DIE(Tag);
- const char *Name = VD.getName();
AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
// Add source line info if available.
@@ -1304,15 +1306,16 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
///
DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI,
MDNode *InlinedAt) {
- DbgScope *&Slot = DbgScopeMap[N];
- if (Slot) return Slot;
+ ValueMap<MDNode *, DbgScope *>::iterator VI = DbgScopeMap.find(N);
+ if (VI != DbgScopeMap.end())
+ return VI->second;
DbgScope *Parent = NULL;
if (InlinedAt) {
DILocation IL(InlinedAt);
assert (!IL.isNull() && "Invalid InlinedAt location!");
- DenseMap<MDNode *, DbgScope *>::iterator DSI =
+ ValueMap<MDNode *, DbgScope *>::iterator DSI =
DbgScopeMap.find(IL.getScope().getNode());
assert (DSI != DbgScopeMap.end() && "Unable to find InlineAt scope!");
Parent = DSI->second;
@@ -1334,17 +1337,18 @@ DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI,
assert (0 && "Unexpected scope info");
}
- Slot = new DbgScope(Parent, DIDescriptor(N), InlinedAt);
- Slot->setFirstInsn(MI);
+ DbgScope *NScope = new DbgScope(Parent, DIDescriptor(N), InlinedAt);
+ NScope->setFirstInsn(MI);
if (Parent)
- Parent->AddScope(Slot);
+ Parent->AddScope(NScope);
else
// First function is top level function.
if (!FunctionDbgScope)
- FunctionDbgScope = Slot;
+ FunctionDbgScope = NScope;
- return Slot;
+ DbgScopeMap.insert(std::make_pair(N, NScope));
+ return NScope;
}
@@ -1812,7 +1816,7 @@ void DwarfDebug::CollectVariableInfo() {
if (DV.isNull()) continue;
unsigned VSlot = VI->second;
DbgScope *Scope = NULL;
- DenseMap<MDNode *, DbgScope *>::iterator DSI =
+ ValueMap<MDNode *, DbgScope *>::iterator DSI =
DbgScopeMap.find(DV.getContext().getNode());
if (DSI != DbgScopeMap.end())
Scope = DSI->second;
@@ -1884,8 +1888,10 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
// If a scope's last instruction is not set then use its child scope's
// last instruction as this scope's last instruction.
- for (DenseMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
+ for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
DE = DbgScopeMap.end(); DI != DE; ++DI) {
+ DbgScope *S = DI->second;
+ if (!S) continue;
assert (DI->second->getFirstInsn() && "Invalid first instruction!");
DI->second->FixInstructionMarkers();
assert (DI->second->getLastInsn() && "Invalid last instruction!");
@@ -1895,10 +1901,10 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
// and end of a scope respectively. Create an inverse map that lists the
// scopes starting (and ending) at each instruction. One instruction may
// start (or end) multiple scopes.
- for (DenseMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
+ for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
DE = DbgScopeMap.end(); DI != DE; ++DI) {
DbgScope *S = DI->second;
- assert (S && "DbgScope is missing!");
+ if (!S) continue;
const MachineInstr *MI = S->getFirstInsn();
assert (MI && "DbgScope does not have first instruction!");
@@ -2172,7 +2178,7 @@ void DwarfDebug::RecordVariable(MDNode *N, unsigned FrameIndex) {
if (!SP.isNull()) {
// SP is inserted into DbgAbstractScopeMap when the inlined function's
// start is recorded by RecordInlinedFnStart.
- DenseMap<MDNode *, DbgScope *>::iterator
+ ValueMap<MDNode *, DbgScope *>::iterator
I = DbgAbstractScopeMap.find(SP.getNode());
if (I != DbgAbstractScopeMap.end()) {
InlinedVar = true;
@@ -2249,7 +2255,7 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
LexicalScopeStack.back()->AddConcreteInst(ConcreteScope);
// Keep track of the concrete scope that's inlined into this function.
- DenseMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
+ ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
SI = DbgConcreteScopeMap.find(Node);
if (SI == DbgConcreteScopeMap.end())
@@ -2258,7 +2264,7 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
SI->second.push_back(ConcreteScope);
// Track the start label for this inlined function.
- DenseMap<MDNode *, SmallVector<unsigned, 4> >::iterator
+ ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator
I = InlineInfo.find(Node);
if (I == InlineInfo.end())
@@ -2281,7 +2287,7 @@ unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
DebugTimer->startTimer();
MDNode *Node = SP.getNode();
- DenseMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
+ ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
I = DbgConcreteScopeMap.find(Node);
if (I == DbgConcreteScopeMap.end()) {
@@ -2989,7 +2995,7 @@ void DwarfDebug::EmitDebugInlineInfo() {
Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
- for (DenseMap<MDNode *, SmallVector<unsigned, 4> >::iterator
+ for (ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator
I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
MDNode *Node = I->first;
SmallVector<unsigned, 4> &Labels = I->second;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 7f71104..ddb0a15 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -20,7 +20,7 @@
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringMap.h"
@@ -139,7 +139,7 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
DbgScope *FunctionDbgScope;
/// DbgScopeMap - Tracks the scopes in the current function.
- DenseMap<MDNode *, DbgScope *> DbgScopeMap;
+ ValueMap<MDNode *, DbgScope *> DbgScopeMap;
/// ScopedGVs - Tracks global variables that are not at file scope.
/// For example void f() { static int b = 42; }
@@ -156,16 +156,16 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
/// DbgAbstractScopeMap - Tracks abstract instance scopes in the current
/// function.
- DenseMap<MDNode *, DbgScope *> DbgAbstractScopeMap;
+ ValueMap<MDNode *, DbgScope *> DbgAbstractScopeMap;
/// DbgConcreteScopeMap - Tracks concrete instance scopes in the current
/// function.
- DenseMap<MDNode *,
+ ValueMap<MDNode *,
SmallVector<DbgScope *, 8> > DbgConcreteScopeMap;
/// InlineInfo - Keep track of inlined functions and their location. This
/// information is used to populate debug_inlined section.
- DenseMap<MDNode *, SmallVector<unsigned, 4> > InlineInfo;
+ ValueMap<MDNode *, SmallVector<unsigned, 4> > InlineInfo;
/// AbstractInstanceRootMap - Map of abstract instance roots of inlined
/// functions. These are subroutine entries that contain a DW_AT_inline
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 06b92b7..9286ad5 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -20,14 +20,13 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN OcamlGCMetadataPrinter : public GCMetadataPrinter {
+ class OcamlGCMetadataPrinter : public GCMetadataPrinter {
public:
void beginAssembly(raw_ostream &OS, AsmPrinter &AP,
const MCAsmInfo &MAI);
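Dropping VISIBILITY_HIDDEN is safe in this and the following files because each affected class sits in an anonymous namespace: such names cannot be referenced from other translation units, so the hidden-visibility attribute (and the Support/Compiler.h include that provided it) buys nothing here. A small illustration of the rule, with a made-up class name:

namespace {
  // The anonymous namespace alone keeps this class private to the current
  // translation unit; no visibility attribute is required.
  class ExamplePrinter {
  public:
    void print() {}
  };
} // end anonymous namespace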
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 66c5aa5..baea964 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -18,6 +18,7 @@
#define DEBUG_TYPE "branchfolding"
#include "BranchFolding.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -444,6 +445,36 @@ static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
}
}
+/// ProfitableToMerge - Check if two machine basic blocks have a common tail
+/// and decide if it would be profitable to merge those tails. Returns true if
+/// so, setting CommonTailLen to the length of the common tail and I1, I2 to
+/// the first shared instruction in each block.
+static bool ProfitableToMerge(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ unsigned minCommonTailLength,
+ unsigned &CommonTailLen,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
+ MachineFunction *MF = MBB1->getParent();
+
+ if (CommonTailLen >= minCommonTailLength)
+ return true;
+
+ if (CommonTailLen == 0)
+ return false;
+
+ // If we are optimizing for code size, 1 instruction in common is enough if
+ // we don't have to split a block. At worst we will be replacing a
+ // fallthrough into the common tail with a branch, which at worst breaks
+ // even with falling through into the duplicated common tail.
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ (I1 == MBB1->begin() || I2 == MBB2->begin()))
+ return true;
+
+ return false;
+}
+
/// ComputeSameTails - Look through all the blocks in MergePotentials that have
/// hash CurHash (guaranteed to match the last element). Build the vector
/// SameTails of all those that have the (same) largest number of instructions
@@ -465,22 +496,9 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
CurMPIter!=B && CurMPIter->first==CurHash;
--CurMPIter) {
for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) {
- unsigned CommonTailLen = ComputeCommonTailLength(
- CurMPIter->second,
- I->second,
- TrialBBI1, TrialBBI2);
- // If we will have to split a block, there should be at least
- // minCommonTailLength instructions in common; if not, at worst
- // we will be replacing a fallthrough into the common tail with a
- // branch, which at worst breaks even with falling through into
- // the duplicated common tail, so 1 instruction in common is enough.
- // We will always pick a block we do not have to split as the common
- // tail if there is one.
- // (Empty blocks will get forwarded and need not be considered.)
- if (CommonTailLen >= minCommonTailLength ||
- (CommonTailLen > 0 &&
- (TrialBBI1==CurMPIter->second->begin() ||
- TrialBBI2==I->second->begin()))) {
+ unsigned CommonTailLen;
+ if (ProfitableToMerge(CurMPIter->second, I->second, minCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
@@ -863,8 +881,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// If this block is empty, make everyone use its fall-through, not the block
// explicitly. Landing pads should not do this since the landing-pad table
- // points to this block.
- if (MBB->empty() && !MBB->isLandingPad()) {
+ // points to this block. Blocks with their addresses taken shouldn't be
+ // optimized away.
+ if (MBB->empty() && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
// Dead block? Leave for cleanup later.
if (MBB->pred_empty()) return MadeChange;
@@ -1031,7 +1050,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// If this branch is the only thing in its block, see if we can forward
// other blocks across it.
if (CurTBB && CurCond.empty() && CurFBB == 0 &&
- MBB->begin()->getDesc().isBranch() && CurTBB != MBB) {
+ MBB->begin()->getDesc().isBranch() && CurTBB != MBB &&
+ !MBB->hasAddressTaken()) {
// This block may contain just an unconditional branch. Because there can
// be 'non-branch terminators' in the block, try removing the branch and
// then seeing if the block is empty.
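The ProfitableToMerge refactoring earlier in this file reads most clearly as a pure decision function. This sketch restates its rule with illustrative names; the thresholds mirror the code above:

// Decide whether two common tails are worth merging. MinLen is the usual
// threshold; under OptimizeForSize a single shared instruction pays off as
// long as no block has to be split, since a fallthrough turned into a
// branch at worst breaks even.
static bool worthMergingTails(unsigned CommonLen, unsigned MinLen,
                              bool OptForSize, bool TailIsWholeBlock) {
  if (CommonLen >= MinLen)
    return true;   // Long enough on its own.
  if (CommonLen == 0)
    return false;  // Nothing in common to merge.
  return OptForSize && TailIsWholeBlock;
}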
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 713c30c..9583edc 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,6 +1,8 @@
add_llvm_library(LLVMCodeGen
+ AggressiveAntiDepBreaker.cpp
BranchFolding.cpp
CodePlacementOpt.cpp
+ CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
DwarfEHPrepare.cpp
ELFCodeEmitter.cpp
@@ -40,6 +42,7 @@ add_llvm_library(LLVMCodeGen
Passes.cpp
PostRASchedulerList.cpp
PreAllocSplitting.cpp
+ ProcessImplicitDefs.cpp
PrologEpilogInserter.cpp
PseudoSourceValue.cpp
RegAllocLinearScan.cpp
@@ -55,6 +58,7 @@ add_llvm_library(LLVMCodeGen
ShrinkWrapping.cpp
SimpleRegisterCoalescing.cpp
SjLjEHPrepare.cpp
+ SlotIndexes.cpp
Spiller.cpp
StackProtector.cpp
StackSlotColoring.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
new file mode 100644
index 0000000..984e013
--- /dev/null
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -0,0 +1,540 @@
+//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+CriticalAntiDepBreaker::
+CriticalAntiDepBreaker(MachineFunction& MFi) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ AllocatableSet(TRI->getAllocatableSet(MF))
+{
+}
+
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
+}
+
+void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ // Clear out the register class data.
+ std::fill(Classes, array_endof(Classes),
+ static_cast<const TargetRegisterClass *>(0));
+
+ // Initialize the indices to indicate that no registers are live.
+ std::fill(KillIndices, array_endof(KillIndices), ~0u);
+ std::fill(DefIndices, array_endof(DefIndices), BB->size());
+
+ // Clear "do not change" set.
+ KeepRegs.clear();
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+
+ // Determine the live-out physregs for this block.
+ if (IsReturnBlock) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ } else {
+ // In a non-return block, examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In non-return this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::FinishBlock() {
+ RegRefs.clear();
+ KeepRegs.clear();
+}
+
+void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
+ for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
+ if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
+ // Mark this register as non-renamable.
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ // Move the def index to the end of the previous region, to reflect
+ // that the def could theoretically have been scheduled at the end.
+ DefIndices[Reg] = InsertPosIndex;
+ }
+
+ PrescanInstruction(MI);
+ ScanInstruction(MI, Count);
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SDep *CriticalPathStep(SUnit *SU) {
+ SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ return Next;
+}
+
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+ // Scan the register operands for this instruction and update
+ // Classes and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ const TargetRegisterClass *NewRC = 0;
+
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Now check for aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ // If an alias of the reg is used during the live range, give up.
+ // Note that this allows us to skip checking if AntiDepReg
+ // overlaps with any of the aliases, among other things.
+ unsigned AliasReg = *Alias;
+ if (Classes[AliasReg]) {
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+
+ // If we're still willing to consider this register, note the reference.
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // It's not safe to change register allocation for source operands that
+ // have special allocation requirements.
+ if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
+ if (KeepRegs.insert(Reg)) {
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ KeepRegs.insert(*Subreg);
+ }
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ // Update liveness.
+ // Proceeding upwards, registers that are defined but not used in this
+ // instruction are now dead.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ DefIndices[Reg] = Count;
+ KillIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ KeepRegs.erase(Reg);
+ Classes[Reg] = 0;
+ RegRefs.erase(Reg);
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ KeepRegs.erase(SubregReg);
+ Classes[SubregReg] = 0;
+ RegRefs.erase(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ *Super; ++Super) {
+ unsigned SuperReg = *Super;
+ Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isUse()) continue;
+
+ const TargetRegisterClass *NewRC = 0;
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // It wasn't previously live but now it is; this is a kill.
+ if (KillIndices[Reg] == ~0u) {
+ KillIndices[Reg] = Count;
+ DefIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ }
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (KillIndices[AliasReg] == ~0u) {
+ KillIndices[AliasReg] = Count;
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+}
+
+unsigned
+CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC) {
+ for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
+ RE = RC->allocation_order_end(MF); R != RE; ++R) {
+ unsigned NewReg = *R;
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] != ~0u ||
+ Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+ KillIndices[AntiDepReg] > DefIndices[NewReg])
+ continue;
+ return NewReg;
+ }
+
+ // No registers are free and available!
+ return 0;
+}
+
+unsigned CriticalAntiDepBreaker::
+BreakAntiDependencies(std::vector<SUnit>& SUnits,
+ CandidateMap& Candidates,
+ MachineBasicBlock::iterator& Begin,
+ MachineBasicBlock::iterator& End,
+ unsigned InsertPosIndex) {
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // Find the node at the bottom of the critical path.
+ SUnit *Max = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+ Max = SU;
+ }
+
+#ifndef NDEBUG
+ {
+ DEBUG(errs() << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n");
+ DEBUG(errs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] == ~0u)
+ DEBUG(errs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(errs() << '\n');
+ }
+#endif
+
+ // Track progress along the critical path through the SUnit graph as we walk
+ // the instructions.
+ SUnit *CriticalPathSU = Max;
+ MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+ // Consider this pattern:
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // There are three anti-dependencies here, and without special care,
+ // we'd break all of them using the same register:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // because at each anti-dependence, B is the first free register that
+ // isn't A. This re-introduces anti-dependencies at all but one of the
+ // original anti-dependencies that we were trying to break. To avoid
+ // this, keep track of the most recent register that each register was
+ // replaced with, and avoid using it to repair an anti-dependence on
+ // the same register.
+ // This lets us produce this:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // C = ...
+ // ... = C
+ // B = ...
+ // ... = B
+ // This still has an anti-dependence on B, but at least it isn't on the
+ // original critical path.
+ //
+ // TODO: If we tracked more than one register here, we could potentially
+ // fix that remaining critical edge too. This is a little more involved,
+ // because unlike the most recent register, less recent registers should
+ // still be considered, though only if no other registers are available.
+ unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
+
+ // Attempt to break anti-dependence edges on the critical path. Walk the
+ // instructions from the bottom up, tracking information about liveness
+ // as we go to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ // Check if this instruction has a dependence on the critical path that
+ // is an anti-dependence that we may be able to break. If it is, set
+ // AntiDepReg to the non-zero register associated with the anti-dependence.
+ //
+ // We limit our attention to the critical path as a heuristic to avoid
+ // breaking anti-dependence edges that aren't going to significantly
+ // impact the overall schedule. There are a limited number of registers
+ // and we want to save them for the important edges.
+ //
+ // TODO: Instructions with multiple defs could have multiple
+ // anti-dependencies. The current code here only knows how to break one
+ // edge per instruction. Note that we'd have to be able to break all of
+ // the anti-dependencies in an instruction in order to be effective.
+ unsigned AntiDepReg = 0;
+ if (MI == CriticalPathMI) {
+ if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ SUnit *NextSU = Edge->getSUnit();
+
+ // Only consider anti-dependence edges.
+ if (Edge->getKind() == SDep::Anti) {
+ AntiDepReg = Edge->getReg();
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+ if (!AllocatableSet.test(AntiDepReg))
+ // Don't break anti-dependencies on non-allocatable registers.
+ AntiDepReg = 0;
+ else if (KeepRegs.count(AntiDepReg))
+ // Don't break anti-dependencies if a use further down requires
+ // this exact register.
+ AntiDepReg = 0;
+ else {
+ // If the SUnit has other dependencies on the SUnit that it
+ // anti-depends on, don't bother breaking the anti-dependency
+ // since those edges would prevent such units from being
+ // scheduled past each other regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
+ PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+ CriticalPathSU = NextSU;
+ CriticalPathMI = CriticalPathSU->getInstr();
+ } else {
+ // We've reached the end of the critical path.
+ CriticalPathSU = 0;
+ CriticalPathMI = 0;
+ }
+ }
+
+ PrescanInstruction(MI);
+
+ if (MI->getDesc().hasExtraDefRegAllocReq())
+ // If this instruction's defs have special allocation requirement, don't
+ // break this anti-dependency.
+ AntiDepReg = 0;
+ else if (AntiDepReg) {
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && AntiDepReg == Reg) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+
+ // Determine AntiDepReg's register class, if it is live and is
+ // consistently used within a single class.
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
+ assert((AntiDepReg == 0 || RC != NULL) &&
+ "Register should be live if it's causing an anti-dependence!");
+ if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+ AntiDepReg = 0;
+
+ // Look for a suitable register to use to break the anti-dependence.
+ //
+ // TODO: Instead of picking the first free register, consider which might
+ // be the best.
+ if (AntiDepReg != 0) {
+ if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
+ LastNewReg[AntiDepReg],
+ RC)) {
+ DEBUG(errs() << "Breaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg)
+ << " with " << RegRefs.count(AntiDepReg) << " references"
+ << " using " << TRI->getName(NewReg) << "!\n");
+
+ // Update the references to the old register to refer to the new
+ // register.
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q)
+ Q->second->setReg(NewReg);
+
+ // We just went back in time and modified history; the
+ // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = 0;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ LastNewReg[AntiDepReg] = NewReg;
+ ++Broken;
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
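Every Kill/Def assert in the file above enforces a single invariant: for each physical register, exactly one of KillIndices[Reg] and DefIndices[Reg] is ~0u, so a register is live exactly when its kill index is valid. A standalone checker sketch (illustrative, not part of the class):

#include <cassert>

static void checkLivenessInvariant(const unsigned *KillIndices,
                                   const unsigned *DefIndices,
                                   unsigned NumRegs) {
  for (unsigned Reg = 0; Reg != NumRegs; ++Reg)
    // Live: valid kill index, def index of ~0u. Dead: the reverse. Both
    // valid or both ~0u would mean the maps disagree.
    assert(((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u)) &&
           "Kill and Def maps aren't consistent!");
}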
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
new file mode 100644
index 0000000..5664d85
--- /dev/null
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -0,0 +1,103 @@
+//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+
+namespace llvm {
+ class CriticalAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetRegisterInfo *TRI;
+
+ /// AllocatableSet - The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// Classes - For live regs that are only used in one register class in a
+ /// live range, the register class. If the register is not live, the
+ /// corresponding value is null. If the register is live but used in
+ /// multiple register classes, the corresponding value is -1 cast to a
+ /// pointer.
+ const TargetRegisterClass *
+ Classes[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, MachineOperand *> RegRefs;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// DefIndices - The index of the most recent complete def (proceeding
+ /// bottom-up), or ~0u if the register is live.
+ unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// KeepRegs - A set of registers which are live and cannot be changed to
+ /// break anti-dependencies.
+ SmallSet<unsigned, 4> KeepRegs;
+
+ public:
+ CriticalAntiDepBreaker(MachineFunction& MFi);
+ ~CriticalAntiDepBreaker();
+
+ /// GetMaxTrials - Critical path anti-dependence breaking requires
+ /// only a single pass.
+ unsigned GetMaxTrials() { return 1; }
+
+ /// NeedCandidates - Candidates not needed.
+ bool NeedCandidates() { return false; }
+
+ /// StartBlock - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identify anti-dependencies along the critical
+ /// path of the ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
+ CandidateMap& Candidates,
+ MachineBasicBlock::iterator& Begin,
+ MachineBasicBlock::iterator& End,
+ unsigned InsertPosIndex);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// FinishBlock - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ void PrescanInstruction(MachineInstr *MI);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ unsigned findSuitableFreeRegister(unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *);
+ };
+}
+
+#endif
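A sketch of how a post-RA scheduling driver might exercise this interface; the CandidateMap type comes from AntiDepBreaker.h, and the scheduler-side state (SUnits, iterators, insert position) is assumed for illustration:

#include "CriticalAntiDepBreaker.h"
using namespace llvm;

static void runAntiDepBreaking(CriticalAntiDepBreaker &ADB,
                               MachineBasicBlock *BB,
                               std::vector<SUnit> &SUnits,
                               AntiDepBreaker::CandidateMap &Candidates,
                               MachineBasicBlock::iterator Begin,
                               MachineBasicBlock::iterator End,
                               unsigned InsertPosIndex) {
  ADB.StartBlock(BB);            // Seed liveness from the block's live-outs.
  // Instructions left in place would be reported through ADB.Observe(...).
  unsigned Broken =
      ADB.BreakAntiDependencies(SUnits, Candidates, Begin, End,
                                InsertPosIndex);
  (void)Broken;                  // e.g. feed a "broken edges" statistic.
  ADB.FinishBlock();             // Drop the per-block maps and sets.
}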
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 078ed3d..07a5d38 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -15,7 +15,6 @@
#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -23,8 +22,7 @@
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN DeadMachineInstructionElim :
- public MachineFunctionPass {
+ class DeadMachineInstructionElim : public MachineFunctionPass {
virtual bool runOnMachineFunction(MachineFunction &MF);
const TargetRegisterInfo *TRI;
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 72b3f92..7fc62a9 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -21,7 +21,6 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -33,7 +32,7 @@ STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced");
namespace {
- class VISIBILITY_HIDDEN DwarfEHPrepare : public FunctionPass {
+ class DwarfEHPrepare : public FunctionPass {
const TargetLowering *TLI;
bool CompileFast;
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index b466e89..e303ebb 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
namespace llvm {
class GlobalValue;
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
index f35d196..36925b1 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "exact-hazards"
+#define DEBUG_TYPE "post-RA-sched"
#include "ExactHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index a57296c..4d25dcc 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -17,14 +17,13 @@
#include "llvm/Pass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN Printer : public FunctionPass {
+ class Printer : public FunctionPass {
static char ID;
raw_ostream &OS;
@@ -39,7 +38,7 @@ namespace {
bool runOnFunction(Function &F);
};
- class VISIBILITY_HIDDEN Deleter : public FunctionPass {
+ class Deleter : public FunctionPass {
static char ID;
public:
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 6d0de41..6e0bde6 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -27,7 +27,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -39,7 +38,7 @@ namespace {
/// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
/// directed by the GCStrategy. It also performs automatic root initialization
/// and custom intrinsic lowering.
- class VISIBILITY_HIDDEN LowerIntrinsics : public FunctionPass {
+ class LowerIntrinsics : public FunctionPass {
static bool NeedsDefaultLoweringPass(const GCStrategy &C);
static bool NeedsCustomLoweringPass(const GCStrategy &C);
static bool CouldBecomeSafePoint(Instruction *I);
@@ -63,7 +62,7 @@ namespace {
/// function representation to identify safe points for the garbage collector
/// in the machine code. It inserts labels at safe points and populates a
/// GCMetadata record for each function.
- class VISIBILITY_HIDDEN MachineCodeAnalysis : public MachineFunctionPass {
+ class MachineCodeAnalysis : public MachineFunctionPass {
const TargetMachine *TM;
GCFunctionInfo *FI;
MachineModuleInfo *MMI;
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 7b613ff..45f08b1 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -59,7 +59,7 @@ STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
STATISTIC(NumDupBBs, "Number of duplicated blocks");
namespace {
- class VISIBILITY_HIDDEN IfConverter : public MachineFunctionPass {
+ class IfConverter : public MachineFunctionPass {
enum IfcvtKind {
ICNotClassfied, // BB data valid, but not classified.
ICSimpleFalse, // Same as ICSimple, but on the false path.
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index e58a9ca..6300a52 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -39,8 +39,6 @@ static cl::opt<bool> PrintEmittedAsm("print-emitted-asm", cl::Hidden,
cl::desc("Dump emitter generated instructions as assembly"));
static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
-static cl::opt<bool> HoistConstants("hoist-constants", cl::Hidden,
- cl::desc("Hoist constants out of loops"));
static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
@@ -70,18 +68,6 @@ LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
if (addCommonCodeGenPasses(PM, OptLevel))
return FileModel::Error;
- // Fold redundant debug labels.
- PM.add(createDebugLabelFoldingPass());
-
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(errs()));
-
- if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(errs()));
-
- if (OptLevel != CodeGenOpt::None)
- PM.add(createCodePlacementOptPass());
-
switch (FileType) {
default:
break;
@@ -173,9 +159,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
if (addCommonCodeGenPasses(PM, OptLevel))
return true;
- if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(errs()));
-
addCodeEmitter(PM, OptLevel, MCE);
if (PrintEmittedAsm)
addAssemblyEmitter(PM, OptLevel, true, ferrs());
@@ -198,9 +181,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
if (addCommonCodeGenPasses(PM, OptLevel))
return true;
- if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(errs()));
-
addCodeEmitter(PM, OptLevel, JCE);
if (PrintEmittedAsm)
addAssemblyEmitter(PM, OptLevel, true, ferrs());
@@ -211,9 +191,10 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
}
static void printAndVerify(PassManagerBase &PM,
+ const char *Banner,
bool allowDoubleDefs = false) {
if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(errs()));
+ PM.add(createMachineFunctionPrinterPass(errs(), Banner));
if (VerifyMachineCode)
PM.add(createMachineVerifierPass(allowDoubleDefs));
@@ -255,11 +236,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Make sure that no unreachable blocks are instruction selected.
PM.add(createUnreachableBlockEliminationPass());
- if (OptLevel != CodeGenOpt::None) {
- if (HoistConstants)
- PM.add(createCodeGenLICMPass());
+ if (OptLevel != CodeGenOpt::None)
PM.add(createCodeGenPreparePass(getTargetLowering()));
- }
PM.add(createStackProtectorPass(getTargetLowering()));
@@ -283,61 +261,76 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
return true;
// Print the instruction selected machine code...
- printAndVerify(PM, /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After Instruction Selection",
+ /* allowDoubleDefs= */ true);
if (OptLevel != CodeGenOpt::None) {
PM.add(createMachineLICMPass());
PM.add(createMachineSinkingPass());
- printAndVerify(PM, /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After MachineLICM and MachineSinking",
+ /* allowDoubleDefs= */ true);
}
// Run pre-ra passes.
if (addPreRegAlloc(PM, OptLevel))
- printAndVerify(PM, /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After PreRegAlloc passes",
+ /* allowDoubleDefs= */ true);
// Perform register allocation.
PM.add(createRegisterAllocator());
+ printAndVerify(PM, "After Register Allocation");
// Perform stack slot coloring.
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOpt::None) {
// FIXME: Re-enable coloring with register when it's capable of adding
// kill markers.
PM.add(createStackSlotColoringPass(false));
-
- printAndVerify(PM); // Print the register-allocated code
+ printAndVerify(PM, "After StackSlotColoring");
+ }
// Run post-ra passes.
if (addPostRegAlloc(PM, OptLevel))
- printAndVerify(PM);
+ printAndVerify(PM, "After PostRegAlloc passes");
PM.add(createLowerSubregsPass());
- printAndVerify(PM);
+ printAndVerify(PM, "After LowerSubregs");
// Insert prolog/epilog code. Eliminate abstract frame index references...
PM.add(createPrologEpilogCodeInserter());
- printAndVerify(PM);
+ printAndVerify(PM, "After PrologEpilogCodeInserter");
// Run pre-sched2 passes.
if (addPreSched2(PM, OptLevel))
- printAndVerify(PM);
+ printAndVerify(PM, "After PreSched2 passes");
// Second pass scheduler.
if (OptLevel != CodeGenOpt::None) {
PM.add(createPostRAScheduler(OptLevel));
- printAndVerify(PM);
+ printAndVerify(PM, "After PostRAScheduler");
}
// Branch folding must be run after regalloc and prolog/epilog insertion.
if (OptLevel != CodeGenOpt::None) {
PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
- printAndVerify(PM);
+ printAndVerify(PM, "After BranchFolding");
}
PM.add(createGCMachineCodeAnalysisPass());
- printAndVerify(PM);
if (PrintGCInfo)
PM.add(createGCInfoPrinter(errs()));
+ // Fold redundant debug labels.
+ PM.add(createDebugLabelFoldingPass());
+ printAndVerify(PM, "After DebugLabelFolding");
+
+ if (addPreEmitPass(PM, OptLevel))
+ printAndVerify(PM, "After PreEmit passes");
+
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createCodePlacementOptPass());
+ printAndVerify(PM, "After CodePlacementOpt");
+ }
+
return false;
}
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 2e7b89c..794ecf7 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -55,6 +55,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
SUnit *OnlyAvailablePred = 0;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
+ if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
SUnit &Pred = *I->getSUnit();
if (!Pred.isScheduled) {
// We found an available, but not scheduled, predecessor. If it's the
@@ -73,9 +74,11 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
// this node is the sole unscheduled node for.
unsigned NumNodesBlocking = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I)
+ I != E; ++I) {
+ if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
if (getSingleUnscheduledPred(I->getSUnit()) == SU)
++NumNodesBlocking;
+ }
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
Queue.push(SU);
@@ -88,8 +91,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
// the node available.
void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I)
+ I != E; ++I) {
+ if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
AdjustPriorityOfUnscheduledPreds(I->getSUnit());
+ }
}
/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
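The IgnoreAntiDep guard added above appears in every predecessor and successor walk in this queue: when it is set, anti-dependence edges simply do not contribute to latency priorities, since the anti-dep breaker may remove them anyway. The filter restated standalone (the free function is illustrative):

#include "llvm/CodeGen/ScheduleDAG.h"
using namespace llvm;

static bool considerEdge(const SDep &D, bool IgnoreAntiDep) {
  // Skip anti-dependence edges when they are candidates for removal.
  return !(IgnoreAntiDep && D.getKind() == SDep::Anti);
}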
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index a02a4a6..8d632cb 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
@@ -28,11 +29,6 @@
#include <algorithm>
using namespace llvm;
-// Print a LiveIndex to a raw_ostream.
-void LiveIndex::print(raw_ostream &os) const {
- os << (index & ~PHI_BIT);
-}
-
// An example for liveAt():
//
// this = [1,4), liveAt(0) will return false. The instruction defining this
@@ -40,7 +36,7 @@ void LiveIndex::print(raw_ostream &os) const {
// variable it represents. This is because slot 1 is used (def slot) and spans
// up to slot 3 (store slot).
//
-bool LiveInterval::liveAt(LiveIndex I) const {
+bool LiveInterval::liveAt(SlotIndex I) const {
Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
if (r == ranges.begin())
@@ -53,7 +49,7 @@ bool LiveInterval::liveAt(LiveIndex I) const {
// liveBeforeAndAt - Check if the interval is live at the index and the index
// just before it. If index is liveAt, check if it starts a new live range.
// If it does, then check if the previous live range ends at index-1.
-bool LiveInterval::liveBeforeAndAt(LiveIndex I) const {
+bool LiveInterval::liveBeforeAndAt(SlotIndex I) const {
Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
if (r == ranges.begin())
@@ -131,7 +127,7 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other,
/// overlaps - Return true if the live interval overlaps a range specified
/// by [Start, End).
-bool LiveInterval::overlaps(LiveIndex Start, LiveIndex End) const {
+bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
assert(Start < End && "Invalid range");
const_iterator I = begin();
const_iterator E = end();
@@ -149,10 +145,10 @@ bool LiveInterval::overlaps(LiveIndex Start, LiveIndex End) const {
/// specified by I to end at the specified endpoint. To do this, we should
/// merge and eliminate all ranges that this will overlap with. The iterator is
/// not invalidated.
-void LiveInterval::extendIntervalEndTo(Ranges::iterator I, LiveIndex NewEnd) {
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
assert(I != ranges.end() && "Not a valid interval!");
VNInfo *ValNo = I->valno;
- LiveIndex OldEnd = I->end;
+ SlotIndex OldEnd = I->end;
// Search for the first interval that we can't merge with.
Ranges::iterator MergeTo = next(I);
@@ -167,7 +163,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, LiveIndex NewEnd) {
ranges.erase(next(I), MergeTo);
// Update kill info.
- ValNo->removeKills(OldEnd, I->end.prevSlot_());
+ ValNo->removeKills(OldEnd, I->end.getPrevSlot());
// If the newly formed range now touches the range after it and if they have
// the same value number, merge the two ranges into one range.
@@ -183,7 +179,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, LiveIndex NewEnd) {
/// specified by I to start at the specified endpoint. To do this, we should
/// merge and eliminate all ranges that this will overlap with.
LiveInterval::Ranges::iterator
-LiveInterval::extendIntervalStartTo(Ranges::iterator I, LiveIndex NewStart) {
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) {
assert(I != ranges.end() && "Not a valid interval!");
VNInfo *ValNo = I->valno;
@@ -216,7 +212,7 @@ LiveInterval::extendIntervalStartTo(Ranges::iterator I, LiveIndex NewStart) {
LiveInterval::iterator
LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
- LiveIndex Start = LR.start, End = LR.end;
+ SlotIndex Start = LR.start, End = LR.end;
iterator it = std::upper_bound(From, ranges.end(), Start);
// If the inserted interval starts in the middle or right at the end of
@@ -268,7 +264,7 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
/// isInOneLiveRange - Return true if the range specified is entirely in
/// a single LiveRange of the live interval.
-bool LiveInterval::isInOneLiveRange(LiveIndex Start, LiveIndex End) {
+bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) {
Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
if (I == ranges.begin())
return false;
@@ -279,7 +275,7 @@ bool LiveInterval::isInOneLiveRange(LiveIndex Start, LiveIndex End) {
/// removeRange - Remove the specified range from this interval. Note that
/// the range must be in a single LiveRange in its entirety.
-void LiveInterval::removeRange(LiveIndex Start, LiveIndex End,
+void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
bool RemoveDeadValNo) {
// Find the LiveRange containing this span.
Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
@@ -331,7 +327,7 @@ void LiveInterval::removeRange(LiveIndex Start, LiveIndex End,
}
// Otherwise, we are splitting the LiveRange into two pieces.
- LiveIndex OldEnd = I->end;
+ SlotIndex OldEnd = I->end;
I->end = Start; // Trim the old interval.
// Insert the new one.
@@ -362,36 +358,11 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
ValNo->setIsUnused(true);
}
}
-
-/// scaleNumbering - Renumber VNI and ranges to provide gaps for new
-/// instructions.
-
-void LiveInterval::scaleNumbering(unsigned factor) {
- // Scale ranges.
- for (iterator RI = begin(), RE = end(); RI != RE; ++RI) {
- RI->start = RI->start.scale(factor);
- RI->end = RI->end.scale(factor);
- }
-
- // Scale VNI info.
- for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) {
- VNInfo *vni = *VNI;
-
- if (vni->isDefAccurate())
- vni->def = vni->def.scale(factor);
-
- for (unsigned i = 0; i < vni->kills.size(); ++i) {
- if (!vni->kills[i].isPHIIndex())
- vni->kills[i] = vni->kills[i].scale(factor);
- }
- }
-}
-
/// getLiveRangeContaining - Return the live range that contains the
/// specified index, or null if there is none.
LiveInterval::const_iterator
-LiveInterval::FindLiveRangeContaining(LiveIndex Idx) const {
+LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
const_iterator It = std::upper_bound(begin(), end(), Idx);
if (It != ranges.begin()) {
--It;
@@ -403,7 +374,7 @@ LiveInterval::FindLiveRangeContaining(LiveIndex Idx) const {
}
LiveInterval::iterator
-LiveInterval::FindLiveRangeContaining(LiveIndex Idx) {
+LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
iterator It = std::upper_bound(begin(), end(), Idx);
if (It != begin()) {
--It;
@@ -416,7 +387,7 @@ LiveInterval::FindLiveRangeContaining(LiveIndex Idx) {
/// findDefinedVNInfo - Find the VNInfo defined by the specified
/// index (register interval).
-VNInfo *LiveInterval::findDefinedVNInfoForRegInt(LiveIndex Idx) const {
+VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
i != e; ++i) {
if ((*i)->def == Idx)
@@ -440,7 +411,8 @@ VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const {
/// join - Join two live intervals (this, and other) together. This applies
/// mappings to the value numbers in the LHS/RHS intervals as specified. If
/// the intervals are not joinable, this aborts.
-void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments,
+void LiveInterval::join(LiveInterval &Other,
+ const int *LHSValNoAssignments,
const int *RHSValNoAssignments,
SmallVector<VNInfo*, 16> &NewVNInfo,
MachineRegisterInfo *MRI) {
@@ -554,14 +526,15 @@ void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
/// current interval; it will replace the value numbers of the overlapped
/// live ranges with the specified value number.
-void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
- const VNInfo *RHSValNo, VNInfo *LHSValNo) {
+void LiveInterval::MergeValueInAsValue(
+ const LiveInterval &RHS,
+ const VNInfo *RHSValNo, VNInfo *LHSValNo) {
SmallVector<VNInfo*, 4> ReplacedValNos;
iterator IP = begin();
for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
if (I->valno != RHSValNo)
continue;
- LiveIndex Start = I->start, End = I->end;
+ SlotIndex Start = I->start, End = I->end;
IP = std::upper_bound(IP, end(), Start);
// If the start of this range overlaps with an existing liverange, trim it.
if (IP != begin() && IP[-1].end > Start) {
@@ -621,7 +594,8 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
/// MergeInClobberRanges - For any live ranges that are not defined in the
/// current interval, but are defined in the Clobbers interval, mark them
/// used with an unknown definition value.
-void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
+void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
+ const LiveInterval &Clobbers,
BumpPtrAllocator &VNInfoAllocator) {
if (Clobbers.empty()) return;
@@ -638,20 +612,20 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
ClobberValNo = UnusedValNo;
else {
UnusedValNo = ClobberValNo =
- getNextValue(LiveIndex(), 0, false, VNInfoAllocator);
+ getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
}
bool Done = false;
- LiveIndex Start = I->start, End = I->end;
+ SlotIndex Start = I->start, End = I->end;
// If a clobber range starts before an existing range and ends after
// it, the clobber range will need to be split into multiple ranges.
// Loop until the entire clobber range is handled.
while (!Done) {
Done = true;
IP = std::upper_bound(IP, end(), Start);
- LiveIndex SubRangeStart = Start;
- LiveIndex SubRangeEnd = End;
+ SlotIndex SubRangeStart = Start;
+ SlotIndex SubRangeEnd = End;
// If the start of this range overlaps with an existing liverange, trim it.
if (IP != begin() && IP[-1].end > SubRangeStart) {
@@ -687,13 +661,14 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
}
}
-void LiveInterval::MergeInClobberRange(LiveIndex Start,
- LiveIndex End,
+void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
+ SlotIndex Start,
+ SlotIndex End,
BumpPtrAllocator &VNInfoAllocator) {
// Find a value # to use for the clobber ranges. If there is already a value#
// for unknown values, use it.
VNInfo *ClobberValNo =
- getNextValue(LiveIndex(), 0, false, VNInfoAllocator);
+ getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
iterator IP = begin();
IP = std::upper_bound(IP, end(), Start);
@@ -881,8 +856,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
OS << "-(";
for (unsigned j = 0; j != ee; ++j) {
OS << vni->kills[j];
- if (vni->kills[j].isPHIIndex())
- OS << "*";
if (j != ee-1)
OS << " ";
}
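The rename above is mostly mechanical (LiveIndex becomes SlotIndex, prevSlot_() becomes getPrevSlot()), with the hand-rolled print() and scaleNumbering() support deleted in favor of the new SlotIndexes analysis. A small sketch of the kill-trimming idiom as now written, assuming the SlotIndex API introduced at this revision:

#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/SlotIndexes.h"
using namespace llvm;

static void trimKillsAfterExtend(VNInfo *ValNo, SlotIndex OldEnd,
                                 SlotIndex NewEnd) {
  // Mirrors extendIntervalEndTo: kills recorded from the old end up to the
  // slot just before the new end now lie inside the extended range.
  ValNo->removeKills(OldEnd, NewEnd.getPrevSlot());
}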
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 79f46f3..2a93a35 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -80,6 +81,10 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
}
AU.addRequiredID(TwoAddressInstructionPassID);
+ AU.addPreserved<ProcessImplicitDefs>();
+ AU.addRequired<ProcessImplicitDefs>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
}
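
LiveIntervals now leans on two separate passes for work it used to do itself. For readers unfamiliar with the pass manager, here is a minimal sketch of the dependency-declaration idiom used in the hunk above; the pass name is hypothetical:

    #include "llvm/CodeGen/MachineFunctionPass.h"
    #include "llvm/CodeGen/ProcessImplicitDefs.h"
    #include "llvm/CodeGen/SlotIndexes.h"

    namespace {
      // Hypothetical pass showing the addRequired/addPreserved pattern.
      struct ExamplePass : public llvm::MachineFunctionPass {
        static char ID;
        ExamplePass() : MachineFunctionPass(&ID) {}

        virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
          // Run ProcessImplicitDefs beforehand and keep it valid.
          AU.addRequired<llvm::ProcessImplicitDefs>();
          AU.addPreserved<llvm::ProcessImplicitDefs>();
          // SlotIndexes must stay alive for downstream users of this pass.
          AU.addRequiredTransitive<llvm::SlotIndexes>();
          AU.addPreserved<llvm::SlotIndexes>();
          MachineFunctionPass::getAnalysisUsage(AU);
        }
        virtual bool runOnMachineFunction(llvm::MachineFunction &) {
          return false;
        }
      };
      char ExamplePass::ID = 0;
    }
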
@@ -89,12 +94,7 @@ void LiveIntervals::releaseMemory() {
E = r2iMap_.end(); I != E; ++I)
delete I->second;
- MBB2IdxMap.clear();
- Idx2MBBMap.clear();
- mi2iMap_.clear();
- i2miMap_.clear();
r2iMap_.clear();
- terminatorGaps.clear();
phiJoinCopies.clear();
  // Release VNInfo memory regions after all VNInfo objects are dtor'd.
@@ -106,422 +106,6 @@ void LiveIntervals::releaseMemory() {
}
}
-static bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg,
- unsigned OpIdx, const TargetInstrInfo *tii_){
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg)
- return true;
-
- if (OpIdx == 2 && MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
- return true;
- if (OpIdx == 1 && MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)
- return true;
- return false;
-}
-
-/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
-/// there is one implicit_def for each use. Add isUndef marker to
-/// implicit_def defs and their uses.
-void LiveIntervals::processImplicitDefs() {
- SmallSet<unsigned, 8> ImpDefRegs;
- SmallVector<MachineInstr*, 8> ImpDefMIs;
- MachineBasicBlock *Entry = mf_->begin();
- SmallPtrSet<MachineBasicBlock*,16> Visited;
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
- DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
- DFI != E; ++DFI) {
- MachineBasicBlock *MBB = *DFI;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ) {
- MachineInstr *MI = &*I;
- ++I;
- if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
- unsigned Reg = MI->getOperand(0).getReg();
- ImpDefRegs.insert(Reg);
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS)
- ImpDefRegs.insert(*SS);
- }
- ImpDefMIs.push_back(MI);
- continue;
- }
-
- if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
- MachineOperand &MO = MI->getOperand(2);
- if (ImpDefRegs.count(MO.getReg())) {
- // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2
- // This is an identity copy, eliminate it now.
- if (MO.isKill()) {
- LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
- vi.removeKill(MI);
- }
- MI->eraseFromParent();
- continue;
- }
- }
-
- bool ChangedToImpDef = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.isUndef())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- if (!ImpDefRegs.count(Reg))
- continue;
- // Use is a copy, just turn it into an implicit_def.
- if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) {
- bool isKill = MO.isKill();
- MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
- for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
- MI->RemoveOperand(j);
- if (isKill) {
- ImpDefRegs.erase(Reg);
- LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
- vi.removeKill(MI);
- }
- ChangedToImpDef = true;
- break;
- }
-
- MO.setIsUndef();
- if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
-          // Make sure other uses of this register are also marked undef.
- for (unsigned j = i+1; j != e; ++j) {
- MachineOperand &MOJ = MI->getOperand(j);
- if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
- MOJ.setIsUndef();
- }
- ImpDefRegs.erase(Reg);
- }
- }
-
- if (ChangedToImpDef) {
- // Backtrack to process this new implicit_def.
- --I;
- } else {
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- ImpDefRegs.erase(MO.getReg());
- }
- }
- }
-
- // Any outstanding liveout implicit_def's?
- for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
- MachineInstr *MI = ImpDefMIs[i];
- unsigned Reg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !ImpDefRegs.count(Reg)) {
- // Delete all "local" implicit_def's. That include those which define
- // physical registers since they cannot be liveout.
- MI->eraseFromParent();
- continue;
- }
-
- // If there are multiple defs of the same register and at least one
- // is not an implicit_def, do not insert implicit_def's before the
- // uses.
- bool Skip = false;
- for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg),
- DE = mri_->def_end(); DI != DE; ++DI) {
- if (DI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) {
- Skip = true;
- break;
- }
- }
- if (Skip)
- continue;
-
- // The only implicit_def which we want to keep are those that are live
- // out of its block.
- MI->eraseFromParent();
-
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
- UE = mri_->use_end(); UI != UE; ) {
- MachineOperand &RMO = UI.getOperand();
- MachineInstr *RMI = &*UI;
- ++UI;
- MachineBasicBlock *RMBB = RMI->getParent();
- if (RMBB == MBB)
- continue;
-
- // Turn a copy use into an implicit_def.
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg) {
- RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
- for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j)
- RMI->RemoveOperand(j);
- continue;
- }
-
- const TargetRegisterClass* RC = mri_->getRegClass(Reg);
- unsigned NewVReg = mri_->createVirtualRegister(RC);
- RMO.setReg(NewVReg);
- RMO.setIsUndef();
- RMO.setIsKill();
- }
- }
- ImpDefRegs.clear();
- ImpDefMIs.clear();
- }
-}
-
-
-void LiveIntervals::computeNumbering() {
- Index2MiMap OldI2MI = i2miMap_;
- std::vector<IdxMBBPair> OldI2MBB = Idx2MBBMap;
-
- Idx2MBBMap.clear();
- MBB2IdxMap.clear();
- mi2iMap_.clear();
- i2miMap_.clear();
- terminatorGaps.clear();
- phiJoinCopies.clear();
-
- FunctionSize = 0;
-
- // Number MachineInstrs and MachineBasicBlocks.
-  // Initialize MBB indexes to a sentinel.
- MBB2IdxMap.resize(mf_->getNumBlockIDs(),
- std::make_pair(LiveIndex(),LiveIndex()));
-
- LiveIndex MIIndex;
- for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end();
- MBB != E; ++MBB) {
- LiveIndex StartIdx = MIIndex;
-
- // Insert an empty slot at the beginning of each block.
- MIIndex = getNextIndex(MIIndex);
- i2miMap_.push_back(0);
-
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
-
- if (I == MBB->getFirstTerminator()) {
-        // Leave a gap before the terminators; this is where we will point
- // PHI kills.
- LiveIndex tGap(true, MIIndex);
- bool inserted =
- terminatorGaps.insert(std::make_pair(&*MBB, tGap)).second;
- assert(inserted &&
- "Multiple 'first' terminators encountered during numbering.");
- inserted = inserted; // Avoid compiler warning if assertions turned off.
- i2miMap_.push_back(0);
-
- MIIndex = getNextIndex(MIIndex);
- }
-
- bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second;
- assert(inserted && "multiple MachineInstr -> index mappings");
- inserted = true;
- i2miMap_.push_back(I);
- MIIndex = getNextIndex(MIIndex);
- FunctionSize++;
-
- // Insert max(1, numdefs) empty slots after every instruction.
- unsigned Slots = I->getDesc().getNumDefs();
- if (Slots == 0)
- Slots = 1;
- while (Slots--) {
- MIIndex = getNextIndex(MIIndex);
- i2miMap_.push_back(0);
- }
-
- }
-
- if (MBB->getFirstTerminator() == MBB->end()) {
-      // Leave a gap before the terminators; this is where we will point
- // PHI kills.
- LiveIndex tGap(true, MIIndex);
- bool inserted =
- terminatorGaps.insert(std::make_pair(&*MBB, tGap)).second;
- assert(inserted &&
- "Multiple 'first' terminators encountered during numbering.");
- inserted = inserted; // Avoid compiler warning if assertions turned off.
- i2miMap_.push_back(0);
-
- MIIndex = getNextIndex(MIIndex);
- }
-
- // Set the MBB2IdxMap entry for this MBB.
- MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, getPrevSlot(MIIndex));
- Idx2MBBMap.push_back(std::make_pair(StartIdx, MBB));
- }
-
- std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
-
- if (!OldI2MI.empty())
- for (iterator OI = begin(), OE = end(); OI != OE; ++OI) {
- for (LiveInterval::iterator LI = OI->second->begin(),
- LE = OI->second->end(); LI != LE; ++LI) {
-
- // Remap the start index of the live range to the corresponding new
- // number, or our best guess at what it _should_ correspond to if the
- // original instruction has been erased. This is either the following
- // instruction or its predecessor.
- unsigned index = LI->start.getVecIndex();
- LiveIndex::Slot offset = LI->start.getSlot();
- if (LI->start.isLoad()) {
- std::vector<IdxMBBPair>::const_iterator I =
- std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->start);
- // Take the pair containing the index
- std::vector<IdxMBBPair>::const_iterator J =
- (I == OldI2MBB.end() && OldI2MBB.size()>0) ? (I-1): I;
-
- LI->start = getMBBStartIdx(J->second);
- } else {
- LI->start = LiveIndex(
- LiveIndex(mi2iMap_[OldI2MI[index]]),
- (LiveIndex::Slot)offset);
- }
-
- // Remap the ending index in the same way that we remapped the start,
- // except for the final step where we always map to the immediately
- // following instruction.
- index = (getPrevSlot(LI->end)).getVecIndex();
- offset = LI->end.getSlot();
- if (LI->end.isLoad()) {
- // VReg dies at end of block.
- std::vector<IdxMBBPair>::const_iterator I =
- std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->end);
- --I;
-
- LI->end = getNextSlot(getMBBEndIdx(I->second));
- } else {
- unsigned idx = index;
- while (index < OldI2MI.size() && !OldI2MI[index]) ++index;
-
- if (index != OldI2MI.size())
- LI->end =
- LiveIndex(mi2iMap_[OldI2MI[index]],
- (idx == index ? offset : LiveIndex::LOAD));
- else
- LI->end =
- LiveIndex(LiveIndex::NUM * i2miMap_.size());
- }
- }
-
- for (LiveInterval::vni_iterator VNI = OI->second->vni_begin(),
- VNE = OI->second->vni_end(); VNI != VNE; ++VNI) {
- VNInfo* vni = *VNI;
-
- // Remap the VNInfo def index, which works the same as the
- // start indices above. VN's with special sentinel defs
- // don't need to be remapped.
- if (vni->isDefAccurate() && !vni->isUnused()) {
- unsigned index = vni->def.getVecIndex();
- LiveIndex::Slot offset = vni->def.getSlot();
- if (vni->def.isLoad()) {
- std::vector<IdxMBBPair>::const_iterator I =
- std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->def);
- // Take the pair containing the index
- std::vector<IdxMBBPair>::const_iterator J =
- (I == OldI2MBB.end() && OldI2MBB.size()>0) ? (I-1): I;
-
- vni->def = getMBBStartIdx(J->second);
- } else {
- vni->def = LiveIndex(mi2iMap_[OldI2MI[index]], offset);
- }
- }
-
- // Remap the VNInfo kill indices, which works the same as
- // the end indices above.
- for (size_t i = 0; i < vni->kills.size(); ++i) {
- unsigned index = getPrevSlot(vni->kills[i]).getVecIndex();
- LiveIndex::Slot offset = vni->kills[i].getSlot();
-
- if (vni->kills[i].isLoad()) {
- assert("Value killed at a load slot.");
- /*std::vector<IdxMBBPair>::const_iterator I =
- std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]);
- --I;
-
- vni->kills[i] = getMBBEndIdx(I->second);*/
- } else {
- if (vni->kills[i].isPHIIndex()) {
- std::vector<IdxMBBPair>::const_iterator I =
- std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]);
- --I;
- vni->kills[i] = terminatorGaps[I->second];
- } else {
- assert(OldI2MI[index] != 0 &&
- "Kill refers to instruction not present in index maps.");
- vni->kills[i] = LiveIndex(mi2iMap_[OldI2MI[index]], offset);
- }
-
- /*
- unsigned idx = index;
- while (index < OldI2MI.size() && !OldI2MI[index]) ++index;
-
- if (index != OldI2MI.size())
- vni->kills[i] = mi2iMap_[OldI2MI[index]] +
- (idx == index ? offset : 0);
- else
- vni->kills[i] = InstrSlots::NUM * i2miMap_.size();
- */
- }
- }
- }
- }
-}
-
-void LiveIntervals::scaleNumbering(int factor) {
- // Need to
- // * scale MBB begin and end points
- // * scale all ranges.
- // * Update VNI structures.
- // * Scale instruction numberings
-
- // Scale the MBB indices.
- Idx2MBBMap.clear();
- for (MachineFunction::iterator MBB = mf_->begin(), MBBE = mf_->end();
- MBB != MBBE; ++MBB) {
- std::pair<LiveIndex, LiveIndex> &mbbIndices = MBB2IdxMap[MBB->getNumber()];
- mbbIndices.first = mbbIndices.first.scale(factor);
- mbbIndices.second = mbbIndices.second.scale(factor);
- Idx2MBBMap.push_back(std::make_pair(mbbIndices.first, MBB));
- }
- std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
-
- // Scale terminator gaps.
- for (DenseMap<MachineBasicBlock*, LiveIndex>::iterator
- TGI = terminatorGaps.begin(), TGE = terminatorGaps.end();
- TGI != TGE; ++TGI) {
- terminatorGaps[TGI->first] = TGI->second.scale(factor);
- }
-
- // Scale the intervals.
- for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
- LI->second->scaleNumbering(factor);
- }
-
- // Scale MachineInstrs.
- Mi2IndexMap oldmi2iMap = mi2iMap_;
- LiveIndex highestSlot;
- for (Mi2IndexMap::iterator MI = oldmi2iMap.begin(), ME = oldmi2iMap.end();
- MI != ME; ++MI) {
- LiveIndex newSlot = MI->second.scale(factor);
- mi2iMap_[MI->first] = newSlot;
- highestSlot = std::max(highestSlot, newSlot);
- }
-
- unsigned highestVIndex = highestSlot.getVecIndex();
- i2miMap_.clear();
- i2miMap_.resize(highestVIndex + 1);
- for (Mi2IndexMap::iterator MI = mi2iMap_.begin(), ME = mi2iMap_.end();
- MI != ME; ++MI) {
- i2miMap_[MI->second.getVecIndex()] = const_cast<MachineInstr *>(MI->first);
- }
-
-}
-
-
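
All of the numbering machinery deleted above (processImplicitDefs, computeNumbering, scaleNumbering, the mi2i/i2mi maps and terminator gaps) moves into the new ProcessImplicitDefs and SlotIndexes passes. The recurring mechanical rewrite in the rest of this patch is that free helpers on LiveIntervals become methods on SlotIndex. A compact sketch, with lis and MI as assumed inputs:

    #include "llvm/CodeGen/LiveIntervalAnalysis.h"
    #include "llvm/CodeGen/SlotIndexes.h"
    using namespace llvm;

    // Sketch only: the old helpers (right of each comment) become
    // SlotIndex methods. Each instruction number carries four sub-slots:
    // load, use, def, and store.
    static void slotIndexCheatSheet(LiveIntervals &lis, MachineInstr *MI) {
      SlotIndex i = lis.getInstructionIndex(MI);
      SlotIndex l = i.getLoadIndex();   // was: getLoadIndex(i)
      SlotIndex u = i.getUseIndex();    // was: getUseIndex(i)
      SlotIndex d = i.getDefIndex();    // was: getDefIndex(i)
      SlotIndex s = i.getStoreIndex();  // was: getStoreIndex(i)
      SlotIndex n = i.getNextIndex();   // was: getNextIndex(i)
      (void)l; (void)u; (void)d; (void)s; (void)n;
    }
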
/// runOnMachineFunction - Register allocate the whole function
///
bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
@@ -532,10 +116,9 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
tii_ = tm_->getInstrInfo();
aa_ = &getAnalysis<AliasAnalysis>();
lv_ = &getAnalysis<LiveVariables>();
+ indexes_ = &getAnalysis<SlotIndexes>();
allocatableRegs_ = tri_->getAllocatableSet(fn);
- processImplicitDefs();
- computeNumbering();
computeIntervals();
performEarlyCoalescing();
@@ -579,12 +162,13 @@ bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li,
VirtRegMap &vrm, unsigned reg) {
for (LiveInterval::Ranges::const_iterator
I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- for (LiveIndex index = getBaseIndex(I->start),
- end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); index != end;
- index = getNextIndex(index)) {
+ for (SlotIndex index = I->start.getBaseIndex(),
+ end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
+ index != end;
+ index = index.getNextIndex()) {
// skip deleted instructions
while (index != end && !getInstructionFromIndex(index))
- index = getNextIndex(index);
+ index = index.getNextIndex();
if (index == end) break;
MachineInstr *MI = getInstructionFromIndex(index);
@@ -620,16 +204,17 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
for (LiveInterval::Ranges::const_iterator
I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- for (LiveIndex index = getBaseIndex(I->start),
- end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); index != end;
- index = getNextIndex(index)) {
+ for (SlotIndex index = I->start.getBaseIndex(),
+ end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
+ index != end;
+ index = index.getNextIndex()) {
// Skip deleted instructions.
MachineInstr *MI = 0;
while (index != end) {
MI = getInstructionFromIndex(index);
if (MI)
break;
- index = getNextIndex(index);
+ index = index.getNextIndex();
}
if (index == end) break;
@@ -664,7 +249,7 @@ static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineBasicBlock::iterator mi,
- LiveIndex MIIdx,
+ SlotIndex MIIdx,
MachineOperand& MO,
unsigned MOIdx,
LiveInterval &interval) {
@@ -680,11 +265,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
if (interval.empty()) {
// Get the Idx of the defining instructions.
- LiveIndex defIndex = getDefIndex(MIIdx);
+ SlotIndex defIndex = MIIdx.getDefIndex();
// Earlyclobbers move back one, so that they overlap the live range
// of inputs.
if (MO.isEarlyClobber())
- defIndex = getUseIndex(MIIdx);
+ defIndex = MIIdx.getUseIndex();
VNInfo *ValNo;
MachineInstr *CopyMI = NULL;
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
@@ -704,16 +289,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// will be a single kill, in MBB, which comes after the definition.
if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
// FIXME: what about dead vars?
- LiveIndex killIdx;
+ SlotIndex killIdx;
if (vi.Kills[0] != mi)
- killIdx = getNextSlot(getUseIndex(getInstructionIndex(vi.Kills[0])));
- else if (MO.isEarlyClobber())
- // Earlyclobbers that die in this instruction move up one extra, to
- // compensate for having the starting point moved back one. This
- // gets them to overlap the live range of other outputs.
- killIdx = getNextSlot(getNextSlot(defIndex));
+ killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex();
else
- killIdx = getNextSlot(defIndex);
+ killIdx = defIndex.getStoreIndex();
// If the kill happens after the definition, we have an intra-block
// live range.
@@ -732,7 +312,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// of the defining block, potentially live across some blocks, then is
// live into some number of blocks, but gets killed. Start by adding a
// range that goes from this definition to the end of the defining block.
- LiveRange NewLR(defIndex, getNextSlot(getMBBEndIdx(mbb)), ValNo);
+ LiveRange NewLR(defIndex, getMBBEndIdx(mbb).getNextIndex().getLoadIndex(),
+ ValNo);
DEBUG(errs() << " +" << NewLR);
interval.addRange(NewLR);
@@ -741,9 +322,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// live interval.
for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
E = vi.AliveBlocks.end(); I != E; ++I) {
- LiveRange LR(getMBBStartIdx(*I),
- getNextSlot(getMBBEndIdx(*I)), // MBB ends at -1.
- ValNo);
+ LiveRange LR(
+ getMBBStartIdx(mf_->getBlockNumbered(*I)),
+ getMBBEndIdx(mf_->getBlockNumbered(*I)).getNextIndex().getLoadIndex(),
+ ValNo);
interval.addRange(LR);
DEBUG(errs() << " +" << LR);
}
@@ -752,8 +334,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// block to the 'use' slot of the killing instruction.
for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
MachineInstr *Kill = vi.Kills[i];
- LiveIndex killIdx =
- getNextSlot(getUseIndex(getInstructionIndex(Kill)));
+ SlotIndex killIdx =
+ getInstructionIndex(Kill).getDefIndex();
LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo);
interval.addRange(LR);
ValNo->addKill(killIdx);
@@ -772,13 +354,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// need to take the LiveRegion that defines this register and split it
// into two values.
assert(interval.containsOneValue());
- LiveIndex DefIndex = getDefIndex(interval.getValNumInfo(0)->def);
- LiveIndex RedefIndex = getDefIndex(MIIdx);
+ SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex();
+ SlotIndex RedefIndex = MIIdx.getDefIndex();
if (MO.isEarlyClobber())
- RedefIndex = getUseIndex(MIIdx);
+ RedefIndex = MIIdx.getUseIndex();
const LiveRange *OldLR =
- interval.getLiveRangeContaining(getPrevSlot(RedefIndex));
+ interval.getLiveRangeContaining(RedefIndex.getUseIndex());
VNInfo *OldValNo = OldLR->valno;
// Delete the initial value, which should be short and continuous,
@@ -811,10 +393,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// If this redefinition is dead, we need to add a dummy unit live
// range covering the def slot.
if (MO.isDead())
- interval.addRange(
- LiveRange(RedefIndex, MO.isEarlyClobber() ?
- getNextSlot(getNextSlot(RedefIndex)) :
- getNextSlot(RedefIndex), OldValNo));
+ interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(),
+ OldValNo));
DEBUG({
errs() << " RESULT: ";
@@ -829,9 +409,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
VNInfo *VNI = interval.getValNumInfo(0);
MachineInstr *Killer = vi.Kills[0];
phiJoinCopies.push_back(Killer);
- LiveIndex Start = getMBBStartIdx(Killer->getParent());
- LiveIndex End =
- getNextSlot(getUseIndex(getInstructionIndex(Killer)));
+ SlotIndex Start = getMBBStartIdx(Killer->getParent());
+ SlotIndex End = getInstructionIndex(Killer).getDefIndex();
DEBUG({
errs() << " Removing [" << Start << "," << End << "] from: ";
interval.print(errs(), tri_);
@@ -841,7 +420,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
assert(interval.ranges.size() == 1 &&
"Newly discovered PHI interval has >1 ranges.");
MachineBasicBlock *killMBB = getMBBFromIndex(interval.endIndex());
- VNI->addKill(terminatorGaps[killMBB]);
+ VNI->addKill(indexes_->getTerminatorGap(killMBB));
VNI->setHasPHIKill(true);
DEBUG({
errs() << " RESULT: ";
@@ -851,8 +430,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// Replace the interval with one of a NEW value number. Note that this
// value number isn't actually defined by an instruction, weird huh? :)
LiveRange LR(Start, End,
- interval.getNextValue(LiveIndex(mbb->getNumber()),
- 0, false, VNInfoAllocator));
+ interval.getNextValue(SlotIndex(getMBBStartIdx(mbb), true),
+ 0, false, VNInfoAllocator));
LR.valno->setIsPHIDef(true);
DEBUG(errs() << " replace range with " << LR);
interval.addRange(LR);
@@ -866,9 +445,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// In the case of PHI elimination, each variable definition is only
// live until the end of the block. We've already taken care of the
// rest of the live range.
- LiveIndex defIndex = getDefIndex(MIIdx);
+ SlotIndex defIndex = MIIdx.getDefIndex();
if (MO.isEarlyClobber())
- defIndex = getUseIndex(MIIdx);
+ defIndex = MIIdx.getUseIndex();
VNInfo *ValNo;
MachineInstr *CopyMI = NULL;
@@ -880,10 +459,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
CopyMI = mi;
ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
- LiveIndex killIndex = getNextSlot(getMBBEndIdx(mbb));
+ SlotIndex killIndex = getMBBEndIdx(mbb).getNextIndex().getLoadIndex();
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
- ValNo->addKill(terminatorGaps[mbb]);
+ ValNo->addKill(indexes_->getTerminatorGap(mbb));
ValNo->setHasPHIKill(true);
DEBUG(errs() << " +" << LR);
}
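
Both PHI-related hunks above record kills at the block's terminator gap, which now lives in SlotIndexes instead of the deleted terminatorGaps map. A sketch of the new call shape, assuming a SlotIndexes reference is at hand (the helper name is illustrative):

    #include "llvm/CodeGen/LiveInterval.h"
    #include "llvm/CodeGen/SlotIndexes.h"
    using namespace llvm;

    // Sketch only: mark a value as killed by a PHI at the gap that
    // SlotIndexes reserves before a block's terminators.
    static void markPHIKill(SlotIndexes &indexes, VNInfo *VN,
                            MachineBasicBlock *MBB) {
      VN->addKill(indexes.getTerminatorGap(MBB));
      VN->setHasPHIKill(true);
    }
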
@@ -894,7 +473,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator mi,
- LiveIndex MIIdx,
+ SlotIndex MIIdx,
MachineOperand& MO,
LiveInterval &interval,
MachineInstr *CopyMI) {
@@ -905,12 +484,12 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
printRegName(interval.reg, tri_);
});
- LiveIndex baseIndex = MIIdx;
- LiveIndex start = getDefIndex(baseIndex);
+ SlotIndex baseIndex = MIIdx;
+ SlotIndex start = baseIndex.getDefIndex();
// Earlyclobbers move back one.
if (MO.isEarlyClobber())
- start = getUseIndex(MIIdx);
- LiveIndex end = start;
+ start = MIIdx.getUseIndex();
+ SlotIndex end = start;
// If it is not used after definition, it is considered dead at
// the instruction defining it. Hence its interval is:
@@ -919,53 +498,51 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
// advance below compensates.
if (MO.isDead()) {
DEBUG(errs() << " dead");
- if (MO.isEarlyClobber())
- end = getNextSlot(getNextSlot(start));
- else
- end = getNextSlot(start);
+ end = start.getStoreIndex();
goto exit;
}
// If it is not dead on definition, it must be killed by a
// subsequent instruction. Hence its interval is:
// [defSlot(def), useSlot(kill)+1)
- baseIndex = getNextIndex(baseIndex);
+ baseIndex = baseIndex.getNextIndex();
while (++mi != MBB->end()) {
- while (baseIndex.getVecIndex() < i2miMap_.size() &&
- getInstructionFromIndex(baseIndex) == 0)
- baseIndex = getNextIndex(baseIndex);
+
+ if (getInstructionFromIndex(baseIndex) == 0)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
if (mi->killsRegister(interval.reg, tri_)) {
DEBUG(errs() << " killed");
- end = getNextSlot(getUseIndex(baseIndex));
+ end = baseIndex.getDefIndex();
goto exit;
} else {
int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_);
if (DefIdx != -1) {
if (mi->isRegTiedToUseOperand(DefIdx)) {
// Two-address instruction.
- end = getDefIndex(baseIndex);
- if (mi->getOperand(DefIdx).isEarlyClobber())
- end = getUseIndex(baseIndex);
+ end = baseIndex.getDefIndex();
+ assert(!mi->getOperand(DefIdx).isEarlyClobber() &&
+ "Two address instruction is an early clobber?");
} else {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that defines
// it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
DEBUG(errs() << " dead");
- end = getNextSlot(start);
+ end = start.getStoreIndex();
}
goto exit;
}
}
- baseIndex = getNextIndex(baseIndex);
+ baseIndex = baseIndex.getNextIndex();
}
  // The only case we should have a dead physreg here without a killing
  // instruction, or any instruction where we know it's dead, is if it is
  // live-in to the function and never used. Another possible case is that
  // the implicit use of the physical register has been deleted by the
  // two-address pass.
- end = getNextSlot(start);
+ end = start.getStoreIndex();
exit:
assert(start < end && "did not find end of interval?");
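
The dead-def cases above all collapse to the same unit interval: from the def slot of the instruction to its store slot. A sketch of that construction under the new API, assuming ValNo already exists:

    #include "llvm/CodeGen/LiveInterval.h"
    #include "llvm/CodeGen/SlotIndexes.h"
    using namespace llvm;

    // Sketch only: a def that is never read is live only within its own
    // instruction, i.e. over [defSlot, storeSlot).
    static void addDeadDefRange(LiveInterval &interval, SlotIndex MIIdx,
                                VNInfo *ValNo) {
      SlotIndex start = MIIdx.getDefIndex();
      interval.addRange(LiveRange(start, start.getStoreIndex(), ValNo));
    }
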
@@ -985,7 +562,7 @@ exit:
void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI,
- LiveIndex MIIdx,
+ SlotIndex MIIdx,
MachineOperand& MO,
unsigned MOIdx) {
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
@@ -1012,7 +589,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
}
void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
- LiveIndex MIIdx,
+ SlotIndex MIIdx,
LiveInterval &interval, bool isAlias) {
DEBUG({
errs() << "\t\tlivein register: ";
@@ -1022,18 +599,18 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
  // Look for kills; if the value reaches a def before it's killed, then it
  // shouldn't be considered a livein.
MachineBasicBlock::iterator mi = MBB->begin();
- LiveIndex baseIndex = MIIdx;
- LiveIndex start = baseIndex;
- while (baseIndex.getVecIndex() < i2miMap_.size() &&
- getInstructionFromIndex(baseIndex) == 0)
- baseIndex = getNextIndex(baseIndex);
- LiveIndex end = baseIndex;
+ SlotIndex baseIndex = MIIdx;
+ SlotIndex start = baseIndex;
+ if (getInstructionFromIndex(baseIndex) == 0)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
+ SlotIndex end = baseIndex;
bool SeenDefUse = false;
while (mi != MBB->end()) {
if (mi->killsRegister(interval.reg, tri_)) {
DEBUG(errs() << " killed");
- end = getNextSlot(getUseIndex(baseIndex));
+ end = baseIndex.getDefIndex();
SeenDefUse = true;
break;
} else if (mi->modifiesRegister(interval.reg, tri_)) {
@@ -1042,17 +619,14 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
// it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
DEBUG(errs() << " dead");
- end = getNextSlot(getDefIndex(start));
+ end = start.getStoreIndex();
SeenDefUse = true;
break;
}
- baseIndex = getNextIndex(baseIndex);
++mi;
if (mi != MBB->end()) {
- while (baseIndex.getVecIndex() < i2miMap_.size() &&
- getInstructionFromIndex(baseIndex) == 0)
- baseIndex = getNextIndex(baseIndex);
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
}
}
@@ -1060,7 +634,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
if (!SeenDefUse) {
if (isAlias) {
DEBUG(errs() << " dead");
- end = getNextSlot(getDefIndex(MIIdx));
+ end = MIIdx.getStoreIndex();
} else {
DEBUG(errs() << " live through");
end = baseIndex;
@@ -1068,7 +642,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
}
VNInfo *vni =
- interval.getNextValue(LiveIndex(MBB->getNumber()),
+ interval.getNextValue(SlotIndex(getMBBStartIdx(MBB), true),
0, false, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
@@ -1139,11 +713,11 @@ void LiveIntervals::performEarlyCoalescing() {
MachineInstr *PHICopy = OtherCopies[i];
DEBUG(errs() << "Moving: " << *PHICopy);
- LiveIndex MIIndex = getInstructionIndex(PHICopy);
- LiveIndex DefIndex = getDefIndex(MIIndex);
+ SlotIndex MIIndex = getInstructionIndex(PHICopy);
+ SlotIndex DefIndex = MIIndex.getDefIndex();
LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex);
- LiveIndex StartIndex = SLR->start;
- LiveIndex EndIndex = SLR->end;
+ SlotIndex StartIndex = SLR->start;
+ SlotIndex EndIndex = SLR->end;
      // Delete the val# defined by the now-identity copy and add the range
      // from the beginning of the MBB to the end of the range.
@@ -1169,11 +743,11 @@ void LiveIntervals::performEarlyCoalescing() {
MachineInstr *PHICopy = IdentCopies[i];
DEBUG(errs() << "Coalescing: " << *PHICopy);
- LiveIndex MIIndex = getInstructionIndex(PHICopy);
- LiveIndex DefIndex = getDefIndex(MIIndex);
+ SlotIndex MIIndex = getInstructionIndex(PHICopy);
+ SlotIndex DefIndex = MIIndex.getDefIndex();
LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex);
- LiveIndex StartIndex = SLR->start;
- LiveIndex EndIndex = SLR->end;
+ SlotIndex StartIndex = SLR->start;
+ SlotIndex EndIndex = SLR->end;
      // Delete the val# defined by the now-identity copy and add the range
      // from the beginning of the MBB to the end of the range.
@@ -1186,9 +760,9 @@ void LiveIntervals::performEarlyCoalescing() {
}
// Remove the phi join and update the phi block liveness.
- LiveIndex MIIndex = getInstructionIndex(Join);
- LiveIndex UseIndex = getUseIndex(MIIndex);
- LiveIndex DefIndex = getDefIndex(MIIndex);
+ SlotIndex MIIndex = getInstructionIndex(Join);
+ SlotIndex UseIndex = MIIndex.getUseIndex();
+ SlotIndex DefIndex = MIIndex.getDefIndex();
LiveRange *SLR = SrcInt.getLiveRangeContaining(UseIndex);
LiveRange *DLR = DstInt.getLiveRangeContaining(DefIndex);
DLR->valno->setCopy(0);
@@ -1218,7 +792,7 @@ void LiveIntervals::computeIntervals() {
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
// Track the index of the current machine instr.
- LiveIndex MIIndex = getMBBStartIdx(MBB);
+ SlotIndex MIIndex = getMBBStartIdx(MBB);
DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n");
MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
@@ -1235,9 +809,8 @@ void LiveIntervals::computeIntervals() {
}
// Skip over empty initial indices.
- while (MIIndex.getVecIndex() < i2miMap_.size() &&
- getInstructionFromIndex(MIIndex) == 0)
- MIIndex = getNextIndex(MIIndex);
+ if (getInstructionFromIndex(MIIndex) == 0)
+ MIIndex = indexes_->getNextNonNullIndex(MIIndex);
for (; MI != miEnd; ++MI) {
DEBUG(errs() << MIIndex << "\t" << *MI);
@@ -1254,19 +827,9 @@ void LiveIntervals::computeIntervals() {
else if (MO.isUndef())
UndefUses.push_back(MO.getReg());
}
-
- // Skip over the empty slots after each instruction.
- unsigned Slots = MI->getDesc().getNumDefs();
- if (Slots == 0)
- Slots = 1;
-
- while (Slots--)
- MIIndex = getNextIndex(MIIndex);
- // Skip over empty indices.
- while (MIIndex.getVecIndex() < i2miMap_.size() &&
- getInstructionFromIndex(MIIndex) == 0)
- MIIndex = getNextIndex(MIIndex);
+ // Move to the next instr slot.
+ MIIndex = indexes_->getNextNonNullIndex(MIIndex);
}
}
@@ -1279,45 +842,6 @@ void LiveIntervals::computeIntervals() {
}
}
-bool LiveIntervals::findLiveInMBBs(
- LiveIndex Start, LiveIndex End,
- SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
- std::vector<IdxMBBPair>::const_iterator I =
- std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start);
-
- bool ResVal = false;
- while (I != Idx2MBBMap.end()) {
- if (I->first >= End)
- break;
- MBBs.push_back(I->second);
- ResVal = true;
- ++I;
- }
- return ResVal;
-}
-
-bool LiveIntervals::findReachableMBBs(
- LiveIndex Start, LiveIndex End,
- SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
- std::vector<IdxMBBPair>::const_iterator I =
- std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start);
-
- bool ResVal = false;
- while (I != Idx2MBBMap.end()) {
- if (I->first > End)
- break;
- MachineBasicBlock *MBB = I->second;
- if (getMBBEndIdx(MBB) > End)
- break;
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- MBBs.push_back(*SI);
- ResVal = true;
- ++I;
- }
- return ResVal;
-}
-
LiveInterval* LiveIntervals::createInterval(unsigned reg) {
float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
return new LiveInterval(reg, Weight);
@@ -1389,8 +913,8 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
/// isValNoAvailableAt - Return true if the val# of the specified interval
/// which reaches the given instruction also reaches the specified use index.
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
- LiveIndex UseIdx) const {
- LiveIndex Index = getInstructionIndex(MI);
+ SlotIndex UseIdx) const {
+ SlotIndex Index = getInstructionIndex(MI);
VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
return UI != li.end() && UI->valno == ValNo;
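
A hypothetical caller, to show how the predicate above is meant to gate reuse of a value across a later point (names are illustrative):

    #include "llvm/CodeGen/LiveIntervalAnalysis.h"
    using namespace llvm;

    // Sketch only: refuse to reuse the def that reaches MI when a
    // different value number is live at UseIdx.
    static bool safeToReuse(LiveIntervals &lis, const LiveInterval &ImpLi,
                            MachineInstr *MI, SlotIndex UseIdx) {
      return lis.isValNoAvailableAt(ImpLi, MI, UseIdx);
    }
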
@@ -1417,7 +941,7 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg),
re = mri_->use_end(); ri != re; ++ri) {
MachineInstr *UseMI = &*ri;
- LiveIndex UseIdx = getInstructionIndex(UseMI);
+ SlotIndex UseIdx = getInstructionIndex(UseMI);
if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
continue;
if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
@@ -1502,7 +1026,7 @@ static bool FilterFoldedOps(MachineInstr *MI,
/// returns true.
bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
VirtRegMap &vrm, MachineInstr *DefMI,
- LiveIndex InstrIdx,
+ SlotIndex InstrIdx,
SmallVector<unsigned, 2> &Ops,
bool isSS, int Slot, unsigned Reg) {
// If it is an implicit def instruction, just delete it.
@@ -1540,9 +1064,7 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
vrm.transferSpillPts(MI, fmi);
vrm.transferRestorePts(MI, fmi);
vrm.transferEmergencySpills(MI, fmi);
- mi2iMap_.erase(MI);
- i2miMap_[InstrIdx.getVecIndex()] = fmi;
- mi2iMap_[fmi] = InstrIdx;
+ ReplaceMachineInstrInMaps(MI, fmi);
MI = MBB.insert(MBB.erase(MI), fmi);
++numFolds;
return true;
@@ -1570,19 +1092,21 @@ bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
}
bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
- SmallPtrSet<MachineBasicBlock*, 4> MBBs;
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- std::vector<IdxMBBPair>::const_iterator II =
- std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), I->start);
- if (II == Idx2MBBMap.end())
- continue;
- if (I->end > II->first) // crossing a MBB.
- return false;
- MBBs.insert(II->second);
- if (MBBs.size() > 1)
+ LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+
+ MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+ if (mbb == 0)
+ return false;
+
+ for (++itr; itr != li.ranges.end(); ++itr) {
+ MachineBasicBlock *mbb2 =
+ indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+ if (mbb2 != mbb)
return false;
}
+
return true;
}
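
The rewrite above trades the Idx2MBBMap binary searches for a single SlotIndexes query per range. A sketch of the underlying test, assuming a SlotIndexes reference:

    #include "llvm/CodeGen/SlotIndexes.h"
    using namespace llvm;

    // Sketch only: a live range is block-local iff one block's index
    // range covers it; getMBBCoveringRange returns null otherwise.
    static bool rangeIsBlockLocal(SlotIndexes &indexes,
                                  SlotIndex start, SlotIndex end) {
      return indexes.getMBBCoveringRange(start, end) != 0;
    }
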
@@ -1614,7 +1138,7 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
bool LiveIntervals::
rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, LiveIndex index, LiveIndex end,
+ bool TrySplit, SlotIndex index, SlotIndex end,
MachineInstr *MI,
MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
unsigned Slot, int LdSlot,
@@ -1791,14 +1315,13 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (HasUse) {
if (CreatedNewVReg) {
- LiveRange LR(getLoadIndex(index), getNextSlot(getUseIndex(index)),
- nI.getNextValue(LiveIndex(), 0, false,
- VNInfoAllocator));
+ LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
+ nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
DEBUG(errs() << " +" << LR);
nI.addRange(LR);
} else {
// Extend the split live interval to this def / use.
- LiveIndex End = getNextSlot(getUseIndex(index));
+ SlotIndex End = index.getDefIndex();
LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
nI.getValNumInfo(nI.getNumValNums()-1));
DEBUG(errs() << " +" << LR);
@@ -1806,9 +1329,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
}
}
if (HasDef) {
- LiveRange LR(getDefIndex(index), getStoreIndex(index),
- nI.getNextValue(LiveIndex(), 0, false,
- VNInfoAllocator));
+ LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
+ nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
DEBUG(errs() << " +" << LR);
nI.addRange(LR);
}
@@ -1824,13 +1346,13 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
const VNInfo *VNI,
MachineBasicBlock *MBB,
- LiveIndex Idx) const {
- LiveIndex End = getMBBEndIdx(MBB);
+ SlotIndex Idx) const {
+ SlotIndex End = getMBBEndIdx(MBB);
for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) {
- if (VNI->kills[j].isPHIIndex())
+ if (VNI->kills[j].isPHI())
continue;
- LiveIndex KillIdx = VNI->kills[j];
+ SlotIndex KillIdx = VNI->kills[j];
if (KillIdx > Idx && KillIdx < End)
return true;
}
@@ -1841,11 +1363,11 @@ bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
/// during spilling.
namespace {
struct RewriteInfo {
- LiveIndex Index;
+ SlotIndex Index;
MachineInstr *MI;
bool HasUse;
bool HasDef;
- RewriteInfo(LiveIndex i, MachineInstr *mi, bool u, bool d)
+ RewriteInfo(SlotIndex i, MachineInstr *mi, bool u, bool d)
: Index(i), MI(mi), HasUse(u), HasDef(d) {}
};
@@ -1874,8 +1396,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
std::vector<LiveInterval*> &NewLIs) {
bool AllCanFold = true;
unsigned NewVReg = 0;
- LiveIndex start = getBaseIndex(I->start);
- LiveIndex end = getNextIndex(getBaseIndex(getPrevSlot(I->end)));
+ SlotIndex start = I->start.getBaseIndex();
+ SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
  // First collect all the defs / uses in this live range that will be rewritten.
// Make sure they are sorted according to instruction index.
@@ -1886,7 +1408,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
MachineOperand &O = ri.getOperand();
++ri;
assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
- LiveIndex index = getInstructionIndex(MI);
+ SlotIndex index = getInstructionIndex(MI);
if (index < start || index >= end)
continue;
@@ -1910,7 +1432,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
RewriteInfo &rwi = RewriteMIs[i];
++i;
- LiveIndex index = rwi.Index;
+ SlotIndex index = rwi.Index;
bool MIHasUse = rwi.HasUse;
bool MIHasDef = rwi.HasDef;
MachineInstr *MI = rwi.MI;
@@ -1993,12 +1515,12 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
if (MI != ReMatOrigDefMI || !CanDelete) {
bool HasKill = false;
if (!HasUse)
- HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, getDefIndex(index));
+ HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex());
else {
// If this is a two-address code, then this index starts a new VNInfo.
- const VNInfo *VNI = li.findDefinedVNInfoForRegInt(getDefIndex(index));
+ const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex());
if (VNI)
- HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, getDefIndex(index));
+ HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex());
}
DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
SpillIdxes.find(MBBId);
@@ -2071,7 +1593,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
}
}
-bool LiveIntervals::alsoFoldARestore(int Id, LiveIndex index,
+bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index,
unsigned vr, BitVector &RestoreMBBs,
DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
if (!RestoreMBBs[Id])
@@ -2085,7 +1607,7 @@ bool LiveIntervals::alsoFoldARestore(int Id, LiveIndex index,
return false;
}
-void LiveIntervals::eraseRestoreInfo(int Id, LiveIndex index,
+void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index,
unsigned vr, BitVector &RestoreMBBs,
DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
if (!RestoreMBBs[Id])
@@ -2093,7 +1615,7 @@ void LiveIntervals::eraseRestoreInfo(int Id, LiveIndex index,
std::vector<SRInfo> &Restores = RestoreIdxes[Id];
for (unsigned i = 0, e = Restores.size(); i != e; ++i)
if (Restores[i].index == index && Restores[i].vreg)
- Restores[i].index = LiveIndex();
+ Restores[i].index = SlotIndex();
}
/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
@@ -2192,18 +1714,18 @@ addIntervalsForSpillsFast(const LiveInterval &li,
}
// Fill in the new live interval.
- LiveIndex index = getInstructionIndex(MI);
+ SlotIndex index = getInstructionIndex(MI);
if (HasUse) {
- LiveRange LR(getLoadIndex(index), getUseIndex(index),
- nI.getNextValue(LiveIndex(), 0, false,
+ LiveRange LR(index.getLoadIndex(), index.getUseIndex(),
+ nI.getNextValue(SlotIndex(), 0, false,
getVNInfoAllocator()));
DEBUG(errs() << " +" << LR);
nI.addRange(LR);
vrm.addRestorePoint(NewVReg, MI);
}
if (HasDef) {
- LiveRange LR(getDefIndex(index), getStoreIndex(index),
- nI.getNextValue(LiveIndex(), 0, false,
+ LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
+ nI.getNextValue(SlotIndex(), 0, false,
getVNInfoAllocator()));
DEBUG(errs() << " +" << LR);
nI.addRange(LR);
@@ -2267,8 +1789,8 @@ addIntervalsForSpills(const LiveInterval &li,
if (vrm.getPreSplitReg(li.reg)) {
vrm.setIsSplitFromReg(li.reg, 0);
// Unset the split kill marker on the last use.
- LiveIndex KillIdx = vrm.getKillPoint(li.reg);
- if (KillIdx != LiveIndex()) {
+ SlotIndex KillIdx = vrm.getKillPoint(li.reg);
+ if (KillIdx != SlotIndex()) {
MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
assert(KillMI && "Last use disappeared?");
int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
@@ -2394,7 +1916,7 @@ addIntervalsForSpills(const LiveInterval &li,
while (Id != -1) {
std::vector<SRInfo> &spills = SpillIdxes[Id];
for (unsigned i = 0, e = spills.size(); i != e; ++i) {
- LiveIndex index = spills[i].index;
+ SlotIndex index = spills[i].index;
unsigned VReg = spills[i].vreg;
LiveInterval &nI = getOrCreateInterval(VReg);
bool isReMat = vrm.isReMaterialized(VReg);
@@ -2432,16 +1954,16 @@ addIntervalsForSpills(const LiveInterval &li,
if (FoundUse) {
// Also folded uses, do not issue a load.
eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
- nI.removeRange(getLoadIndex(index), getNextSlot(getUseIndex(index)));
+ nI.removeRange(index.getLoadIndex(), index.getDefIndex());
}
- nI.removeRange(getDefIndex(index), getStoreIndex(index));
+ nI.removeRange(index.getDefIndex(), index.getStoreIndex());
}
}
// Otherwise tell the spiller to issue a spill.
if (!Folded) {
LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
- bool isKill = LR->end == getStoreIndex(index);
+ bool isKill = LR->end == index.getStoreIndex();
if (!MI->registerDefIsDead(nI.reg))
// No need to spill a dead def.
vrm.addSpillPoint(VReg, isKill, MI);
@@ -2457,8 +1979,8 @@ addIntervalsForSpills(const LiveInterval &li,
while (Id != -1) {
std::vector<SRInfo> &restores = RestoreIdxes[Id];
for (unsigned i = 0, e = restores.size(); i != e; ++i) {
- LiveIndex index = restores[i].index;
- if (index == LiveIndex())
+ SlotIndex index = restores[i].index;
+ if (index == SlotIndex())
continue;
unsigned VReg = restores[i].vreg;
LiveInterval &nI = getOrCreateInterval(VReg);
@@ -2513,7 +2035,7 @@ addIntervalsForSpills(const LiveInterval &li,
// If folding is not possible / failed, then tell the spiller to issue a
// load / rematerialization for us.
if (Folded)
- nI.removeRange(getLoadIndex(index), getNextSlot(getUseIndex(index)));
+ nI.removeRange(index.getLoadIndex(), index.getDefIndex());
else
vrm.addRestorePoint(VReg, MI);
}
@@ -2526,10 +2048,10 @@ addIntervalsForSpills(const LiveInterval &li,
for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
LiveInterval *LI = NewLIs[i];
if (!LI->empty()) {
- LI->weight /= InstrSlots::NUM * getApproximateInstructionCount(*LI);
+ LI->weight /= SlotIndex::NUM * getApproximateInstructionCount(*LI);
if (!AddedKill.count(LI)) {
LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
- LiveIndex LastUseIdx = getBaseIndex(LR->end);
+ SlotIndex LastUseIdx = LR->end.getBaseIndex();
MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
assert(UseIdx != -1);
@@ -2580,7 +2102,7 @@ unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
E = mri_->reg_end(); I != E; ++I) {
MachineOperand &O = I.getOperand();
MachineInstr *MI = O.getParent();
- LiveIndex Index = getInstructionIndex(MI);
+ SlotIndex Index = getInstructionIndex(MI);
if (pli.liveAt(Index))
++NumConflicts;
}
@@ -2623,15 +2145,15 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
if (SeenMIs.count(MI))
continue;
SeenMIs.insert(MI);
- LiveIndex Index = getInstructionIndex(MI);
+ SlotIndex Index = getInstructionIndex(MI);
for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
unsigned PReg = PRegs[i];
LiveInterval &pli = getInterval(PReg);
if (!pli.liveAt(Index))
continue;
vrm.addEmergencySpill(PReg, MI);
- LiveIndex StartIdx = getLoadIndex(Index);
- LiveIndex EndIdx = getNextSlot(getStoreIndex(Index));
+ SlotIndex StartIdx = Index.getLoadIndex();
+ SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
pli.removeRange(StartIdx, EndIdx);
Cut = true;
@@ -2651,7 +2173,8 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
continue;
LiveInterval &spli = getInterval(*AS);
if (spli.liveAt(Index))
- spli.removeRange(getLoadIndex(Index), getNextSlot(getStoreIndex(Index)));
+ spli.removeRange(Index.getLoadIndex(),
+ Index.getNextIndex().getBaseIndex());
}
}
}
@@ -2662,13 +2185,13 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
MachineInstr* startInst) {
LiveInterval& Interval = getOrCreateInterval(reg);
VNInfo* VN = Interval.getNextValue(
- LiveIndex(getInstructionIndex(startInst), LiveIndex::DEF),
+ SlotIndex(getInstructionIndex(startInst).getDefIndex()),
startInst, true, getVNInfoAllocator());
VN->setHasPHIKill(true);
- VN->kills.push_back(terminatorGaps[startInst->getParent()]);
+ VN->kills.push_back(indexes_->getTerminatorGap(startInst->getParent()));
LiveRange LR(
- LiveIndex(getInstructionIndex(startInst), LiveIndex::DEF),
- getNextSlot(getMBBEndIdx(startInst->getParent())), VN);
+ SlotIndex(getInstructionIndex(startInst).getDefIndex()),
+ getMBBEndIdx(startInst->getParent()).getNextIndex().getBaseIndex(), VN);
Interval.addRange(LR);
return LR;
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index a7bea1f..d2f3775 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -27,15 +27,10 @@ using namespace llvm;
char LiveStacks::ID = 0;
static RegisterPass<LiveStacks> X("livestacks", "Live Stack Slot Analysis");
-void LiveStacks::scaleNumbering(int factor) {
- // Scale the intervals.
- for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
- LI->second.scaleNumbering(factor);
- }
-}
-
void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index 8486bb0..30636a8 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -25,13 +25,16 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
- struct VISIBILITY_HIDDEN LowerSubregsInstructionPass
- : public MachineFunctionPass {
+ struct LowerSubregsInstructionPass : public MachineFunctionPass {
+ private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ public:
static char ID; // Pass identification, replacement for typeid
LowerSubregsInstructionPass() : MachineFunctionPass(&ID) {}
@@ -48,15 +51,16 @@ namespace {
/// runOnMachineFunction - pass entry point
bool runOnMachineFunction(MachineFunction&);
-
+
+ private:
bool LowerExtract(MachineInstr *MI);
bool LowerInsert(MachineInstr *MI);
bool LowerSubregToReg(MachineInstr *MI);
void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
- const TargetRegisterInfo &TRI);
+ const TargetRegisterInfo *TRI);
void TransferKillFlag(MachineInstr *MI, unsigned SrcReg,
- const TargetRegisterInfo &TRI,
+ const TargetRegisterInfo *TRI,
bool AddIfNotFound = false);
};
@@ -73,10 +77,10 @@ FunctionPass *llvm::createLowerSubregsPass() {
void
LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
unsigned DstReg,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo *TRI) {
for (MachineBasicBlock::iterator MII =
prior(MachineBasicBlock::iterator(MI)); ; --MII) {
- if (MII->addRegisterDead(DstReg, &TRI))
+ if (MII->addRegisterDead(DstReg, TRI))
break;
assert(MII != MI->getParent()->begin() &&
"copyRegToReg output doesn't reference destination register!");
@@ -89,11 +93,11 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
void
LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
unsigned SrcReg,
- const TargetRegisterInfo &TRI,
+ const TargetRegisterInfo *TRI,
bool AddIfNotFound) {
for (MachineBasicBlock::iterator MII =
prior(MachineBasicBlock::iterator(MI)); ; --MII) {
- if (MII->addRegisterKilled(SrcReg, &TRI, AddIfNotFound))
+ if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound))
break;
assert(MII != MI->getParent()->begin() &&
"copyRegToReg output doesn't reference source register!");
@@ -102,9 +106,6 @@ LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
MachineBasicBlock *MBB = MI->getParent();
- MachineFunction &MF = *MBB->getParent();
- const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
MI->getOperand(1).isReg() && MI->getOperand(1).isUse() &&
@@ -113,7 +114,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
unsigned DstReg = MI->getOperand(0).getReg();
unsigned SuperReg = MI->getOperand(1).getReg();
unsigned SubIdx = MI->getOperand(2).getImm();
- unsigned SrcReg = TRI.getSubReg(SuperReg, SubIdx);
+ unsigned SrcReg = TRI->getSubReg(SuperReg, SubIdx);
assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) &&
"Extract supperg source must be a physical register");
@@ -128,7 +129,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
if (MI->getOperand(1).isKill()) {
// We must make sure the super-register gets killed. Replace the
// instruction with KILL.
- MI->setDesc(TII.get(TargetInstrInfo::KILL));
+ MI->setDesc(TII->get(TargetInstrInfo::KILL));
MI->RemoveOperand(2); // SubIdx
DEBUG(errs() << "subreg: replace by: " << *MI);
return true;
@@ -137,9 +138,9 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
DEBUG(errs() << "subreg: eliminated!");
} else {
// Insert copy
- const TargetRegisterClass *TRCS = TRI.getPhysicalRegisterRegClass(DstReg);
- const TargetRegisterClass *TRCD = TRI.getPhysicalRegisterRegClass(SrcReg);
- bool Emitted = TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS);
+ const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg);
+ const TargetRegisterClass *TRCD = TRI->getPhysicalRegisterRegClass(SrcReg);
+ bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS);
(void)Emitted;
assert(Emitted && "Subreg and Dst must be of compatible register class");
// Transfer the kill/dead flags, if needed.
@@ -160,9 +161,6 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
MachineBasicBlock *MBB = MI->getParent();
- MachineFunction &MF = *MBB->getParent();
- const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
MI->getOperand(1).isImm() &&
(MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
@@ -174,7 +172,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
unsigned SubIdx = MI->getOperand(3).getImm();
assert(SubIdx != 0 && "Invalid index for insert_subreg");
- unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx);
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
"Insert destination must be in a physical register");
@@ -193,9 +191,11 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
DEBUG(errs() << "subreg: eliminated!");
} else {
// Insert sub-register copy
- const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg);
- const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg);
- TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
+ const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
+ bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ (void)Emitted;
+ assert(Emitted && "Subreg and Dst must be of compatible register class");
// Transfer the kill/dead flags, if needed.
if (MI->getOperand(0).isDead())
TransferDeadFlag(MI, DstSubReg, TRI);
@@ -209,14 +209,11 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
DEBUG(errs() << '\n');
MBB->erase(MI);
- return true;
+ return true;
}
bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
MachineBasicBlock *MBB = MI->getParent();
- MachineFunction &MF = *MBB->getParent();
- const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
(MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) &&
(MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
@@ -231,7 +228,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?");
assert(SubIdx != 0 && "Invalid index for insert_subreg");
- unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx);
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
assert(DstSubReg && "invalid subregister index for register");
assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
"Insert superreg source must be in a physical register");
@@ -245,7 +242,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
// <undef>, we need to make sure it is alive by inserting a KILL
if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) {
MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII.get(TargetInstrInfo::KILL), DstReg);
+ TII->get(TargetInstrInfo::KILL), DstReg);
if (MI->getOperand(2).isUndef())
MIB.addReg(InsReg, RegState::Undef);
else
@@ -257,15 +254,18 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
}
} else {
// Insert sub-register copy
- const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg);
- const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg);
+ const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
+ const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
if (MI->getOperand(2).isUndef())
// If the source register being inserted is undef, then this becomes a
// KILL.
BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII.get(TargetInstrInfo::KILL), DstSubReg);
- else
- TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ TII->get(TargetInstrInfo::KILL), DstSubReg);
+ else {
+ bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ (void)Emitted;
+ assert(Emitted && "Subreg and Dst must be of compatible register class");
+ }
MachineBasicBlock::iterator CopyMI = MI;
--CopyMI;
@@ -303,6 +303,8 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
<< "********** LOWERING SUBREG INSTRS **********\n"
<< "********** Function: "
<< MF.getFunction()->getName() << '\n');
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
bool MadeChange = false;
@@ -310,8 +312,8 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
mbbi != mbbe; ++mbbi) {
for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
mi != me;) {
- MachineInstr *MI = mi++;
-
+ MachineBasicBlock::iterator nmi = next(mi);
+ MachineInstr *MI = mi;
if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
MadeChange |= LowerExtract(MI);
} else if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
@@ -319,6 +321,7 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
} else if (MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
MadeChange |= LowerSubregToReg(MI);
}
+ mi = nmi;
}
}
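
The loop rewrite in the hunk above captures next(mi) before dispatching to the Lower* routines, since each of them may erase MI and would otherwise invalidate the iterator being advanced. A minimal self-contained analogue of the erase-safe traversal, using std::list in place of the instruction list (the even-number test is purely illustrative):

    #include <cstdio>
    #include <list>

    int main() {
      std::list<int> insts;
      for (int v = 1; v <= 5; ++v) insts.push_back(v);
      for (std::list<int>::iterator i = insts.begin(), e = insts.end();
           i != e;) {
        std::list<int>::iterator ni = i; ++ni; // like llvm::next(mi) above
        if (*i % 2 == 0)
          insts.erase(i);  // analogous to MBB->erase(MI) in the pass
        i = ni;            // still valid: taken before the erase
      }
      for (std::list<int>::iterator i = insts.begin(); i != insts.end(); ++i)
        std::printf("%d ", *i);  // prints: 1 3 5
      std::printf("\n");
      return 0;
    }
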
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index b3eb2da..7fbdb12 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -20,11 +20,13 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Assembly/Writer.h"
#include <algorithm>
using namespace llvm;
MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
- : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false) {
+ : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
+ AddressTaken(false) {
Insts.Parent = this;
}
@@ -160,11 +162,11 @@ void MachineBasicBlock::dump() const {
static inline void OutputReg(raw_ostream &os, unsigned RegNo,
const TargetRegisterInfo *TRI = 0) {
- if (!RegNo || TargetRegisterInfo::isPhysicalRegister(RegNo)) {
+ if (RegNo != 0 && TargetRegisterInfo::isPhysicalRegister(RegNo)) {
if (TRI)
os << " %" << TRI->get(RegNo).Name;
else
- os << " %mreg(" << RegNo << ")";
+ os << " %physreg" << RegNo;
} else
os << " %reg" << RegNo;
}
@@ -177,18 +179,23 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
return;
}
- const BasicBlock *LBB = getBasicBlock();
+ if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
+
+ OS << "BB#" << getNumber() << ": ";
+
+ const char *Comma = "";
+ if (const BasicBlock *LBB = getBasicBlock()) {
+ OS << Comma << "derived from LLVM BB ";
+ WriteAsOperand(OS, LBB, /*PrintType=*/false);
+ Comma = ", ";
+ }
+ if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+ if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
OS << '\n';
- if (LBB) OS << LBB->getName() << ": ";
- OS << (const void*)this
- << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber();
- if (Alignment) OS << ", Alignment " << Alignment;
- if (isLandingPad()) OS << ", EH LANDING PAD";
- OS << ":\n";
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
if (!livein_empty()) {
- OS << "Live Ins:";
+ OS << " Live Ins:";
for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
OutputReg(OS, *I, TRI);
OS << '\n';
@@ -197,7 +204,7 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
if (!pred_empty()) {
OS << " Predecessors according to CFG:";
for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
- OS << ' ' << *PI << " (#" << (*PI)->getNumber() << ')';
+ OS << " BB#" << (*PI)->getNumber();
OS << '\n';
}
@@ -210,7 +217,7 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
if (!succ_empty()) {
OS << " Successors according to CFG:";
for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
- OS << ' ' << *SI << " (#" << (*SI)->getNumber() << ')';
+ OS << " BB#" << (*SI)->getNumber();
OS << '\n';
}
}
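
MachineBasicBlock::print now emits a one-line header such as "BB#7: derived from LLVM BB %entry, EH LANDING PAD, ADDRESS TAKEN". A minimal sketch of the running-separator idiom behind it (the two flags are hypothetical stand-ins for the real accessors):

    #include <cstdio>

    int main() {
      bool IsLandingPad = true, AddressTaken = true;
      std::printf("BB#7: ");
      const char *Comma = "";
      // Each attribute prints the current separator, then arms a comma
      // for whatever might follow it.
      std::printf("%sderived from LLVM BB %%entry", Comma); Comma = ", ";
      if (IsLandingPad) { std::printf("%sEH LANDING PAD", Comma); Comma = ", "; }
      if (AddressTaken) { std::printf("%sADDRESS TAKEN", Comma); Comma = ", "; }
      std::printf("\n");
      return 0;
    }
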
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index b0ec809..5a1d9e6 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -30,13 +30,12 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
- struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass {
+ struct Printer : public MachineFunctionPass {
static char ID;
raw_ostream &OS;
@@ -53,7 +52,7 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) {
- OS << Banner;
+ OS << "# " << Banner << ":\n";
MF.print(OS);
return false;
}
@@ -304,7 +303,7 @@ void MachineFunction::dump() const {
}
void MachineFunction::print(raw_ostream &OS) const {
- OS << "# Machine code for " << Fn->getName() << "():\n";
+ OS << "# Machine code for function " << Fn->getName() << ":\n";
// Print Frame Information
FrameInfo->print(*this, OS);
@@ -318,34 +317,43 @@ void MachineFunction::print(raw_ostream &OS) const {
const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
if (RegInfo && !RegInfo->livein_empty()) {
- OS << "Live Ins:";
+ OS << "Function Live Ins: ";
for (MachineRegisterInfo::livein_iterator
I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
if (TRI)
- OS << " " << TRI->getName(I->first);
+ OS << "%" << TRI->getName(I->first);
else
- OS << " Reg #" << I->first;
+      OS << "%physreg" << I->first;
if (I->second)
- OS << " in VR#" << I->second << ' ';
+        OS << " in %reg" << I->second;
+
+ if (next(I) != E)
+ OS << ", ";
}
OS << '\n';
}
if (RegInfo && !RegInfo->liveout_empty()) {
- OS << "Live Outs:";
+ OS << "Function Live Outs: ";
for (MachineRegisterInfo::liveout_iterator
- I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
+         I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I) {
if (TRI)
- OS << ' ' << TRI->getName(*I);
+ OS << '%' << TRI->getName(*I);
else
- OS << " Reg #" << *I;
+ OS << "%physreg" << *I;
+
+ if (next(I) != E)
+ OS << " ";
+ }
OS << '\n';
}
- for (const_iterator BB = begin(), E = end(); BB != E; ++BB)
+ for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
+ OS << '\n';
BB->print(OS);
+ }
- OS << "\n# End machine code for " << Fn->getName() << "().\n\n";
+ OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
}
namespace llvm {
@@ -472,12 +480,16 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
+ if (Objects.empty()) return;
+
const TargetFrameInfo *FI = MF.getTarget().getFrameInfo();
int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+ OS << "Frame Objects:\n";
+
for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
const StackObject &SO = Objects[i];
- OS << " <fi#" << (int)(i-NumFixedObjects) << ">: ";
+ OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
if (SO.Size == ~0ULL) {
OS << "dead\n";
continue;
@@ -485,15 +497,14 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (SO.Size == 0)
OS << "variable sized";
else
- OS << "size is " << SO.Size << " byte" << (SO.Size != 1 ? "s," : ",");
- OS << " alignment is " << SO.Alignment << " byte"
- << (SO.Alignment != 1 ? "s," : ",");
+ OS << "size=" << SO.Size;
+ OS << ", align=" << SO.Alignment;
if (i < NumFixedObjects)
- OS << " fixed";
+ OS << ", fixed";
if (i < NumFixedObjects || SO.SPOffset != -1) {
int64_t Off = SO.SPOffset - ValOffset;
- OS << " at location [SP";
+ OS << ", at location [SP";
if (Off > 0)
OS << "+" << Off;
else if (Off < 0)
@@ -502,9 +513,6 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
}
OS << "\n";
}
-
- if (HasVarSizedObjects)
- OS << " Stack frame contains variable sized objects\n";
}
void MachineFrameInfo::dump(const MachineFunction &MF) const {
@@ -548,12 +556,17 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
}
void MachineJumpTableInfo::print(raw_ostream &OS) const {
- // FIXME: this is lame, maybe we could print out the MBB numbers or something
- // like {1, 2, 4, 5, 3, 0}
+ if (JumpTables.empty()) return;
+
+ OS << "Jump Tables:\n";
+
for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
- OS << " <jt#" << i << "> has " << JumpTables[i].MBBs.size()
- << " entries\n";
+ OS << " jt#" << i << ": ";
+ for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
+ OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
}
+
+ OS << '\n';
}
void MachineJumpTableInfo::dump() const { print(errs()); }
@@ -582,6 +595,48 @@ MachineConstantPool::~MachineConstantPool() {
delete Constants[i].Val.MachineCPVal;
}
+/// CanShareConstantPoolEntry - Test whether the given two constants
+/// can be allocated the same constant pool entry.
+static bool CanShareConstantPoolEntry(Constant *A, Constant *B,
+ const TargetData *TD) {
+ // Handle the trivial case quickly.
+ if (A == B) return true;
+
+  // If they have the same type but weren't the same constant, quickly
+  // reject them; constants are uniqued, so two distinct constants of the
+  // same type must hold different values.
+ if (A->getType() == B->getType()) return false;
+
+ // For now, only support constants with the same size.
+ if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType()))
+ return false;
+
+ // If a floating-point value and an integer value have the same encoding,
+ // they can share a constant-pool entry.
+ if (ConstantFP *AFP = dyn_cast<ConstantFP>(A))
+ if (ConstantInt *BI = dyn_cast<ConstantInt>(B))
+ return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
+ if (ConstantFP *BFP = dyn_cast<ConstantFP>(B))
+ if (ConstantInt *AI = dyn_cast<ConstantInt>(A))
+ return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
+
+ // Two vectors can share an entry if each pair of corresponding
+ // elements could.
+ if (ConstantVector *AV = dyn_cast<ConstantVector>(A))
+ if (ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
+ if (AV->getType()->getNumElements() != BV->getType()->getNumElements())
+ return false;
+ for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i)
+ if (!CanShareConstantPoolEntry(AV->getOperand(i),
+ BV->getOperand(i), TD))
+ return false;
+ return true;
+ }
+
+ // TODO: Handle other cases.
+
+ return false;
+}
+
/// getConstantPoolIndex - Create a new entry in the constant pool or return
/// an existing one. User must specify the log2 of the minimum required
/// alignment for the object.
@@ -590,14 +645,17 @@ unsigned MachineConstantPool::getConstantPoolIndex(Constant *C,
unsigned Alignment) {
assert(Alignment && "Alignment must be specified!");
if (Alignment > PoolAlignment) PoolAlignment = Alignment;
-
+
// Check to see if we already have this constant.
//
// FIXME, this could be made much more efficient for large constant pools.
for (unsigned i = 0, e = Constants.size(); i != e; ++i)
- if (Constants[i].Val.ConstVal == C &&
- (Constants[i].getAlignment() & (Alignment - 1)) == 0)
+ if (!Constants[i].isMachineConstantPoolEntry() &&
+ CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) {
+ if ((unsigned)Constants[i].getAlignment() < Alignment)
+ Constants[i].Alignment = Alignment;
return i;
+ }
Constants.push_back(MachineConstantPoolEntry(C, Alignment));
return Constants.size()-1;
@@ -620,13 +678,16 @@ unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
}
void MachineConstantPool::print(raw_ostream &OS) const {
+ if (Constants.empty()) return;
+
+ OS << "Constant Pool:\n";
for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- OS << " <cp#" << i << "> is";
+ OS << " cp#" << i << ": ";
if (Constants[i].isMachineConstantPoolEntry())
Constants[i].Val.MachineCPVal->print(OS);
else
OS << *(Value*)Constants[i].Val.ConstVal;
- OS << " , alignment=" << Constants[i].getAlignment();
+ OS << ", align=" << Constants[i].getAlignment();
OS << "\n";
}
}
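
CanShareConstantPoolEntry lets getConstantPoolIndex merge pool entries whose in-memory encodings match even when the IR types differ, most usefully a floating-point constant and the integer with the same bit pattern. A toy stand-in for the APInt comparison, assuming IEEE-754 doubles; memcpy serves as the portable bit-cast:

    #include <cassert>
    #include <cstring>
    #include <stdint.h>

    static bool sameEncoding(double FP, uint64_t Int) {
      uint64_t Bits;
      std::memcpy(&Bits, &FP, sizeof(Bits)); // bit-cast, no value conversion
      return Bits == Int; // mirrors bitcastToAPInt() == getValue()
    }

    int main() {
      // 1.0 encodes as 0x3FF0000000000000 in IEEE-754 double precision, so
      // a ConstantFP 1.0 and a ConstantInt with those bits could share a slot.
      assert(sameEncoding(1.0, 0x3FF0000000000000ULL));
      assert(!sameEncoding(2.0, 0x3FF0000000000000ULL));
      return 0;
    }
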
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 1f85e92..5744c8a 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Constants.h"
+#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Value.h"
#include "llvm/Assembly/Writer.h"
@@ -180,6 +181,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
case MachineOperand::MO_ExternalSymbol:
return !strcmp(getSymbolName(), Other.getSymbolName()) &&
getOffset() == Other.getOffset();
+ case MachineOperand::MO_BlockAddress:
+ return getBlockAddress() == Other.getBlockAddress();
}
}
@@ -202,7 +205,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (TM)
OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
else
- OS << "%mreg" << getReg();
+ OS << "%physreg" << getReg();
}
if (getSubReg() != 0)
@@ -248,9 +251,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << getFPImm()->getValueAPF().convertToDouble();
break;
case MachineOperand::MO_MachineBasicBlock:
- OS << "mbb<"
- << ((Value*)getMBB()->getBasicBlock())->getName()
- << "," << (void*)getMBB() << '>';
+ OS << "<BB#" << getMBB()->getNumber() << ">";
break;
case MachineOperand::MO_FrameIndex:
OS << "<fi#" << getIndex() << '>';
@@ -273,6 +274,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
+ case MachineOperand::MO_BlockAddress:
+ OS << "<";
+ WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+ OS << '>';
+ break;
default:
llvm_unreachable("Unrecognized operand type");
}
@@ -1054,16 +1060,24 @@ void MachineInstr::dump() const {
}
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
- // Specialize printing if op#0 is definition
- unsigned StartOp = 0;
- if (getNumOperands() && getOperand(0).isReg() && getOperand(0).isDef()) {
- getOperand(0).print(OS, TM);
- OS << " = ";
- ++StartOp; // Don't print this operand again!
+ unsigned StartOp = 0, e = getNumOperands();
+
+ // Print explicitly defined operands on the left of an assignment syntax.
+ for (; StartOp < e && getOperand(StartOp).isReg() &&
+ getOperand(StartOp).isDef() &&
+ !getOperand(StartOp).isImplicit();
+ ++StartOp) {
+ if (StartOp != 0) OS << ", ";
+ getOperand(StartOp).print(OS, TM);
}
+ if (StartOp != 0)
+ OS << " = ";
+
+ // Print the opcode name.
OS << getDesc().getName();
+ // Print the rest of the operands.
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
if (i != StartOp)
OS << ",";
@@ -1071,8 +1085,11 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
getOperand(i).print(OS, TM);
}
+ bool HaveSemi = false;
if (!memoperands_empty()) {
- OS << ", Mem:";
+    if (!HaveSemi) { OS << ";"; HaveSemi = true; }
+
+ OS << " mem:";
for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
i != e; ++i) {
OS << **i;
@@ -1082,14 +1099,16 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
}
if (!debugLoc.isUnknown()) {
+    if (!HaveSemi) { OS << ";"; HaveSemi = true; }
+
+ // TODO: print InlinedAtLoc information
+
const MachineFunction *MF = getParent()->getParent();
DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
DICompileUnit CU(DLT.Scope);
if (!CU.isNull())
- OS << " [dbg: "
- << CU.getDirectory() << '/' << CU.getFilename() << ","
- << DLT.Line << ","
- << DLT.Col << "]";
+ OS << " dbg:" << CU.getDirectory() << '/' << CU.getFilename() << ":"
+ << DLT.Line << ":" << DLT.Col;
}
OS << "\n";
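
With the print rework above, an instruction that explicitly defines several registers now shows all of those defs on the left of the '=' rather than only operand 0. A simplified, self-contained model of the layout (Op is a hypothetical stand-in for MachineOperand):

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Op { std::string Name; bool IsDef; };

    static void printInst(const std::string &Opcode,
                          const std::vector<Op> &Ops) {
      size_t StartOp = 0, e = Ops.size();
      // Leading explicit defs go on the left of the assignment.
      for (; StartOp < e && Ops[StartOp].IsDef; ++StartOp)
        std::printf("%s%s", StartOp ? ", " : "", Ops[StartOp].Name.c_str());
      if (StartOp != 0)
        std::printf(" = ");
      std::printf("%s", Opcode.c_str());
      // Remaining operands follow the opcode, comma separated.
      for (size_t i = StartOp; i != e; ++i)
        std::printf("%s %s", i != StartOp ? "," : "", Ops[i].Name.c_str());
      std::printf("\n");
    }

    int main() {
      std::vector<Op> Ops;
      Op d = { "%reg1026", true };  Ops.push_back(d);
      Op a = { "%reg1024", false }; Ops.push_back(a);
      Op b = { "%reg1025", false }; Ops.push_back(b);
      printInst("ADD32rr", Ops); // %reg1026 = ADD32rr %reg1024, %reg1025
      return 0;
    }
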
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index f92ddb2..1306aa6 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -24,14 +24,15 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -41,7 +42,7 @@ STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
namespace {
- class VISIBILITY_HIDDEN MachineLICM : public MachineFunctionPass {
+ class MachineLICM : public MachineFunctionPass {
const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -55,12 +56,12 @@ namespace {
// State that is updated as we process loops
bool Changed; // True if a loop is changed.
+ bool FirstInLoop; // True if it's the first LICM in the loop.
MachineLoop *CurLoop; // The current loop we are working on.
MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
- // For each BB and opcode pair, keep a list of hoisted instructions.
- DenseMap<std::pair<unsigned, unsigned>,
- std::vector<const MachineInstr*> > CSEMap;
+    // For each opcode, keep a list of potential CSE instructions.
+ DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
public:
static char ID; // Pass identification, replacement for typeid
MachineLICM() : MachineFunctionPass(&ID) {}
@@ -104,10 +105,21 @@ namespace {
///
void HoistRegion(MachineDomTreeNode *N);
+    /// ExtractHoistableLoad - Unfold a load from the given MachineInstr if
+ /// the load itself could be hoisted. Return the unfolded and hoistable
+ /// load, or null if the load couldn't be unfolded or if it wouldn't
+ /// be hoistable.
+ MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
/// Hoist - When an instruction is found to only use loop invariant operands
/// that is safe to hoist, this instruction is called to do the dirty work.
///
- void Hoist(MachineInstr &MI);
+ void Hoist(MachineInstr *MI);
+
+    /// InitCSEMap - Initialize the CSE map with instructions in the current
+    /// loop preheader that may become duplicates of instructions that are
+    /// hoisted out of the loop.
+ void InitCSEMap(MachineBasicBlock *BB);
};
} // end anonymous namespace
@@ -133,7 +145,7 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "******** Machine LICM ********\n");
- Changed = false;
+ Changed = FirstInLoop = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
TRI = TM->getRegisterInfo();
@@ -145,8 +157,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTree>();
AA = &getAnalysis<AliasAnalysis>();
- for (MachineLoopInfo::iterator
- I = LI->begin(), E = LI->end(); I != E; ++I) {
+ for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
CurLoop = *I;
// Only visit outer-most preheader-sporting loops.
@@ -163,7 +174,11 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
if (!CurPreheader)
continue;
+    // CSEMap is initialized for the loop preheader when the first
+    // instruction is being hoisted.
+ FirstInLoop = true;
HoistRegion(DT->getNode(CurLoop->getHeader()));
+ CSEMap.clear();
}
return Changed;
@@ -184,10 +199,7 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ) {
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- MachineInstr &MI = *MII;
-
- Hoist(MI);
-
+ Hoist(&*MII);
MII = NextMII;
}
@@ -368,42 +380,125 @@ static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
return 0;
}
+MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+  // The caller couldn't hoist MI itself, but we may be able to unfold a
+  // load from it and hoist that instead.
+ // First test whether the instruction is loading from an amenable
+ // memory location.
+ if (!MI->getDesc().mayLoad()) return 0;
+ if (!MI->hasOneMemOperand()) return 0;
+ MachineMemOperand *MMO = *MI->memoperands_begin();
+ if (MMO->isVolatile()) return 0;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ if (!MMO->getValue()) return 0;
+ if (const PseudoSourceValue *PSV =
+ dyn_cast<PseudoSourceValue>(MMO->getValue())) {
+ if (!PSV->isConstant(MF.getFrameInfo())) return 0;
+ } else {
+ if (!AA->pointsToConstantMemory(MMO->getValue())) return 0;
+ }
+ // Next determine the register class for a temporary register.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc == 0) return 0;
+ const TargetInstrDesc &TID = TII->get(NewOpc);
+ if (TID.getNumDefs() != 1) return 0;
+ const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
+ // Ok, we're unfolding. Create a temporary register and do the unfold.
+ unsigned Reg = RegInfo->createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 2> NewMIs;
+ bool Success =
+ TII->unfoldMemoryOperand(MF, MI, Reg,
+ /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+ NewMIs);
+ (void)Success;
+ assert(Success &&
+ "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+ "succeeded!");
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MI, NewMIs[0]);
+ MBB->insert(MI, NewMIs[1]);
+ // If unfolding produced a load that wasn't loop-invariant or profitable to
+ // hoist, discard the new instructions and bail.
+ if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ return 0;
+ }
+ // Otherwise we successfully unfolded a load that we can hoist.
+ MI->eraseFromParent();
+ return NewMIs[0];
+}
+
+void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+    // FIXME: For now, only hoist re-materializable instructions. LICM can
+    // otherwise increase register pressure, and we want to make sure it
+    // doesn't increase spilling.
+ if (TII->isTriviallyReMaterializable(MI, AA)) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+ }
+}
+
/// Hoist - When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
///
-void MachineLICM::Hoist(MachineInstr &MI) {
- if (!IsLoopInvariantInst(MI)) return;
- if (!IsProfitableToHoist(MI)) return;
+void MachineLICM::Hoist(MachineInstr *MI) {
+ // First check whether we should hoist this instruction.
+ if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+ // If not, try unfolding a hoistable load.
+ MI = ExtractHoistableLoad(MI);
+ if (!MI) return;
+ }
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
DEBUG({
- errs() << "Hoisting " << MI;
+ errs() << "Hoisting " << *MI;
if (CurPreheader->getBasicBlock())
errs() << " to MachineBasicBlock "
<< CurPreheader->getBasicBlock()->getName();
- if (MI.getParent()->getBasicBlock())
+ if (MI->getParent()->getBasicBlock())
errs() << " from MachineBasicBlock "
- << MI.getParent()->getBasicBlock()->getName();
+ << MI->getParent()->getBasicBlock()->getName();
errs() << "\n";
});
+  // If this is the first instruction being hoisted to the preheader,
+  // initialize the CSE map with potential common expressions.
+  if (FirstInLoop) {
+    InitCSEMap(CurPreheader);
+    FirstInLoop = false;
+  }
+
// Look for opportunity to CSE the hoisted instruction.
- std::pair<unsigned, unsigned> BBOpcPair =
- std::make_pair(CurPreheader->getNumber(), MI.getOpcode());
- DenseMap<std::pair<unsigned, unsigned>,
- std::vector<const MachineInstr*> >::iterator CI = CSEMap.find(BBOpcPair);
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
bool DoneCSE = false;
if (CI != CSEMap.end()) {
- const MachineInstr *Dup = LookForDuplicate(&MI, CI->second, RegInfo);
+ const MachineInstr *Dup = LookForDuplicate(MI, CI->second, RegInfo);
if (Dup) {
- DEBUG(errs() << "CSEing " << MI << " with " << *Dup);
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef())
RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
}
- MI.eraseFromParent();
+ MI->eraseFromParent();
DoneCSE = true;
++NumCSEed;
}
@@ -411,15 +506,15 @@ void MachineLICM::Hoist(MachineInstr &MI) {
// Otherwise, splice the instruction to the preheader.
if (!DoneCSE) {
- CurPreheader->splice(CurPreheader->getFirstTerminator(),
- MI.getParent(), &MI);
+    CurPreheader->splice(CurPreheader->getFirstTerminator(),
+                         MI->getParent(), MI);
+
// Add to the CSE map.
if (CI != CSEMap.end())
- CI->second.push_back(&MI);
+ CI->second.push_back(MI);
else {
std::vector<const MachineInstr*> CSEMIs;
- CSEMIs.push_back(&MI);
- CSEMap.insert(std::make_pair(BBOpcPair, CSEMIs));
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
}
}
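
The MachineLICM rewrite keys its CSE map by opcode alone and seeds it from the preheader (InitCSEMap), so a hoisted instruction can be folded into any identical instruction already placed there. A toy model of the lookup-then-record flow, with strings standing in for MachineInstrs:

    #include <map>
    #include <string>
    #include <vector>

    typedef std::map<unsigned, std::vector<std::string> > CSEMapTy;

    // Returns true if Inst duplicates a recorded instruction (the pass would
    // then rewrite the defs and erase Inst); otherwise records Inst so later
    // hoists can CSE against it.
    static bool hoistWithCSE(CSEMapTy &CSEMap, unsigned Opcode,
                             const std::string &Inst) {
      std::vector<std::string> &Bucket = CSEMap[Opcode];
      for (size_t i = 0; i != Bucket.size(); ++i)
        if (Bucket[i] == Inst) // LookForDuplicate analogue
          return true;
      Bucket.push_back(Inst);  // splice-to-preheader path
      return false;
    }

    int main() {
      CSEMapTy CSEMap;
      bool First = hoistWithCSE(CSEMap, 42, "MOV32ri 7");  // false: recorded
      bool Second = hoistWithCSE(CSEMap, 42, "MOV32ri 7"); // true: CSE hit
      return (!First && Second) ? 0 : 1;
    }
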
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index a00bebb..e040738 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -25,7 +25,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -33,7 +32,7 @@ using namespace llvm;
STATISTIC(NumSunk, "Number of machine instructions sunk");
namespace {
- class VISIBILITY_HIDDEN MachineSinking : public MachineFunctionPass {
+ class MachineSinking : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *RegInfo; // Machine register information
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 18a3ead..99812e0 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -36,14 +36,13 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
- struct VISIBILITY_HIDDEN MachineVerifier : public MachineFunctionPass {
+ struct MachineVerifier : public MachineFunctionPass {
static char ID; // Pass ID, replacement for typeid
MachineVerifier(bool allowDoubleDefs = false) :
@@ -244,7 +243,7 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
report(msg, MBB->getParent());
*OS << "- basic block: " << MBB->getBasicBlock()->getNameStr()
<< " " << (void*)MBB
- << " (#" << MBB->getNumber() << ")\n";
+ << " (BB#" << MBB->getNumber() << ")\n";
}
void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
@@ -746,7 +745,7 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
if (!seen.count(*PrI)) {
report("Missing PHI operand", BBI);
- *OS << "MBB #" << (*PrI)->getNumber()
+ *OS << "BB#" << (*PrI)->getNumber()
<< " is a predecessor according to the CFG.\n";
}
}
@@ -781,7 +780,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
report("Live-in physical register is not live-out from predecessor",
MFI);
*OS << "Register " << TRI->getName(*I)
- << " is not live-out from MBB #" << (*PrI)->getNumber()
+ << " is not live-out from BB#" << (*PrI)->getNumber()
<< ".\n";
}
}
diff --git a/lib/CodeGen/OcamlGC.cpp b/lib/CodeGen/OcamlGC.cpp
index f7bc9f3..48db200 100644
--- a/lib/CodeGen/OcamlGC.cpp
+++ b/lib/CodeGen/OcamlGC.cpp
@@ -16,12 +16,11 @@
#include "llvm/CodeGen/GCs.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/Support/Compiler.h"
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN OcamlGC : public GCStrategy {
+ class OcamlGC : public GCStrategy {
public:
OcamlGC();
};
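
Several hunks in this patch, like the one above, drop VISIBILITY_HIDDEN from classes that already sit in an anonymous namespace. The macro (which expanded to a hidden-visibility attribute on compilers supporting it) is redundant there, since the anonymous namespace already keeps the symbols internal to the translation unit. A minimal sketch:

    #include <cstdio>

    namespace {
      // No VISIBILITY_HIDDEN needed: this type is private to this TU.
      struct Printer {
        void run() { std::printf("printer ran\n"); }
      };
    } // end anonymous namespace

    int main() {
      Printer P;
      P.run();
      return 0;
    }
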
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 8fdbe9b..d5edb36 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -19,6 +19,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "post-RA-sched"
+#include "AntiDepBreaker.h"
+#include "AggressiveAntiDepBreaker.h"
+#include "CriticalAntiDepBreaker.h"
#include "ExactHazardRecognizer.h"
#include "SimpleHazardRecognizer.h"
#include "ScheduleDAGInstrs.h"
@@ -37,10 +40,11 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include <map>
#include <set>
@@ -48,6 +52,7 @@ using namespace llvm;
STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
+STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
// Post-RA scheduling is enabled with
// TargetSubtarget.enablePostRAScheduler(). This flag can be used to
@@ -56,10 +61,11 @@ static cl::opt<bool>
EnablePostRAScheduler("post-RA-scheduler",
cl::desc("Enable scheduling after register allocation"),
cl::init(false), cl::Hidden);
-static cl::opt<bool>
+static cl::opt<std::string>
EnableAntiDepBreaking("break-anti-dependencies",
- cl::desc("Break post-RA scheduling anti-dependencies"),
- cl::init(true), cl::Hidden);
+ cl::desc("Break post-RA scheduling anti-dependencies: "
+ "\"critical\", \"all\", or \"none\""),
+ cl::init("none"), cl::Hidden);
static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
cl::desc("Enable exact hazard avoidance"),
@@ -75,8 +81,10 @@ DebugMod("postra-sched-debugmod",
cl::desc("Debug control MBBs that are scheduled"),
cl::init(0), cl::Hidden);
+AntiDepBreaker::~AntiDepBreaker() { }
+
namespace {
- class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
+ class PostRAScheduler : public MachineFunctionPass {
AliasAnalysis *AA;
CodeGenOpt::Level OptLevel;
@@ -103,7 +111,7 @@ namespace {
};
char PostRAScheduler::ID = 0;
- class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
+ class SchedulePostRATDList : public ScheduleDAGInstrs {
/// AvailableQueue - The priority queue to use for the available SUnits.
///
LatencyPriorityQueue AvailableQueue;
@@ -117,56 +125,30 @@ namespace {
/// Topo - A topological ordering for SUnits.
ScheduleDAGTopologicalSort Topo;
- /// AllocatableSet - The set of allocatable registers.
- /// We'll be ignoring anti-dependencies on non-allocatable registers,
- /// because they may not be safe to break.
- const BitVector AllocatableSet;
-
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
+ /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
+ AntiDepBreaker *AntiDepBreak;
+
/// AA - AliasAnalysis for making memory reference queries.
AliasAnalysis *AA;
- /// AntiDepMode - Anti-dependence breaking mode
- TargetSubtarget::AntiDepBreakMode AntiDepMode;
-
- /// Classes - For live regs that are only used in one register class in a
- /// live range, the register class. If the register is not live, the
- /// corresponding value is null. If the register is live but used in
- /// multiple register classes, the corresponding value is -1 casted to a
- /// pointer.
- const TargetRegisterClass *
- Classes[TargetRegisterInfo::FirstVirtualRegister];
-
- /// RegRegs - Map registers to all their references within a live range.
- std::multimap<unsigned, MachineOperand *> RegRefs;
-
/// KillIndices - The index of the most recent kill (proceding bottom-up),
/// or ~0u if the register is not live.
unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
- /// DefIndices - The index of the most recent complete def (proceding bottom
- /// up), or ~0u if the register is live.
- unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
-
- /// KeepRegs - A set of registers which are live and cannot be changed to
- /// break anti-dependencies.
- SmallSet<unsigned, 4> KeepRegs;
-
public:
SchedulePostRATDList(MachineFunction &MF,
const MachineLoopInfo &MLI,
const MachineDominatorTree &MDT,
ScheduleHazardRecognizer *HR,
- AliasAnalysis *aa,
- TargetSubtarget::AntiDepBreakMode adm)
+ AntiDepBreaker *ADB,
+ AliasAnalysis *aa)
: ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
- AllocatableSet(TRI->getAllocatableSet(MF)),
- HazardRec(HR), AA(aa), AntiDepMode(adm) {}
+ HazardRec(HR), AntiDepBreak(ADB), AA(aa) {}
~SchedulePostRATDList() {
- delete HazardRec;
}
/// StartBlock - Initialize register live-range state for scheduling in
@@ -178,11 +160,6 @@ namespace {
///
void Schedule();
- /// FixupKills - Fix register kill flags that have been made
- /// invalid due to scheduling
- ///
- void FixupKills(MachineBasicBlock *MBB);
-
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
@@ -192,17 +169,17 @@ namespace {
///
void FinishBlock();
+ /// FixupKills - Fix register kill flags that have been made
+ /// invalid due to scheduling
+ ///
+ void FixupKills(MachineBasicBlock *MBB);
+
private:
- void PrescanInstruction(MachineInstr *MI);
- void ScanInstruction(MachineInstr *MI, unsigned Count);
- void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
- void ReleaseSuccessors(SUnit *SU);
- void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
- void ListScheduleTopDown();
- bool BreakAntiDependencies();
- unsigned findSuitableFreeRegister(unsigned AntiDepReg,
- unsigned LastNewReg,
- const TargetRegisterClass *);
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge, bool IgnoreAntiDep);
+ void ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle, bool IgnoreAntiDep);
+ void ListScheduleTopDown(
+ AntiDepBreaker::CandidateMap *AntiDepCandidates);
void StartBlockForKills(MachineBasicBlock *BB);
// ToggleKillFlag - Toggle a register operand kill flag. Other
@@ -251,8 +228,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Check for antidep breaking override...
if (EnableAntiDepBreaking.getPosition() > 0) {
- AntiDepMode = (EnableAntiDepBreaking) ?
- TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
+ AntiDepMode = (EnableAntiDepBreaking == "all") ? TargetSubtarget::ANTIDEP_ALL :
+ (EnableAntiDepBreaking == "critical") ? TargetSubtarget::ANTIDEP_CRITICAL :
+ TargetSubtarget::ANTIDEP_NONE;
}
DEBUG(errs() << "PostRAScheduler\n");
@@ -263,8 +241,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
(ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
(ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
+ AntiDepBreaker *ADB =
+ ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn) :
+ ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
- SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA, AntiDepMode);
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA);
// Loop over all of the basic blocks
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
@@ -276,7 +259,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
if (bbcnt++ % DebugDiv != DebugMod)
continue;
errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
- ":MBB ID#" << MBB->getNumber() << " ***\n";
+ ":BB#" << MBB->getNumber() << " ***\n";
}
#endif
@@ -312,6 +295,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
Scheduler.FixupKills(MBB);
}
+ delete HR;
+ delete ADB;
+
return true;
}
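
-break-anti-dependencies is now a string taking "critical", "all", or "none", and the pass builds the matching breaker object up front, then deletes it (together with the hazard recognizer) after the block loop. A toy version of that dispatch, with stand-in classes for the real breakers:

    #include <cstdio>
    #include <string>

    struct AntiDepBreaker {
      virtual ~AntiDepBreaker() {}
      virtual const char *name() const = 0;
    };
    struct CriticalBreaker : AntiDepBreaker {
      const char *name() const { return "critical"; }
    };
    struct AggressiveBreaker : AntiDepBreaker {
      const char *name() const { return "all"; }
    };

    // Maps the option string to a breaker object; NULL means no breaking,
    // mirroring the ANTIDEP_NONE path above.
    static AntiDepBreaker *createBreaker(const std::string &Mode) {
      if (Mode == "all")      return new AggressiveBreaker();
      if (Mode == "critical") return new CriticalBreaker();
      return 0;
    }

    int main() {
      AntiDepBreaker *ADB = createBreaker("critical");
      std::printf("%s\n", ADB ? ADB->name() : "none");
      delete ADB; // like the `delete HR; delete ADB;` at the end of the pass
      return 0;
    }
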
@@ -322,110 +308,72 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
// Call the superclass.
ScheduleDAGInstrs::StartBlock(BB);
- // Reset the hazard recognizer.
+ // Reset the hazard recognizer and anti-dep breaker.
HazardRec->Reset();
-
- // Clear out the register class data.
- std::fill(Classes, array_endof(Classes),
- static_cast<const TargetRegisterClass *>(0));
-
- // Initialize the indices to indicate that no registers are live.
- std::fill(KillIndices, array_endof(KillIndices), ~0u);
- std::fill(DefIndices, array_endof(DefIndices), BB->size());
-
- // Clear "do not change" set.
- KeepRegs.clear();
-
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
-
- // Determine the live-out physregs for this block.
- if (IsReturnBlock) {
- // In a return block, examine the function live-out regs.
- for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
- E = MRI.liveout_end(); I != E; ++I) {
- unsigned Reg = *I;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
- }
- }
- } else {
- // In a non-return block, examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
- }
- }
- }
-
- // Mark live-out callee-saved registers. In a return block this is
- // all callee-saved registers. In non-return this is any
- // callee-saved register that is not saved in the prolog.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- BitVector Pristine = MFI->getPristineRegs(BB);
- for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
- unsigned Reg = *I;
- if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
- }
- }
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->StartBlock(BB);
}
/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::Schedule() {
- DEBUG(errs() << "********** List Scheduling **********\n");
-
// Build the scheduling graph.
BuildSchedGraph(AA);
- if (AntiDepMode != TargetSubtarget::ANTIDEP_NONE) {
- if (BreakAntiDependencies()) {
+ if (AntiDepBreak != NULL) {
+ AntiDepBreaker::CandidateMap AntiDepCandidates;
+ const bool NeedCandidates = AntiDepBreak->NeedCandidates();
+
+ for (unsigned i = 0, Trials = AntiDepBreak->GetMaxTrials();
+ i < Trials; ++i) {
+ DEBUG(errs() << "\n********** Break Anti-Deps, Trial " <<
+ i << " **********\n");
+
+ // If candidates are required, then schedule forward ignoring
+ // anti-dependencies to collect the candidate operands for
+      // anti-dependence breaking. The candidates will be the def
+      // operands for the anti-dependencies that, if broken, would
+      // allow an improved schedule.
+ if (NeedCandidates) {
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ AntiDepCandidates.clear();
+ AvailableQueue.initNodes(SUnits);
+ ListScheduleTopDown(&AntiDepCandidates);
+ AvailableQueue.releaseState();
+ }
+
+ unsigned Broken =
+ AntiDepBreak->BreakAntiDependencies(SUnits, AntiDepCandidates,
+ Begin, InsertPos, InsertPosIndex);
+
// We made changes. Update the dependency graph.
// Theoretically we could update the graph in place:
// When a live range is changed to use a different register, remove
// the def's anti-dependence *and* output-dependence edges due to
// that register, and add new anti-dependence and output-dependence
// edges based on the next live range of the register.
- SUnits.clear();
- EntrySU = SUnit();
- ExitSU = SUnit();
- BuildSchedGraph(AA);
+ if ((Broken != 0) || NeedCandidates) {
+ SUnits.clear();
+ Sequence.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+ BuildSchedGraph(AA);
+ }
+
+ NumFixedAnti += Broken;
+ if (Broken == 0)
+ break;
}
}
+ DEBUG(errs() << "********** List Scheduling **********\n");
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
AvailableQueue.initNodes(SUnits);
-
- ListScheduleTopDown();
-
+ ListScheduleTopDown(NULL);
AvailableQueue.releaseState();
}
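
Schedule() now runs anti-dependence breaking as a bounded fixed-point loop: each trial that breaks edges leaves the DAG stale, so it is rebuilt before the next trial, and the loop stops early once a trial breaks nothing. A skeleton of that control flow, with a hypothetical breaker that starts with five breakable edges:

    #include <cstdio>

    struct Breaker {
      unsigned MaxTrials;
      unsigned Remaining; // pretend anti-dep edges left to break
      unsigned GetMaxTrials() const { return MaxTrials; }
      unsigned BreakAntiDependencies() { // returns # of edges fixed
        unsigned Broken = Remaining > 2 ? 2 : Remaining;
        Remaining -= Broken;
        return Broken;
      }
    };

    static void rebuildSchedGraph() { std::printf("  rebuild DAG\n"); }

    int main() {
      Breaker ADB = { 4, 5 };
      for (unsigned i = 0, Trials = ADB.GetMaxTrials(); i < Trials; ++i) {
        unsigned Broken = ADB.BreakAntiDependencies();
        std::printf("trial %u: broke %u\n", i, Broken);
        if (Broken == 0)
          break;             // fixed point reached
        rebuildSchedGraph(); // edges changed, so the old DAG is stale
      }
      return 0;
    }
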
@@ -433,436 +381,20 @@ void SchedulePostRATDList::Schedule() {
/// instruction, which will not be scheduled.
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
- assert(Count < InsertPosIndex && "Instruction index out of expected range!");
-
- // Any register which was defined within the previous scheduling region
- // may have been rescheduled and its lifetime may overlap with registers
- // in ways not reflected in our current liveness state. For each such
- // register, adjust the liveness state to be conservatively correct.
- for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
- if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
- assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
- // Mark this register to be non-renamable.
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- // Move the def index to the end of the previous region, to reflect
- // that the def could theoretically have been scheduled at the end.
- DefIndices[Reg] = InsertPosIndex;
- }
-
- PrescanInstruction(MI);
- ScanInstruction(MI, Count);
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->Observe(MI, Count, InsertPosIndex);
}
/// FinishBlock - Clean up register live-range state.
///
void SchedulePostRATDList::FinishBlock() {
- RegRefs.clear();
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->FinishBlock();
// Call the superclass.
ScheduleDAGInstrs::FinishBlock();
}
-/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
-/// critical path.
-static SDep *CriticalPathStep(SUnit *SU) {
- SDep *Next = 0;
- unsigned NextDepth = 0;
- // Find the predecessor edge with the greatest depth.
- for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
- P != PE; ++P) {
- SUnit *PredSU = P->getSUnit();
- unsigned PredLatency = P->getLatency();
- unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
- // In the case of a latency tie, prefer an anti-dependency edge over
- // other types of edges.
- if (NextDepth < PredTotalLatency ||
- (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
- NextDepth = PredTotalLatency;
- Next = &*P;
- }
- }
- return Next;
-}
-
-void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
- // Scan the register operands for this instruction and update
- // Classes and RegRefs.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
- const TargetRegisterClass *NewRC = 0;
-
- if (i < MI->getDesc().getNumOperands())
- NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
-
- // For now, only allow the register to be changed if its register
- // class is consistent across all uses.
- if (!Classes[Reg] && NewRC)
- Classes[Reg] = NewRC;
- else if (!NewRC || Classes[Reg] != NewRC)
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
-
- // Now check for aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- // If an alias of the reg is used during the live range, give up.
- // Note that this allows us to skip checking if AntiDepReg
- // overlaps with any of the aliases, among other things.
- unsigned AliasReg = *Alias;
- if (Classes[AliasReg]) {
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- }
- }
-
- // If we're still willing to consider this register, note the reference.
- if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
- RegRefs.insert(std::make_pair(Reg, &MO));
-
- // It's not safe to change register allocation for source operands of
- // that have special allocation requirements.
- if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
- if (KeepRegs.insert(Reg)) {
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- KeepRegs.insert(*Subreg);
- }
- }
- }
-}
-
-void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
- unsigned Count) {
- // Update liveness.
- // Proceding upwards, registers that are defed but not used in this
- // instruction are now dead.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
- if (!MO.isDef()) continue;
- // Ignore two-addr defs.
- if (MI->isRegTiedToUseOperand(i)) continue;
-
- DefIndices[Reg] = Count;
- KillIndices[Reg] = ~0u;
- assert(((KillIndices[Reg] == ~0u) !=
- (DefIndices[Reg] == ~0u)) &&
- "Kill and Def maps aren't consistent for Reg!");
- KeepRegs.erase(Reg);
- Classes[Reg] = 0;
- RegRefs.erase(Reg);
- // Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- unsigned SubregReg = *Subreg;
- DefIndices[SubregReg] = Count;
- KillIndices[SubregReg] = ~0u;
- KeepRegs.erase(SubregReg);
- Classes[SubregReg] = 0;
- RegRefs.erase(SubregReg);
- }
- // Conservatively mark super-registers as unusable.
- for (const unsigned *Super = TRI->getSuperRegisters(Reg);
- *Super; ++Super) {
- unsigned SuperReg = *Super;
- Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- }
- }
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
- if (!MO.isUse()) continue;
-
- const TargetRegisterClass *NewRC = 0;
- if (i < MI->getDesc().getNumOperands())
- NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
-
- // For now, only allow the register to be changed if its register
- // class is consistent across all uses.
- if (!Classes[Reg] && NewRC)
- Classes[Reg] = NewRC;
- else if (!NewRC || Classes[Reg] != NewRC)
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
-
- RegRefs.insert(std::make_pair(Reg, &MO));
-
- // It wasn't previously live but now it is, this is a kill.
- if (KillIndices[Reg] == ~0u) {
- KillIndices[Reg] = Count;
- DefIndices[Reg] = ~0u;
- assert(((KillIndices[Reg] == ~0u) !=
- (DefIndices[Reg] == ~0u)) &&
- "Kill and Def maps aren't consistent for Reg!");
- }
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- if (KillIndices[AliasReg] == ~0u) {
- KillIndices[AliasReg] = Count;
- DefIndices[AliasReg] = ~0u;
- }
- }
- }
-}
-
-unsigned
-SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg,
- unsigned LastNewReg,
- const TargetRegisterClass *RC) {
- for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
- RE = RC->allocation_order_end(MF); R != RE; ++R) {
- unsigned NewReg = *R;
- // Don't replace a register with itself.
- if (NewReg == AntiDepReg) continue;
- // Don't replace a register with one that was recently used to repair
- // an anti-dependence with this AntiDepReg, because that would
- // re-introduce that anti-dependence.
- if (NewReg == LastNewReg) continue;
- // If NewReg is dead and NewReg's most recent def is not before
- // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
- assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
- "Kill and Def maps aren't consistent for AntiDepReg!");
- assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
- "Kill and Def maps aren't consistent for NewReg!");
- if (KillIndices[NewReg] != ~0u ||
- Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
- KillIndices[AntiDepReg] > DefIndices[NewReg])
- continue;
- return NewReg;
- }
-
- // No registers are free and available!
- return 0;
-}
-
-/// BreakAntiDependencies - Identifiy anti-dependencies along the critical path
-/// of the ScheduleDAG and break them by renaming registers.
-///
-bool SchedulePostRATDList::BreakAntiDependencies() {
- // The code below assumes that there is at least one instruction,
- // so just duck out immediately if the block is empty.
- if (SUnits.empty()) return false;
-
- // Find the node at the bottom of the critical path.
- SUnit *Max = 0;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- SUnit *SU = &SUnits[i];
- if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
- Max = SU;
- }
-
-#ifndef NDEBUG
- {
- DEBUG(errs() << "Critical path has total latency "
- << (Max->getDepth() + Max->Latency) << "\n");
- DEBUG(errs() << "Available regs:");
- for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
- if (KillIndices[Reg] == ~0u)
- DEBUG(errs() << " " << TRI->getName(Reg));
- }
- DEBUG(errs() << '\n');
- }
-#endif
-
- // Track progress along the critical path through the SUnit graph as we walk
- // the instructions.
- SUnit *CriticalPathSU = Max;
- MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
-
- // Consider this pattern:
- // A = ...
- // ... = A
- // A = ...
- // ... = A
- // A = ...
- // ... = A
- // A = ...
- // ... = A
- // There are three anti-dependencies here, and without special care,
- // we'd break all of them using the same register:
- // A = ...
- // ... = A
- // B = ...
- // ... = B
- // B = ...
- // ... = B
- // B = ...
- // ... = B
- // because at each anti-dependence, B is the first register that
- // isn't A which is free. This re-introduces anti-dependencies
- // at all but one of the original anti-dependencies that we were
- // trying to break. To avoid this, keep track of the most recent
- // register that each register was replaced with, avoid
- // using it to repair an anti-dependence on the same register.
- // This lets us produce this:
- // A = ...
- // ... = A
- // B = ...
- // ... = B
- // C = ...
- // ... = C
- // B = ...
- // ... = B
- // This still has an anti-dependence on B, but at least it isn't on the
- // original critical path.
- //
- // TODO: If we tracked more than one register here, we could potentially
- // fix that remaining critical edge too. This is a little more involved,
- // because unlike the most recent register, less recent registers should
- // still be considered, though only if no other registers are available.
- unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
-
- // Attempt to break anti-dependence edges on the critical path. Walk the
- // instructions from the bottom up, tracking information about liveness
- // as we go to help determine which registers are available.
- bool Changed = false;
- unsigned Count = InsertPosIndex - 1;
- for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
- I != E; --Count) {
- MachineInstr *MI = --I;
-
- // Check if this instruction has a dependence on the critical path that
- // is an anti-dependence that we may be able to break. If it is, set
- // AntiDepReg to the non-zero register associated with the anti-dependence.
- //
- // We limit our attention to the critical path as a heuristic to avoid
- // breaking anti-dependence edges that aren't going to significantly
- // impact the overall schedule. There are a limited number of registers
- // and we want to save them for the important edges.
- //
- // TODO: Instructions with multiple defs could have multiple
- // anti-dependencies. The current code here only knows how to break one
- // edge per instruction. Note that we'd have to be able to break all of
- // the anti-dependencies in an instruction in order to be effective.
- unsigned AntiDepReg = 0;
- if (MI == CriticalPathMI) {
- if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
- SUnit *NextSU = Edge->getSUnit();
-
- // Only consider anti-dependence edges.
- if (Edge->getKind() == SDep::Anti) {
- AntiDepReg = Edge->getReg();
- assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!AllocatableSet.test(AntiDepReg))
- // Don't break anti-dependencies on non-allocatable registers.
- AntiDepReg = 0;
- else if (KeepRegs.count(AntiDepReg))
- // Don't break anti-dependencies if an use down below requires
- // this exact register.
- AntiDepReg = 0;
- else {
- // If the SUnit has other dependencies on the SUnit that it
- // anti-depends on, don't bother breaking the anti-dependency
- // since those edges would prevent such units from being
- // scheduled past each other regardless.
- //
- // Also, if there are dependencies on other SUnits with the
- // same register as the anti-dependency, don't attempt to
- // break it.
- for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
- PE = CriticalPathSU->Preds.end(); P != PE; ++P)
- if (P->getSUnit() == NextSU ?
- (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
- (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
- AntiDepReg = 0;
- break;
- }
- }
- }
- CriticalPathSU = NextSU;
- CriticalPathMI = CriticalPathSU->getInstr();
- } else {
- // We've reached the end of the critical path.
- CriticalPathSU = 0;
- CriticalPathMI = 0;
- }
- }
-
- PrescanInstruction(MI);
-
- if (MI->getDesc().hasExtraDefRegAllocReq())
- // If this instruction's defs have special allocation requirement, don't
- // break this anti-dependency.
- AntiDepReg = 0;
- else if (AntiDepReg) {
- // If this instruction has a use of AntiDepReg, breaking it
- // is invalid.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
- if (MO.isUse() && AntiDepReg == Reg) {
- AntiDepReg = 0;
- break;
- }
- }
- }
-
- // Determine AntiDepReg's register class, if it is live and is
- // consistently used within a single class.
- const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
- assert((AntiDepReg == 0 || RC != NULL) &&
- "Register should be live if it's causing an anti-dependence!");
- if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
- AntiDepReg = 0;
-
- // Look for a suitable register to use to break the anti-depenence.
- //
- // TODO: Instead of picking the first free register, consider which might
- // be the best.
- if (AntiDepReg != 0) {
- if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
- LastNewReg[AntiDepReg],
- RC)) {
- DEBUG(errs() << "Breaking anti-dependence edge on "
- << TRI->getName(AntiDepReg)
- << " with " << RegRefs.count(AntiDepReg) << " references"
- << " using " << TRI->getName(NewReg) << "!\n");
-
- // Update the references to the old register to refer to the new
- // register.
- std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
- std::multimap<unsigned, MachineOperand *>::iterator>
- Range = RegRefs.equal_range(AntiDepReg);
- for (std::multimap<unsigned, MachineOperand *>::iterator
- Q = Range.first, QE = Range.second; Q != QE; ++Q)
- Q->second->setReg(NewReg);
-
- // We just went back in time and modified history; the
- // liveness information for the anti-depenence reg is now
- // inconsistent. Set the state as if it were dead.
- Classes[NewReg] = Classes[AntiDepReg];
- DefIndices[NewReg] = DefIndices[AntiDepReg];
- KillIndices[NewReg] = KillIndices[AntiDepReg];
- assert(((KillIndices[NewReg] == ~0u) !=
- (DefIndices[NewReg] == ~0u)) &&
- "Kill and Def maps aren't consistent for NewReg!");
-
- Classes[AntiDepReg] = 0;
- DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
- KillIndices[AntiDepReg] = ~0u;
- assert(((KillIndices[AntiDepReg] == ~0u) !=
- (DefIndices[AntiDepReg] == ~0u)) &&
- "Kill and Def maps aren't consistent for AntiDepReg!");
-
- RegRefs.erase(AntiDepReg);
- Changed = true;
- LastNewReg[AntiDepReg] = NewReg;
- }
- }
-
- ScanInstruction(MI, Count);
- }
-
- return Changed;
-}
-
/// StartBlockForKills - Initialize register live-range state for updating kills
///
void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
@@ -941,7 +473,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
/// incorrect by instruction reordering.
///
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
- DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n');
+ DEBUG(errs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
std::set<unsigned> killedRegs;
BitVector ReservedRegs = TRI->getReservedRegs(MF);
@@ -1040,7 +572,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
-void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge,
+ bool IgnoreAntiDep) {
SUnit *SuccSU = SuccEdge->getSUnit();
#ifndef NDEBUG
@@ -1056,7 +589,8 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
// Compute how many cycles it will be before this actually becomes
// available. This is the max of the start time of all predecessors plus
// their latencies.
- SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+ SuccSU->setDepthToAtLeast(SU->getDepth(IgnoreAntiDep) +
+ SuccEdge->getLatency(), IgnoreAntiDep);
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
@@ -1065,40 +599,73 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
}
/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
-void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I)
- ReleaseSucc(SU, &*I);
+ I != E; ++I) {
+ if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ ReleaseSucc(SU, &*I, IgnoreAntiDep);
+ }
}
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
-void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle,
+ bool IgnoreAntiDep) {
DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
Sequence.push_back(SU);
- assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
- SU->setDepthToAtLeast(CurCycle);
+ assert(CurCycle >= SU->getDepth(IgnoreAntiDep) &&
+ "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle, IgnoreAntiDep);
- ReleaseSuccessors(SU);
+ ReleaseSuccessors(SU, IgnoreAntiDep);
SU->isScheduled = true;
AvailableQueue.ScheduledNode(SU);
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
-void SchedulePostRATDList::ListScheduleTopDown() {
+void SchedulePostRATDList::ListScheduleTopDown(
+ AntiDepBreaker::CandidateMap *AntiDepCandidates) {
unsigned CurCycle = 0;
+ const bool IgnoreAntiDep = (AntiDepCandidates != NULL);
+
+ // We're scheduling top-down but we're visiting the regions in
+ // bottom-up order, so we don't know the hazards at the start of a
+ // region. So assume no hazards (this should usually be ok as most
+ // blocks are a single region).
+ HazardRec->Reset();
+
+ // If ignoring anti-dependencies, the Schedule DAG still has Anti
+ // dep edges, but we ignore them for scheduling purposes.
+ AvailableQueue.setIgnoreAntiDep(IgnoreAntiDep);
// Release any successors of the special Entry node.
- ReleaseSuccessors(&EntrySU);
+ ReleaseSuccessors(&EntrySU, IgnoreAntiDep);
- // All leaves to Available queue.
+ // Add all leaves to Available queue. If ignoring antideps we also
+ // adjust the predecessor count for each node to not include antidep
+ // edges.
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
// It is available if it has no predecessors.
- if (SUnits[i].Preds.empty()) {
+ bool available = SUnits[i].Preds.empty();
+ // If we are ignoring anti-dependencies then a node that has only
+ // anti-dep predecessors is available.
+ if (!available && IgnoreAntiDep) {
+ available = true;
+ for (SUnit::const_pred_iterator I = SUnits[i].Preds.begin(),
+ E = SUnits[i].Preds.end(); I != E; ++I) {
+ if (I->getKind() != SDep::Anti) {
+ available = false;
+ } else {
+ SUnits[i].NumPredsLeft -= 1;
+ }
+ }
+ }
+
+ if (available) {
AvailableQueue.push(&SUnits[i]);
SUnits[i].isAvailable = true;
}
@@ -1117,26 +684,25 @@ void SchedulePostRATDList::ListScheduleTopDown() {
// so, add them to the available queue.
unsigned MinDepth = ~0u;
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
- if (PendingQueue[i]->getDepth() <= CurCycle) {
+ if (PendingQueue[i]->getDepth(IgnoreAntiDep) <= CurCycle) {
AvailableQueue.push(PendingQueue[i]);
PendingQueue[i]->isAvailable = true;
PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back();
--i; --e;
- } else if (PendingQueue[i]->getDepth() < MinDepth)
- MinDepth = PendingQueue[i]->getDepth();
+ } else if (PendingQueue[i]->getDepth(IgnoreAntiDep) < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth(IgnoreAntiDep);
}
DEBUG(errs() << "\n*** Examining Available\n";
LatencyPriorityQueue q = AvailableQueue;
while (!q.empty()) {
SUnit *su = q.pop();
- errs() << "Height " << su->getHeight() << ": ";
+ errs() << "Height " << su->getHeight(IgnoreAntiDep) << ": ";
su->dump(this);
});
SUnit *FoundSUnit = 0;
-
bool HasNoopHazards = false;
while (!AvailableQueue.empty()) {
SUnit *CurSUnit = AvailableQueue.pop();
@@ -1160,9 +726,30 @@ void SchedulePostRATDList::ListScheduleTopDown() {
NotReady.clear();
}
- // If we found a node to schedule, do it now.
+ // If we found a node to schedule...
if (FoundSUnit) {
- ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ // If we are ignoring anti-dependencies and the SUnit we are
+ // scheduling has an antidep predecessor that has not been
+ // scheduled, then we will need to break that antidep if we want
+ // to get this schedule when not ignoring anti-dependencies.
+ if (IgnoreAntiDep) {
+ AntiDepBreaker::AntiDepRegVector AntiDepRegs;
+ for (SUnit::const_pred_iterator I = FoundSUnit->Preds.begin(),
+ E = FoundSUnit->Preds.end(); I != E; ++I) {
+ if ((I->getKind() == SDep::Anti) && !I->getSUnit()->isScheduled)
+ AntiDepRegs.push_back(I->getReg());
+ }
+
+ if (AntiDepRegs.size() > 0) {
+ DEBUG(errs() << "*** AntiDep Candidate: ");
+ DEBUG(FoundSUnit->dump(this));
+ AntiDepCandidates->insert(
+ AntiDepBreaker::CandidateMap::value_type(FoundSUnit, AntiDepRegs));
+ }
+ }
+
+ // ... schedule the node...
+ ScheduleNodeTopDown(FoundSUnit, CurCycle, IgnoreAntiDep);
HazardRec->EmitInstruction(FoundSUnit);
CycleHasInsts = true;
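For orientation, the hunk above records "anti-dependence candidates" while scheduling with anti-dep edges ignored. Below is a minimal standalone sketch of that collection step, using toy Node/Edge types invented here rather than LLVM's SUnit/SDep; it illustrates the idea only and is not code from this patch.

// Toy model: for a node picked by the scheduler, record the registers of
// any anti-dep predecessors that are still unscheduled. Those edges would
// have to be broken (by renaming Reg) for this schedule to become legal.
#include <cstdio>
#include <map>
#include <vector>

enum class DepKind { Data, Anti, Output, Order };

struct Edge { int PredId; DepKind Kind; unsigned Reg; };

struct Node {
  int Id;
  bool Scheduled = false;
  std::vector<Edge> Preds;
};

static void collectAntiDepCandidates(
    const Node &N, const std::vector<Node> &All,
    std::map<int, std::vector<unsigned>> &Candidates) {
  std::vector<unsigned> Regs;
  for (const Edge &E : N.Preds)
    if (E.Kind == DepKind::Anti && !All[E.PredId].Scheduled)
      Regs.push_back(E.Reg);
  if (!Regs.empty())
    Candidates[N.Id] = Regs;
}

int main() {
  std::vector<Node> G(2);
  G[0].Id = 0; G[1].Id = 1;
  G[1].Preds.push_back({0, DepKind::Anti, 5}); // anti-dep on register 5
  std::map<int, std::vector<unsigned>> Candidates;
  collectAntiDepCandidates(G[1], G, Candidates); // node 0 not yet scheduled
  std::printf("candidates for node 1: %zu\n", Candidates[1].size());
}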
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 726869a..cce5ae8 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -50,13 +50,14 @@ STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers");
STATISTIC(NumDeadSpills, "Number of dead spills removed");
namespace {
- class VISIBILITY_HIDDEN PreAllocSplitting : public MachineFunctionPass {
+ class PreAllocSplitting : public MachineFunctionPass {
MachineFunction *CurrMF;
const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetRegisterInfo* TRI;
MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
+ SlotIndexes *SIs;
LiveIntervals *LIs;
LiveStacks *LSs;
VirtRegMap *VRM;
@@ -68,7 +69,7 @@ namespace {
MachineBasicBlock *BarrierMBB;
// Barrier - Current barrier index.
- LiveIndex BarrierIdx;
+ SlotIndex BarrierIdx;
// CurrLI - Current live interval being split.
LiveInterval *CurrLI;
@@ -83,16 +84,19 @@ namespace {
DenseMap<unsigned, int> IntervalSSMap;
// Def2SpillMap - A map from a def instruction index to spill index.
- DenseMap<LiveIndex, LiveIndex> Def2SpillMap;
+ DenseMap<SlotIndex, SlotIndex> Def2SpillMap;
public:
static char ID;
- PreAllocSplitting() : MachineFunctionPass(&ID) {}
+ PreAllocSplitting()
+ : MachineFunctionPass(&ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<LiveStacks>();
@@ -129,23 +133,23 @@ namespace {
private:
MachineBasicBlock::iterator
findNextEmptySlot(MachineBasicBlock*, MachineInstr*,
- LiveIndex&);
+ SlotIndex&);
MachineBasicBlock::iterator
findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
- SmallPtrSet<MachineInstr*, 4>&, LiveIndex&);
+ SmallPtrSet<MachineInstr*, 4>&, SlotIndex&);
MachineBasicBlock::iterator
- findRestorePoint(MachineBasicBlock*, MachineInstr*, LiveIndex,
- SmallPtrSet<MachineInstr*, 4>&, LiveIndex&);
+ findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex,
+ SmallPtrSet<MachineInstr*, 4>&, SlotIndex&);
int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
bool IsAvailableInStack(MachineBasicBlock*, unsigned,
- LiveIndex, LiveIndex,
- LiveIndex&, int&) const;
+ SlotIndex, SlotIndex,
+ SlotIndex&, int&) const;
- void UpdateSpillSlotInterval(VNInfo*, LiveIndex, LiveIndex);
+ void UpdateSpillSlotInterval(VNInfo*, SlotIndex, SlotIndex);
bool SplitRegLiveInterval(LiveInterval*);
@@ -157,7 +161,7 @@ namespace {
bool Rematerialize(unsigned vreg, VNInfo* ValNo,
MachineInstr* DefMI,
MachineBasicBlock::iterator RestorePt,
- LiveIndex RestoreIdx,
+ SlotIndex RestoreIdx,
SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
MachineInstr* DefMI,
@@ -209,12 +213,12 @@ const PassInfo *const llvm::PreAllocSplittingID = &X;
/// instruction index map. If there isn't one, return end().
MachineBasicBlock::iterator
PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
- LiveIndex &SpotIndex) {
+ SlotIndex &SpotIndex) {
MachineBasicBlock::iterator MII = MI;
if (++MII != MBB->end()) {
- LiveIndex Index =
+ SlotIndex Index =
LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
- if (Index != LiveIndex()) {
+ if (Index != SlotIndex()) {
SpotIndex = Index;
return MII;
}
@@ -230,7 +234,7 @@ MachineBasicBlock::iterator
PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
MachineInstr *DefMI,
SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
- LiveIndex &SpillIndex) {
+ SlotIndex &SpillIndex) {
MachineBasicBlock::iterator Pt = MBB->begin();
MachineBasicBlock::iterator MII = MI;
@@ -243,7 +247,7 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
while (MII != EndPt && !RefsInMBB.count(MII)) {
- LiveIndex Index = LIs->getInstructionIndex(MII);
+ SlotIndex Index = LIs->getInstructionIndex(MII);
// We can't insert the spill between the barrier (a call), and its
// corresponding call frame setup.
@@ -276,9 +280,9 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
/// found.
MachineBasicBlock::iterator
PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
- LiveIndex LastIdx,
+ SlotIndex LastIdx,
SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
- LiveIndex &RestoreIndex) {
+ SlotIndex &RestoreIndex) {
// FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
// begin index accordingly.
MachineBasicBlock::iterator Pt = MBB->end();
@@ -299,10 +303,10 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
// FIXME: Limit the number of instructions to examine to reduce
// compile time?
while (MII != EndPt) {
- LiveIndex Index = LIs->getInstructionIndex(MII);
+ SlotIndex Index = LIs->getInstructionIndex(MII);
if (Index > LastIdx)
break;
- LiveIndex Gap = LIs->findGapBeforeInstr(Index);
+ SlotIndex Gap = LIs->findGapBeforeInstr(Index);
// We can't insert a restore between the barrier (a call) and its
// corresponding call frame teardown.
@@ -311,7 +315,7 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
++MII;
} while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
- } else if (Gap != LiveIndex()) {
+ } else if (Gap != SlotIndex()) {
Pt = MII;
RestoreIndex = Gap;
}
@@ -344,7 +348,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(LiveIndex(), 0, false,
+ CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
LSs->getVNInfoAllocator());
return SS;
}
@@ -353,9 +357,9 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
/// slot at the specified index.
bool
PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
- unsigned Reg, LiveIndex DefIndex,
- LiveIndex RestoreIndex,
- LiveIndex &SpillIndex,
+ unsigned Reg, SlotIndex DefIndex,
+ SlotIndex RestoreIndex,
+ SlotIndex &SpillIndex,
int& SS) const {
if (!DefMBB)
return false;
@@ -363,7 +367,7 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
if (I == IntervalSSMap.end())
return false;
- DenseMap<LiveIndex, LiveIndex>::iterator
+ DenseMap<SlotIndex, SlotIndex>::iterator
II = Def2SpillMap.find(DefIndex);
if (II == Def2SpillMap.end())
return false;
@@ -384,8 +388,8 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
/// interval being split, and the spill and restore indices, update the live
/// interval of the spill stack slot.
void
-PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, LiveIndex SpillIndex,
- LiveIndex RestoreIndex) {
+PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, SlotIndex SpillIndex,
+ SlotIndex RestoreIndex) {
assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
"Expect restore in the barrier mbb");
@@ -398,8 +402,8 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, LiveIndex SpillIndex,
}
SmallPtrSet<MachineBasicBlock*, 4> Processed;
- LiveIndex EndIdx = LIs->getMBBEndIdx(MBB);
- LiveRange SLR(SpillIndex, LIs->getNextSlot(EndIdx), CurrSValNo);
+ SlotIndex EndIdx = LIs->getMBBEndIdx(MBB);
+ LiveRange SLR(SpillIndex, EndIdx.getNextSlot(), CurrSValNo);
CurrSLI->addRange(SLR);
Processed.insert(MBB);
@@ -418,7 +422,7 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, LiveIndex SpillIndex,
WorkList.pop_back();
if (Processed.count(MBB))
continue;
- LiveIndex Idx = LIs->getMBBStartIdx(MBB);
+ SlotIndex Idx = LIs->getMBBStartIdx(MBB);
LR = CurrLI->getLiveRangeContaining(Idx);
if (LR && LR->valno == ValNo) {
EndIdx = LIs->getMBBEndIdx(MBB);
@@ -428,7 +432,7 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, LiveIndex SpillIndex,
CurrSLI->addRange(SLR);
} else if (LR->end > EndIdx) {
// Live range extends beyond end of mbb, process successors.
- LiveRange SLR(Idx, LIs->getNextIndex(EndIdx), CurrSValNo);
+ LiveRange SLR(Idx, EndIdx.getNextIndex(), CurrSValNo);
CurrSLI->addRange(SLR);
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI)
@@ -491,12 +495,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
}
// Once we've found it, extend its VNInfo to our instruction.
- LiveIndex DefIndex = LIs->getInstructionIndex(Walker);
- DefIndex = LIs->getDefIndex(DefIndex);
- LiveIndex EndIndex = LIs->getMBBEndIdx(MBB);
+ SlotIndex DefIndex = LIs->getInstructionIndex(Walker);
+ DefIndex = DefIndex.getDefIndex();
+ SlotIndex EndIndex = LIs->getMBBEndIdx(MBB);
RetVNI = NewVNs[Walker];
- LI->addRange(LiveRange(DefIndex, LIs->getNextSlot(EndIndex), RetVNI));
+ LI->addRange(LiveRange(DefIndex, EndIndex.getNextSlot(), RetVNI));
} else if (!ContainsDefs && ContainsUses) {
SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
@@ -528,12 +532,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
IsTopLevel, IsIntraBlock);
}
- LiveIndex UseIndex = LIs->getInstructionIndex(Walker);
- UseIndex = LIs->getUseIndex(UseIndex);
- LiveIndex EndIndex;
+ SlotIndex UseIndex = LIs->getInstructionIndex(Walker);
+ UseIndex = UseIndex.getUseIndex();
+ SlotIndex EndIndex;
if (IsIntraBlock) {
EndIndex = LIs->getInstructionIndex(UseI);
- EndIndex = LIs->getUseIndex(EndIndex);
+ EndIndex = EndIndex.getUseIndex();
} else
EndIndex = LIs->getMBBEndIdx(MBB);
@@ -542,7 +546,7 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
NewVNs, LiveOut, Phis, false, true);
- LI->addRange(LiveRange(UseIndex, LIs->getNextSlot(EndIndex), RetVNI));
+ LI->addRange(LiveRange(UseIndex, EndIndex.getNextSlot(), RetVNI));
// FIXME: Need to set kills properly for inter-block stuff.
if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex);
@@ -588,13 +592,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
IsTopLevel, IsIntraBlock);
}
- LiveIndex StartIndex = LIs->getInstructionIndex(Walker);
- StartIndex = foundDef ? LIs->getDefIndex(StartIndex) :
- LIs->getUseIndex(StartIndex);
- LiveIndex EndIndex;
+ SlotIndex StartIndex = LIs->getInstructionIndex(Walker);
+ StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex();
+ SlotIndex EndIndex;
if (IsIntraBlock) {
EndIndex = LIs->getInstructionIndex(UseI);
- EndIndex = LIs->getUseIndex(EndIndex);
+ EndIndex = EndIndex.getUseIndex();
} else
EndIndex = LIs->getMBBEndIdx(MBB);
@@ -604,7 +607,7 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
NewVNs, LiveOut, Phis, false, true);
- LI->addRange(LiveRange(StartIndex, LIs->getNextSlot(EndIndex), RetVNI));
+ LI->addRange(LiveRange(StartIndex, EndIndex.getNextSlot(), RetVNI));
if (foundUse && RetVNI->isKill(StartIndex))
RetVNI->removeKill(StartIndex);
@@ -640,9 +643,9 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
// assume that we are not intrablock here.
if (Phis.count(MBB)) return Phis[MBB];
- LiveIndex StartIndex = LIs->getMBBStartIdx(MBB);
+ SlotIndex StartIndex = LIs->getMBBStartIdx(MBB);
VNInfo *RetVNI = Phis[MBB] =
- LI->getNextValue(LiveIndex(), /*FIXME*/ 0, false,
+ LI->getNextValue(SlotIndex(), /*FIXME*/ 0, false,
LIs->getVNInfoAllocator());
if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
@@ -685,19 +688,19 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
I->second->setHasPHIKill(true);
- LiveIndex KillIndex = LIs->getMBBEndIdx(I->first);
+ SlotIndex KillIndex = LIs->getMBBEndIdx(I->first);
if (!I->second->isKill(KillIndex))
I->second->addKill(KillIndex);
}
}
- LiveIndex EndIndex;
+ SlotIndex EndIndex;
if (IsIntraBlock) {
EndIndex = LIs->getInstructionIndex(UseI);
- EndIndex = LIs->getUseIndex(EndIndex);
+ EndIndex = EndIndex.getUseIndex();
} else
EndIndex = LIs->getMBBEndIdx(MBB);
- LI->addRange(LiveRange(StartIndex, LIs->getNextSlot(EndIndex), RetVNI));
+ LI->addRange(LiveRange(StartIndex, EndIndex.getNextSlot(), RetVNI));
if (IsIntraBlock)
RetVNI->addKill(EndIndex);
@@ -733,8 +736,8 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
DE = MRI->def_end(); DI != DE; ++DI) {
Defs[(*DI).getParent()].insert(&*DI);
- LiveIndex DefIdx = LIs->getInstructionIndex(&*DI);
- DefIdx = LIs->getDefIndex(DefIdx);
+ SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
+ DefIdx = DefIdx.getDefIndex();
assert(DI->getOpcode() != TargetInstrInfo::PHI &&
"Following NewVN isPHIDef flag incorrect. Fix me!");
@@ -769,13 +772,13 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
// Add ranges for dead defs
for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
DE = MRI->def_end(); DI != DE; ++DI) {
- LiveIndex DefIdx = LIs->getInstructionIndex(&*DI);
- DefIdx = LIs->getDefIndex(DefIdx);
+ SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
+ DefIdx = DefIdx.getDefIndex();
if (LI->liveAt(DefIdx)) continue;
VNInfo* DeadVN = NewVNs[&*DI];
- LI->addRange(LiveRange(DefIdx, LIs->getNextSlot(DefIdx), DeadVN));
+ LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN));
DeadVN->addKill(DefIdx);
}
@@ -784,8 +787,8 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
VI != VE; ++VI) {
VNInfo* VNI = *VI;
for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) {
- LiveIndex KillIdx = VNI->kills[i];
- if (KillIdx.isPHIIndex())
+ SlotIndex KillIdx = VNI->kills[i];
+ if (KillIdx.isPHI())
continue;
MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx);
if (KillMI) {
@@ -826,14 +829,14 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
// Locate two-address redefinitions
for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(),
KE = OldVN->kills.end(); KI != KE; ++KI) {
- assert(!KI->isPHIIndex() &&
+ assert(!KI->isPHI() &&
"VN previously reported having no PHI kills.");
MachineInstr* MI = LIs->getInstructionFromIndex(*KI);
unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg);
if (DefIdx == ~0U) continue;
if (MI->isRegTiedToUseOperand(DefIdx)) {
VNInfo* NextVN =
- CurrLI->findDefinedVNInfoForRegInt(LIs->getDefIndex(*KI));
+ CurrLI->findDefinedVNInfoForRegInt(KI->getDefIndex());
if (NextVN == OldVN) continue;
Stack.push_back(NextVN);
}
@@ -865,10 +868,10 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
E = MRI->reg_end(); I != E; ++I) {
MachineOperand& MO = I.getOperand();
- LiveIndex InstrIdx = LIs->getInstructionIndex(&*I);
+ SlotIndex InstrIdx = LIs->getInstructionIndex(&*I);
- if ((MO.isUse() && NewLI.liveAt(LIs->getUseIndex(InstrIdx))) ||
- (MO.isDef() && NewLI.liveAt(LIs->getDefIndex(InstrIdx))))
+ if ((MO.isUse() && NewLI.liveAt(InstrIdx.getUseIndex())) ||
+ (MO.isDef() && NewLI.liveAt(InstrIdx.getDefIndex())))
OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo()));
}
@@ -893,12 +896,12 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
MachineInstr* DefMI,
MachineBasicBlock::iterator RestorePt,
- LiveIndex RestoreIdx,
+ SlotIndex RestoreIdx,
SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
MachineBasicBlock& MBB = *RestorePt->getParent();
MachineBasicBlock::iterator KillPt = BarrierMBB->end();
- LiveIndex KillIdx;
+ SlotIndex KillIdx;
if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
else
@@ -911,8 +914,8 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx);
ReconstructLiveInterval(CurrLI);
- LiveIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt));
- RematIdx = LIs->getDefIndex(RematIdx);
+ SlotIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt));
+ RematIdx = RematIdx.getDefIndex();
RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx));
++NumSplits;
@@ -968,7 +971,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(LiveIndex(), 0, false,
+ CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
LSs->getVNInfoAllocator());
}
@@ -1052,11 +1055,14 @@ MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
/// so it would not cross the barrier that's being processed. Shrink wrap
/// (minimize) the live interval to the last uses.
bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
+ DEBUG(errs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier
+ << " result: ");
+
CurrLI = LI;
// Find live range where current interval cross the barrier.
LiveInterval::iterator LR =
- CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx));
+ CurrLI->FindLiveRangeContaining(BarrierIdx.getUseIndex());
VNInfo *ValNo = LR->valno;
assert(!ValNo->isUnused() && "Val# is defined by a dead def?");
@@ -1065,8 +1071,10 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
? LIs->getInstructionFromIndex(ValNo->def) : NULL;
// If this would create a new join point, do not split.
- if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent()))
+ if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
+ DEBUG(errs() << "FAILED (would create a new join point).\n");
return false;
+ }
// Find all references in the barrier mbb.
SmallPtrSet<MachineInstr*, 4> RefsInMBB;
@@ -1078,21 +1086,25 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
}
// Find a point to restore the value after the barrier.
- LiveIndex RestoreIndex;
+ SlotIndex RestoreIndex;
MachineBasicBlock::iterator RestorePt =
findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex);
- if (RestorePt == BarrierMBB->end())
+ if (RestorePt == BarrierMBB->end()) {
+ DEBUG(errs() << "FAILED (could not find a suitable restore point).\n");
return false;
+ }
if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt,
- RestoreIndex, RefsInMBB))
- return true;
+ RestoreIndex, RefsInMBB)) {
+ DEBUG(errs() << "success (remat).\n");
+ return true;
+ }
// Add a spill either before the barrier or after the definition.
MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL;
const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg);
- LiveIndex SpillIndex;
+ SlotIndex SpillIndex;
MachineInstr *SpillMI = NULL;
int SS = -1;
if (!ValNo->isDefAccurate()) {
@@ -1103,8 +1115,10 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
} else {
MachineBasicBlock::iterator SpillPt =
findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, SpillIndex);
- if (SpillPt == BarrierMBB->begin())
+ if (SpillPt == BarrierMBB->begin()) {
+ DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
+ }
// Add spill.
SS = CreateSpillStackSlot(CurrLI->reg, RC);
@@ -1116,8 +1130,10 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
RestoreIndex, SpillIndex, SS)) {
// If it's already split, just restore the value. There is no need to spill
// the def again.
- if (!DefMI)
+ if (!DefMI) {
+ DEBUG(errs() << "FAILED (def is dead).\n");
return false; // Def is dead. Do nothing.
+ }
if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier,
BarrierMBB, SS, RefsInMBB))) {
@@ -1129,12 +1145,16 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
// Add spill after the def and the last use before the barrier.
SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
RefsInMBB, SpillIndex);
- if (SpillPt == DefMBB->begin())
+ if (SpillPt == DefMBB->begin()) {
+ DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
+ }
} else {
SpillPt = findNextEmptySlot(DefMBB, DefMI, SpillIndex);
- if (SpillPt == DefMBB->end())
+ if (SpillPt == DefMBB->end()) {
+ DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
+ }
}
// Add spill.
SS = CreateSpillStackSlot(CurrLI->reg, RC);
@@ -1162,18 +1182,19 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
}
// Update spill stack slot live interval.
- UpdateSpillSlotInterval(ValNo, LIs->getNextSlot(LIs->getUseIndex(SpillIndex)),
- LIs->getDefIndex(RestoreIndex));
+ UpdateSpillSlotInterval(ValNo, SpillIndex.getUseIndex().getNextSlot(),
+ RestoreIndex.getDefIndex());
ReconstructLiveInterval(CurrLI);
if (!FoldedRestore) {
- LiveIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt));
- RestoreIdx = LIs->getDefIndex(RestoreIdx);
+ SlotIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt));
+ RestoreIdx = RestoreIdx.getDefIndex();
RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx));
}
++NumSplits;
+ DEBUG(errs() << "success.\n");
return true;
}
@@ -1254,8 +1275,8 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
// reaching definition (VNInfo).
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg),
UE = MRI->use_end(); UI != UE; ++UI) {
- LiveIndex index = LIs->getInstructionIndex(&*UI);
- index = LIs->getUseIndex(index);
+ SlotIndex index = LIs->getInstructionIndex(&*UI);
+ index = index.getUseIndex();
const LiveRange* LR = (*LI)->getLiveRangeContaining(index);
VNUseCount[LR->valno].insert(&*UI);
@@ -1404,7 +1425,7 @@ bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
if (LR->valno->hasPHIKill())
return false;
- LiveIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
+ SlotIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
if (LR->end < MBBEnd)
return false;
@@ -1467,6 +1488,7 @@ bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) {
TII = TM->getInstrInfo();
MFI = MF.getFrameInfo();
MRI = &MF.getRegInfo();
+ SIs = &getAnalysis<SlotIndexes>();
LIs = &getAnalysis<LiveIntervals>();
LSs = &getAnalysis<LiveStacks>();
VRM = &getAnalysis<VirtRegMap>();
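The recurring change in this file is the LiveIndex-to-SlotIndex migration: index arithmetic such as getDefIndex(), getUseIndex(), and getNextSlot() moves from the LiveIntervals analysis onto the index object itself. A toy sketch of that style of API follows; the slot encoding below is invented for illustration and is not LLVM's actual SlotIndex layout.

// Toy index type: four slots per instruction, addressed relative to the
// instruction's base index, so index math no longer needs LiveIntervals.
#include <cassert>

class ToySlotIndex {
  unsigned Raw; // low two bits select the slot within an instruction
public:
  explicit ToySlotIndex(unsigned R = 0) : Raw(R) {}
  ToySlotIndex getUseIndex() const { return ToySlotIndex((Raw & ~3u) | 1); }
  ToySlotIndex getDefIndex() const { return ToySlotIndex((Raw & ~3u) | 2); }
  ToySlotIndex getNextSlot() const { return ToySlotIndex(Raw + 1); }
  bool operator==(ToySlotIndex O) const { return Raw == O.Raw; }
};

int main() {
  ToySlotIndex Idx(8); // some instruction's base index
  // Old style: LIs->getDefIndex(Idx). New style: Idx.getDefIndex().
  assert(Idx.getDefIndex() == ToySlotIndex(10));
  // Mirrors SpillIndex.getUseIndex().getNextSlot() from the hunks above.
  assert(Idx.getUseIndex().getNextSlot() == ToySlotIndex(10));
}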
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
new file mode 100644
index 0000000..48567a0
--- /dev/null
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -0,0 +1,231 @@
+//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "processimplicitdefs"
+
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+
+using namespace llvm;
+
+char ProcessImplicitDefs::ID = 0;
+static RegisterPass<ProcessImplicitDefs> X("processimpdefs",
+ "Process Implicit Definitions.");
+
+void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ const TargetInstrInfo *tii_) {
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ Reg == SrcReg)
+ return true;
+
+ if (OpIdx == 2 && MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
+ return true;
+ if (OpIdx == 1 && MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)
+ return true;
+ return false;
+}
+
+/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
+/// there is one implicit_def for each use. Add isUndef marker to
+/// implicit_def defs and their uses.
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
+
+ DEBUG(errs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: "
+ << ((Value*)fn.getFunction())->getName() << '\n');
+
+ bool Changed = false;
+
+ const TargetInstrInfo *tii_ = fn.getTarget().getInstrInfo();
+ const TargetRegisterInfo *tri_ = fn.getTarget().getRegisterInfo();
+ MachineRegisterInfo *mri_ = &fn.getRegInfo();
+
+ LiveVariables *lv_ = &getAnalysis<LiveVariables>();
+
+ SmallSet<unsigned, 8> ImpDefRegs;
+ SmallVector<MachineInstr*, 8> ImpDefMIs;
+ MachineBasicBlock *Entry = fn.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ unsigned Reg = MI->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS)
+ ImpDefRegs.insert(*SS);
+ }
+ ImpDefMIs.push_back(MI);
+ continue;
+ }
+
+ if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+ MachineOperand &MO = MI->getOperand(2);
+ if (ImpDefRegs.count(MO.getReg())) {
+ // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2
+ // This is an identity copy, eliminate it now.
+ if (MO.isKill()) {
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
+ vi.removeKill(MI);
+ }
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+ }
+
+ bool ChangedToImpDef = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (!ImpDefRegs.count(Reg))
+ continue;
+ // Use is a copy, just turn it into an implicit_def.
+ if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) {
+ bool isKill = MO.isKill();
+ MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ if (isKill) {
+ ImpDefRegs.erase(Reg);
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+ vi.removeKill(MI);
+ }
+ ChangedToImpDef = true;
+ Changed = true;
+ break;
+ }
+
+ Changed = true;
+ MO.setIsUndef();
+ if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
+ // Make sure any other uses of this register in the instruction are
+ // marked undef as well.
+ for (unsigned j = i+1; j != e; ++j) {
+ MachineOperand &MOJ = MI->getOperand(j);
+ if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
+ MOJ.setIsUndef();
+ }
+ ImpDefRegs.erase(Reg);
+ }
+ }
+
+ if (ChangedToImpDef) {
+ // Backtrack to process this new implicit_def.
+ --I;
+ } else {
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ ImpDefRegs.erase(MO.getReg());
+ }
+ }
+ }
+
+ // Any outstanding liveout implicit_def's?
+ for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
+ MachineInstr *MI = ImpDefMIs[i];
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !ImpDefRegs.count(Reg)) {
+ // Delete all "local" implicit_def's. That includes those which define
+ // physical registers, since they cannot be live out.
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // If there are multiple defs of the same register and at least one
+ // is not an implicit_def, do not insert implicit_def's before the
+ // uses.
+ bool Skip = false;
+ for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg),
+ DE = mri_->def_end(); DI != DE; ++DI) {
+ if (DI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) {
+ Skip = true;
+ break;
+ }
+ }
+ if (Skip)
+ continue;
+
+ // The only implicit_def's we want to keep are those that are live
+ // out of their blocks.
+ MI->eraseFromParent();
+ Changed = true;
+
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
+ UE = mri_->use_end(); UI != UE; ) {
+ MachineOperand &RMO = UI.getOperand();
+ MachineInstr *RMI = &*UI;
+ ++UI;
+ MachineBasicBlock *RMBB = RMI->getParent();
+ if (RMBB == MBB)
+ continue;
+
+ // Turn a copy use into an implicit_def.
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ Reg == SrcReg) {
+ RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+ for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ RMI->RemoveOperand(j);
+ continue;
+ }
+
+ const TargetRegisterClass* RC = mri_->getRegClass(Reg);
+ unsigned NewVReg = mri_->createVirtualRegister(RC);
+ RMO.setReg(NewVReg);
+ RMO.setIsUndef();
+ RMO.setIsKill();
+ }
+ }
+ ImpDefRegs.clear();
+ ImpDefMIs.clear();
+ }
+
+ return Changed;
+}
+
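As a rough illustration of the rule the new pass applies, here is a toy walk-through with invented instruction records (not MachineInstr): a use reached only by IMPLICIT_DEF is flagged undef, and a plain copy of such a register becomes an IMPLICIT_DEF itself.

// Toy walk-through of the core ProcessImplicitDefs rule; types invented here.
#include <cstdio>
#include <set>
#include <string>
#include <vector>

struct Inst { std::string Op; unsigned Def; unsigned Use; bool UseUndef; };

static void processImplicitDefs(std::vector<Inst> &Block) {
  std::set<unsigned> ImpDefRegs;
  for (Inst &I : Block) {
    if (I.Op == "IMPLICIT_DEF") { ImpDefRegs.insert(I.Def); continue; }
    if (I.Use && ImpDefRegs.count(I.Use)) {
      if (I.Op == "COPY") {
        // A copy of an undefined value is itself an implicit def.
        I.Op = "IMPLICIT_DEF"; I.Use = 0;
        ImpDefRegs.insert(I.Def);
        continue;
      }
      I.UseUndef = true; // record explicitly that the value is undefined
    }
    ImpDefRegs.erase(I.Def); // a real def ends the implicit-def region
  }
}

int main() {
  std::vector<Inst> B = {{"IMPLICIT_DEF", 1, 0, false},
                         {"COPY", 2, 1, false},
                         {"ADD", 3, 1, false}};
  processImplicitDefs(B);
  std::printf("%s, undef=%d\n", B[1].Op.c_str(), (int)B[2].UseUndef);
}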
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index a0860a1..230a20c 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -542,7 +542,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure the special register scavenging spill slot is closest to the
// frame pointer if a frame pointer is required.
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- if (RS && RegInfo->hasFP(Fn)) {
+ if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
int SFI = RS->getScavengingFrameIndex();
if (SFI >= 0)
AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
@@ -571,7 +571,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure the special register scavenging spill slot is closest to the
// stack pointer.
- if (RS && !RegInfo->hasFP(Fn)) {
+ if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
int SFI = RS->getScavengingFrameIndex();
if (SFI >= 0)
AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
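The placement rule changed above, restated as a predicate (assumed semantics: when the stack must be realigned, FP-relative offsets are no longer reliable for realigned objects, so the scavenging slot is kept SP-relative):

// Condensed restatement of the scavenging-slot placement decision.
#include <cstdio>

static bool placeScavengingSlotNearFP(bool HasFP, bool NeedsRealignment) {
  return HasFP && !NeedsRealignment;
}

int main() {
  std::printf("%d %d\n",
              placeScavengingSlotNearFP(true, false),  // 1: near FP
              placeScavengingSlotNearFP(true, true));  // 0: near SP instead
}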
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 70e8640..5507646 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -14,7 +14,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
@@ -55,8 +55,7 @@ namespace {
/// FixedStackPseudoSourceValue - A specialized PseudoSourceValue
/// for holding FixedStack values, which must include a frame
/// index.
- class VISIBILITY_HIDDEN FixedStackPseudoSourceValue
- : public PseudoSourceValue {
+ class FixedStackPseudoSourceValue : public PseudoSourceValue {
const int FI;
public:
explicit FixedStackPseudoSourceValue(int fi) : FI(fi) {}
@@ -65,6 +64,8 @@ namespace {
virtual bool isAliased(const MachineFrameInfo *MFI) const;
+ virtual bool mayAlias(const MachineFrameInfo *) const;
+
virtual void printCustom(raw_ostream &OS) const {
OS << "FixedStack" << FI;
}
@@ -101,6 +102,14 @@ bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
return true;
}
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ return true;
+}
+
bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
return MFI && MFI->isImmutableObjectIndex(FI);
}
@@ -114,3 +123,10 @@ bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
// Spill slots should not alias others.
return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI);
}
+
+bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (!MFI)
+ return true;
+ // Spill slots will not alias any LLVM IR value.
+ return !MFI->isSpillSlotObjectIndex(FI);
+}
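A hypothetical client of the new mayAlias() hook, with simplified stand-in types: a scheduler can omit memory chain edges when a pseudo source value, such as a spill slot, is known never to alias IR-level memory. This sketch assumes that usage; the real query takes a MachineFrameInfo.

// Toy stand-in for the new query; not LLVM's PseudoSourceValue.
#include <cstdio>

struct ToyPSV {
  bool SpillSlot;
  // Mirrors the intent above: spill slots never alias IR-level memory.
  bool mayAlias() const { return !SpillSlot; }
};

static bool needsChainEdge(const ToyPSV &A, const ToyPSV &B) {
  return A.mayAlias() && B.mayAlias();
}

int main() {
  ToyPSV Spill{true}, Global{false};
  std::printf("spill vs global: %d\n", needsChainEdge(Spill, Global)); // 0
}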
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 5d58ea9..6930abf 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -33,7 +33,6 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -70,7 +69,7 @@ linearscanRegAlloc("linearscan", "linear scan register allocator",
createLinearScanRegisterAllocator);
namespace {
- struct VISIBILITY_HIDDEN RALinScan : public MachineFunctionPass {
+ struct RALinScan : public MachineFunctionPass {
static char ID;
RALinScan() : MachineFunctionPass(&ID) {}
@@ -146,6 +145,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
if (StrongPHIElim)
AU.addRequiredID(StrongPHIEliminationID);
// Make sure PassManager knows which analyses to make available
@@ -176,11 +176,11 @@ namespace {
/// processActiveIntervals - expire old intervals and move non-overlapping
/// ones to the inactive list.
- void processActiveIntervals(LiveIndex CurPoint);
+ void processActiveIntervals(SlotIndex CurPoint);
/// processInactiveIntervals - expire old intervals and move overlapping
/// ones to the active list.
- void processInactiveIntervals(LiveIndex CurPoint);
+ void processInactiveIntervals(SlotIndex CurPoint);
/// hasNextReloadInterval - Return the next liveinterval that's being
/// defined by a reload from the same SS as the specified one.
@@ -366,7 +366,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
return Reg;
VNInfo *vni = cur.begin()->valno;
- if ((vni->def == LiveIndex()) ||
+ if ((vni->def == SlotIndex()) ||
vni->isUnused() || !vni->isDefAccurate())
return Reg;
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
@@ -403,7 +403,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
if (!O.isKill())
continue;
MachineInstr *MI = &*I;
- if (SrcLI.liveAt(li_->getDefIndex(li_->getInstructionIndex(MI))))
+ if (SrcLI.liveAt(li_->getInstructionIndex(MI).getDefIndex()))
O.setIsKill(false);
}
}
@@ -480,10 +480,17 @@ void RALinScan::initIntervalSets()
for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
- mri_->setPhysRegUsed(i->second->reg);
- fixed_.push_back(std::make_pair(i->second, i->second->begin()));
- } else
- unhandled_.push(i->second);
+ if (!i->second->empty()) {
+ mri_->setPhysRegUsed(i->second->reg);
+ fixed_.push_back(std::make_pair(i->second, i->second->begin()));
+ }
+ } else {
+ if (i->second->empty()) {
+ assignRegOrStackSlotAtInterval(i->second);
+ }
+ else
+ unhandled_.push(i->second);
+ }
}
}
@@ -503,13 +510,13 @@ void RALinScan::linearScan() {
++NumIters;
DEBUG(errs() << "\n*** CURRENT ***: " << *cur << '\n');
- if (!cur->empty()) {
- processActiveIntervals(cur->beginIndex());
- processInactiveIntervals(cur->beginIndex());
+ assert(!cur->empty() && "Empty interval in unhandled set.");
- assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
- "Can only allocate virtual registers!");
- }
+ processActiveIntervals(cur->beginIndex());
+ processInactiveIntervals(cur->beginIndex());
+
+ assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
+ "Can only allocate virtual registers!");
// Allocating a virtual register. try to find a free
// physical register or spill an interval (possibly this one) in order to
@@ -586,7 +593,7 @@ void RALinScan::linearScan() {
/// processActiveIntervals - expire old intervals and move non-overlapping ones
/// to the inactive list.
-void RALinScan::processActiveIntervals(LiveIndex CurPoint)
+void RALinScan::processActiveIntervals(SlotIndex CurPoint)
{
DEBUG(errs() << "\tprocessing active intervals:\n");
@@ -632,7 +639,7 @@ void RALinScan::processActiveIntervals(LiveIndex CurPoint)
/// processInactiveIntervals - expire old intervals and move overlapping
/// ones to the active list.
-void RALinScan::processInactiveIntervals(LiveIndex CurPoint)
+void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
{
DEBUG(errs() << "\tprocessing inactive intervals:\n");
@@ -713,7 +720,7 @@ FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
return IP.end();
}
-static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, LiveIndex Point){
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, SlotIndex Point){
for (unsigned i = 0, e = V.size(); i != e; ++i) {
RALinScan::IntervalPtr &IP = V[i];
LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
@@ -739,7 +746,7 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
if (SI.hasAtLeastOneValue())
VNI = SI.getValNumInfo(0);
else
- VNI = SI.getNextValue(LiveIndex(), 0, false,
+ VNI = SI.getNextValue(SlotIndex(), 0, false,
ls_->getVNInfoAllocator());
LiveInterval &RI = li_->getInterval(cur->reg);
@@ -907,7 +914,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
backUpRegUses();
std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
- LiveIndex StartPosition = cur->beginIndex();
+ SlotIndex StartPosition = cur->beginIndex();
const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
// If start of this live interval is defined by a move instruction and its
@@ -917,7 +924,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// one, e.g. X86::mov32to32_. These move instructions are not coalescable.
if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
VNInfo *vni = cur->begin()->valno;
- if ((vni->def != LiveIndex()) && !vni->isUnused() &&
+ if ((vni->def != SlotIndex()) && !vni->isUnused() &&
vni->isDefAccurate()) {
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
@@ -1119,6 +1126,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
DowngradedRegs.clear();
assignRegOrStackSlotAtInterval(cur);
} else {
+ assert(false && "Ran out of registers during register allocation!");
llvm_report_error("Ran out of registers during register allocation!");
}
return;
@@ -1173,7 +1181,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
LiveInterval *ReloadLi = added[i];
if (ReloadLi->weight == HUGE_VALF &&
li_->getApproximateInstructionCount(*ReloadLi) == 0) {
- LiveIndex ReloadIdx = ReloadLi->beginIndex();
+ SlotIndex ReloadIdx = ReloadLi->beginIndex();
MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
@@ -1243,7 +1251,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
spilled.insert(sli->reg);
}
- LiveIndex earliestStart = earliestStartInterval->beginIndex();
+ SlotIndex earliestStart = earliestStartInterval->beginIndex();
DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n');
@@ -1324,7 +1332,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
LiveInterval *ReloadLi = added[i];
if (ReloadLi->weight == HUGE_VALF &&
li_->getApproximateInstructionCount(*ReloadLi) == 0) {
- LiveIndex ReloadIdx = ReloadLi->beginIndex();
+ SlotIndex ReloadIdx = ReloadLi->beginIndex();
MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
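A condensed restatement of the new triage in initIntervalSets, with toy interval records in place of LiveInterval: empty physical-register intervals are dropped, empty virtual intervals are assigned immediately, and only non-empty virtual intervals enter the scan queue.

// Toy triage loop; records invented here for illustration.
#include <cstdio>
#include <queue>
#include <vector>

struct ToyInterval { unsigned Reg; bool Phys; bool Empty; };

int main() {
  std::vector<ToyInterval> LIs = {{16, true, true}, {1024, false, true},
                                  {1025, false, false}};
  std::queue<unsigned> Unhandled;
  for (const ToyInterval &LI : LIs) {
    if (LI.Phys) {
      if (!LI.Empty) std::printf("fixed: %u\n", LI.Reg);
    } else if (LI.Empty) {
      std::printf("assign immediately: %u\n", LI.Reg); // no ranges to scan
    } else {
      Unhandled.push(LI.Reg);
    }
  }
  std::printf("queued: %zu\n", Unhandled.size());
}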
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 28ede55..1957c16 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -24,7 +24,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
@@ -44,7 +43,7 @@ static RegisterRegAlloc
createLocalRegisterAllocator);
namespace {
- class VISIBILITY_HIDDEN RALocal : public MachineFunctionPass {
+ class RALocal : public MachineFunctionPass {
public:
static char ID;
RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index bee5d93..5757e47 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -70,7 +70,7 @@ namespace {
/// PBQP based allocators solve the register allocation problem by mapping
/// register allocation problems to Partitioned Boolean Quadratic
/// Programming problems.
- class VISIBILITY_HIDDEN PBQPRegAlloc : public MachineFunctionPass {
+ class PBQPRegAlloc : public MachineFunctionPass {
public:
static char ID;
@@ -85,6 +85,8 @@ namespace {
/// PBQP analysis usage.
virtual void getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
//au.addRequiredID(SplitCriticalEdgesID);
au.addRequired<RegisterCoalescer>();
@@ -684,7 +686,7 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
vni = stackInterval.getValNumInfo(0);
else
vni = stackInterval.getNextValue(
- LiveIndex(), 0, false, lss->getVNInfoAllocator());
+ SlotIndex(), 0, false, lss->getVNInfoAllocator());
LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
stackInterval.MergeRangesInAsValue(rhsInterval, vni);
@@ -832,7 +834,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
tm = &mf->getTarget();
tri = tm->getRegisterInfo();
tii = tm->getInstrInfo();
- mri = &mf->getRegInfo();
+ mri = &mf->getRegInfo();
lis = &getAnalysis<LiveIntervals>();
lss = &getAnalysis<LiveStacks>();
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 2518ce1..cf90aba 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -177,7 +177,24 @@ void RegScavenger::forward() {
if (!Reg || isReserved(Reg))
continue;
if (MO.isUse()) {
- assert(isUsed(Reg) && "Using an undefined register!");
+ if (!isUsed(Reg)) {
+ // Check if it's partial live: e.g.
+ // D0 = insert_subreg D0<undef>, S0
+ // ... D0
+ // The problem is the insert_subreg could be eliminated. The use of
+ // D0 is using a partially undef value. This is not *incorrect* since
+ // S1 can be freely clobbered.
+ // Ideally we would like a way to model this, but leaving the
+ // insert_subreg around causes both correctness and performance issues.
+ bool SubUsed = false;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ if (isUsed(SubReg)) {
+ SubUsed = true;
+ break;
+ }
+ assert(SubUsed && "Using an undefined register!");
+ }
assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) &&
"Using an early clobbered register!");
} else {
@@ -227,7 +244,7 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
///
/// No more than InstrLimit instructions are inspected.
///
-unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator MI,
+unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
BitVector &Candidates,
unsigned InstrLimit,
MachineBasicBlock::iterator &UseMI) {
@@ -235,19 +252,37 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator MI,
assert(Survivor > 0 && "No candidates for scavenging");
MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
- assert(MI != ME && "MI already at terminator");
+ assert(StartMI != ME && "MI already at terminator");
+ MachineBasicBlock::iterator RestorePointMI = StartMI;
+ MachineBasicBlock::iterator MI = StartMI;
+ bool inVirtLiveRange = false;
for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ bool isVirtKillInsn = false;
+ bool isVirtDefInsn = false;
// Remove any candidates touched by instruction.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg())
continue;
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isDef())
+ isVirtDefInsn = true;
+ else if (MO.isKill())
+ isVirtKillInsn = true;
+ continue;
+ }
Candidates.reset(MO.getReg());
for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++)
Candidates.reset(*R);
}
+ // If we're not in a virtual reg's live range, this is a valid
+ // restore point.
+ if (!inVirtLiveRange) RestorePointMI = MI;
+
+ // Update whether we're in the live range of a virtual register
+ if (isVirtKillInsn) inVirtLiveRange = false;
+ if (isVirtDefInsn) inVirtLiveRange = true;
// Was our survivor untouched by this instruction?
if (Candidates.test(Survivor))
@@ -259,9 +294,13 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator MI,
Survivor = Candidates.find_first();
}
+ // If we ran off the end, that's where we want to restore.
+ if (MI == ME) RestorePointMI = ME;
+ assert(RestorePointMI != StartMI &&
+ "No available scavenger restore location!");
// We ran out of candidates, so stop the search.
- UseMI = MI;
+ UseMI = RestorePointMI;
return Survivor;
}
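A toy model of the restore-point search added above (invented records, not MachineInstr): walking forward from the scavenge site, the patch treats any point inside a virtual register's live range as unsafe for the restore, since the reload could clobber a value that is still live there.

// Toy restore-point search: a def opens a virtual live range, a kill closes
// it, and the restore point tracks the last position outside any range.
#include <cstdio>
#include <vector>

struct ToyMI { bool VirtDef; bool VirtKill; };

static int findRestorePoint(const std::vector<ToyMI> &MIs) {
  int RestorePoint = 0;
  bool InVirtLiveRange = false;
  for (int i = 1; i < (int)MIs.size(); ++i) {
    if (!InVirtLiveRange) RestorePoint = i;
    if (MIs[i].VirtKill) InVirtLiveRange = false;
    if (MIs[i].VirtDef) InVirtLiveRange = true;
  }
  return RestorePoint;
}

int main() {
  // The def at 1 opens a live range and the kill at 3 closes it, so the
  // first legal restore point after the range is instruction 4.
  std::vector<ToyMI> MIs = {{false, false}, {true, false},
                            {false, false}, {false, true}, {false, false}};
  std::printf("restore before instruction %d\n", findRestorePoint(MIs));
}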
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 5a59862..1363a92 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -183,8 +183,8 @@ void SUnit::setHeightDirty() {
/// setDepthToAtLeast - Update this node's successors to reflect the
/// fact that this node's depth just increased.
///
-void SUnit::setDepthToAtLeast(unsigned NewDepth) {
- if (NewDepth <= getDepth())
+void SUnit::setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep) {
+ if (NewDepth <= getDepth(IgnoreAntiDep))
return;
setDepthDirty();
Depth = NewDepth;
@@ -194,8 +194,8 @@ void SUnit::setDepthToAtLeast(unsigned NewDepth) {
/// setHeightToAtLeast - Update this node's predecessors to reflect the
/// fact that this node's height just increased.
///
-void SUnit::setHeightToAtLeast(unsigned NewHeight) {
- if (NewHeight <= getHeight())
+void SUnit::setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep) {
+ if (NewHeight <= getHeight(IgnoreAntiDep))
return;
setHeightDirty();
Height = NewHeight;
@@ -204,7 +204,7 @@ void SUnit::setHeightToAtLeast(unsigned NewHeight) {
/// ComputeDepth - Calculate the maximal path from the node to the exit.
///
-void SUnit::ComputeDepth() {
+void SUnit::ComputeDepth(bool IgnoreAntiDep) {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
do {
@@ -214,6 +214,7 @@ void SUnit::ComputeDepth() {
unsigned MaxPredDepth = 0;
for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
E = Cur->Preds.end(); I != E; ++I) {
+ if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
SUnit *PredSU = I->getSUnit();
if (PredSU->isDepthCurrent)
MaxPredDepth = std::max(MaxPredDepth,
@@ -237,7 +238,7 @@ void SUnit::ComputeDepth() {
/// ComputeHeight - Calculate the maximal path from the node to the entry.
///
-void SUnit::ComputeHeight() {
+void SUnit::ComputeHeight(bool IgnoreAntiDep) {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
do {
@@ -247,6 +248,7 @@ void SUnit::ComputeHeight() {
unsigned MaxSuccHeight = 0;
for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
E = Cur->Succs.end(); I != E; ++I) {
+ if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
SUnit *SuccSU = I->getSUnit();
if (SuccSU->isHeightCurrent)
MaxSuccHeight = std::max(MaxSuccHeight,
@@ -346,7 +348,7 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
AnyNotSched = true;
}
if (SUnits[i].isScheduled &&
- (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getHeight()) >
+ (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
unsigned(INT_MAX)) {
if (!AnyNotSched)
errs() << "*** Scheduling failed! ***\n";
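A standalone sketch of the IgnoreAntiDep variant of ComputeDepth/getDepth used throughout the hunks above: depth is a longest path over predecessor edges, with anti-dependence edges optionally excluded. The graph types are invented here, and the real code uses an iterative worklist over SUnits rather than recursion.

// Toy longest-path depth with optional exclusion of anti-dep edges.
#include <algorithm>
#include <cstdio>
#include <vector>

enum class Kind { Data, Anti };
struct Pred { int Node; Kind K; unsigned Latency; };

static unsigned depth(int N, const std::vector<std::vector<Pred>> &Preds,
                      bool IgnoreAntiDep) {
  unsigned D = 0;
  for (const Pred &P : Preds[N])
    if (!(IgnoreAntiDep && P.K == Kind::Anti))
      D = std::max(D, depth(P.Node, Preds, IgnoreAntiDep) + P.Latency);
  return D;
}

int main() {
  // Node 2 depends on node 0 (data, latency 2) and node 1 (anti, latency 5).
  std::vector<std::vector<Pred>> Preds(3);
  Preds[2] = {{0, Kind::Data, 2}, {1, Kind::Anti, 5}};
  std::printf("%u %u\n", depth(2, Preds, false), depth(2, Preds, true));
}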
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index 0d15c02..8e03420 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -50,8 +50,10 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
break;
}
}
- TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second,
- SU->CopyDstRC, SU->CopySrcRC);
+ bool Success = TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second,
+ SU->CopyDstRC, SU->CopySrcRC);
+ (void)Success;
+ assert(Success && "copyRegToReg failed!");
} else {
// Copy from physical register.
assert(I->getReg() && "Unknown physical register!");
@@ -59,8 +61,10 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
isNew = isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
- TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(),
- SU->CopyDstRC, SU->CopySrcRC);
+ bool Success = TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(),
+ SU->CopyDstRC, SU->CopySrcRC);
+ (void)Success;
+ assert(Success && "copyRegToReg failed!");
}
break;
}
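The (void)Success pattern introduced above, in isolation: the result is kept so a debug build can assert on it, and the void cast silences the unused-variable warning in release builds where assert compiles away.

// Minimal example of asserting on a result without a release-build warning.
#include <cassert>

static bool tryCopy() { return true; } // stand-in for copyRegToReg

int main() {
  bool Success = tryCopy();
  (void)Success;
  assert(Success && "copyRegToReg failed!");
}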
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 43454dd..6070ff6 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -98,7 +98,9 @@ static const Value *getUnderlyingObject(const Value *V) {
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
- const MachineFrameInfo *MFI) {
+ const MachineFrameInfo *MFI,
+ bool &MayAlias) {
+ MayAlias = true;
if (!MI->hasOneMemOperand() ||
!(*MI->memoperands_begin())->getValue() ||
(*MI->memoperands_begin())->isVolatile())
@@ -110,6 +112,7 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
V = getUnderlyingObject(V);
if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ MayAlias = PSV->mayAlias(MFI);
// For now, ignore PseudoSourceValues which may alias LLVM IR values
// because the code that uses this function has no way to cope with
// such aliases.
@@ -124,6 +127,23 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
return 0;
}
+static bool mayUnderlyingObjectForInstrAlias(const MachineInstr *MI,
+ const MachineFrameInfo *MFI) {
+ if (!MI->hasOneMemOperand() ||
+ !(*MI->memoperands_begin())->getValue() ||
+ (*MI->memoperands_begin())->isVolatile())
+ return true;
+
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return true;
+
+ V = getUnderlyingObject(V);
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ return PSV->mayAlias(MFI);
+ return true;
+}
+
void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
if (MachineLoop *ML = MLI.getLoopFor(BB))
if (BB == ML->getLoopLatch()) {
@@ -208,7 +228,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
SUnit *DefSU = DefList[i];
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
- !DefSU->getInstr()->registerDefIsDead(Reg)))
+ !DefSU->getInstr()->registerDefIsDead(*Alias)))
DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
}
}
@@ -317,29 +337,35 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
}
// Add chain dependencies.
+ // Chain dependencies used to enforce memory order should have
+ // latency of 0 (except for a true dependency of a Store followed by
+ // an aliased Load... we estimate that with a single cycle of latency,
+ // assuming the hardware will bypass the store).
// Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
// after stack slots are lowered to actual addresses.
// TODO: Use an AliasAnalysis and do real alias-analysis queries, and
// produce more precise dependence information.
+#define STORE_LOAD_LATENCY 1
+ unsigned TrueMemOrderLatency = 0;
if (TID.isCall() || TID.hasUnmodeledSideEffects()) {
new_chain:
// This is the conservative case. Add dependencies on all memory
// references.
if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
Chain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
PendingLoads.clear();
for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
E = MemDefs.end(); I != E; ++I) {
- I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
I->second = SU;
}
for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
MemUses.begin(), E = MemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- I->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency));
+ I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
I->second.clear();
}
// See if it is known to just have a single memory reference.
@@ -356,12 +382,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// Unknown memory accesses. Assume the worst.
ChainMMO = 0;
} else if (TID.mayStore()) {
- if (const Value *V = getUnderlyingObjectForInstr(MI, MFI)) {
+ bool MayAlias = true;
+ TrueMemOrderLatency = STORE_LOAD_LATENCY;
+ if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
// A store to a specific PseudoSourceValue. Add precise dependencies.
// Handle the def in MemDefs, if there is one.
std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
if (I != MemDefs.end()) {
- I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
/*isNormalMemory=*/true));
I->second = SU;
} else {
@@ -372,49 +400,58 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
MemUses.find(V);
if (J != MemUses.end()) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- J->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
- /*isNormalMemory=*/true));
+ J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
+ /*Reg=*/0, /*isNormalMemory=*/true));
J->second.clear();
}
- // Add dependencies from all the PendingLoads, since without
- // memoperands we must assume they alias anything.
- for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
- // Add a general dependence too, if needed.
- if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
- } else
+ if (MayAlias) {
+ // Add dependencies from all the PendingLoads, since without
+ // memoperands we must assume they alias anything.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ // Add a general dependence too, if needed.
+ if (Chain)
+ Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ } else if (MayAlias) {
// Treat all other stores conservatively.
goto new_chain;
+ }
} else if (TID.mayLoad()) {
+ bool MayAlias = true;
+ TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
- } else if (const Value *V = getUnderlyingObjectForInstr(MI, MFI)) {
+ } else if (const Value *V =
+ getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
// A load from a specific PseudoSourceValue. Add precise dependencies.
std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
if (I != MemDefs.end())
- I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
/*isNormalMemory=*/true));
MemUses[V].push_back(SU);
// Add a general dependence too, if needed.
if (Chain && (!ChainMMO ||
(ChainMMO->isStore() || ChainMMO->isVolatile())))
- Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
} else if (MI->hasVolatileMemoryRef()) {
// Treat volatile loads conservatively. Note that this includes
// cases where memoperand information is unavailable.
goto new_chain;
- } else {
- // A normal load. Depend on the general chain, as well as on
+ } else if (MayAlias) {
+ // A "MayAlias" load. Depend on the general chain, as well as on
// all stores. In the absence of MachineMemOperand information,
// we can't even assume that the load doesn't alias well-behaved
// memory locations.
if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
- E = MemDefs.end(); I != E; ++I)
- I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
+ E = MemDefs.end(); I != E; ++I) {
+ SUnit *DefSU = I->second;
+ if (mayUnderlyingObjectForInstrAlias(DefSU->getInstr(), MFI))
+ DefSU->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
PendingLoads.push_back(SU);
}
}
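
The latency policy introduced above, in isolation: chain edges exist only to
enforce memory order, so they carry zero latency, except for a store followed
by a possibly aliasing load, which is charged STORE_LOAD_LATENCY (one cycle)
on the assumption that hardware store-to-load forwarding covers the rest. A
small sketch of that decision (MemEdgeKind is illustrative):

    enum MemEdgeKind { OrderOnly, StoreThenAliasedLoad };

    static unsigned chainEdgeLatency(MemEdgeKind Kind) {
      const unsigned StoreLoadLatency = 1; // mirrors STORE_LOAD_LATENCY
      return Kind == StoreThenAliasedLoad ? StoreLoadLatency : 0;
    }
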
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e3f8f0f..5f70cb8 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -31,7 +31,6 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -57,7 +56,7 @@ namespace {
//------------------------------ DAGCombiner ---------------------------------//
- class VISIBILITY_HIDDEN DAGCombiner {
+ class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
CombineLevel Level;
@@ -280,8 +279,7 @@ public:
namespace {
/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
-class VISIBILITY_HIDDEN WorkListRemover :
- public SelectionDAG::DAGUpdateListener {
+class WorkListRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
@@ -5732,15 +5730,17 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// If this is an EXTLOAD, the VT's must match.
if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
- // FIXME: this conflates two src values, discarding one. This is not
- // the right thing to do, but nothing uses srcvalues now. When they do,
- // turn SrcValue into a list of locations.
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations.
SDValue Addr;
if (TheSelect->getOpcode() == ISD::SELECT) {
// Check that the condition doesn't reach either load. If so, folding
// this will induce a cycle into the DAG.
- if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) {
+ if ((!LLD->hasAnyUseOfValue(1) ||
+ !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
+ (!RLD->hasAnyUseOfValue(1) ||
+ !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
@@ -5749,10 +5749,12 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
} else {
// Check that the condition doesn't reach either load. If so, folding
// this will induce a cycle into the DAG.
- if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) {
+ if ((!LLD->hasAnyUseOfValue(1) ||
+ (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
+ (!RLD->hasAnyUseOfValue(1) ||
+ (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0),
@@ -5768,16 +5770,14 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
Load = DAG.getLoad(TheSelect->getValueType(0),
TheSelect->getDebugLoc(),
LLD->getChain(),
- Addr,LLD->getSrcValue(),
- LLD->getSrcValueOffset(),
+ Addr, 0, 0,
LLD->isVolatile(),
LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType(),
TheSelect->getDebugLoc(),
TheSelect->getValueType(0),
- LLD->getChain(), Addr, LLD->getSrcValue(),
- LLD->getSrcValueOffset(),
+ LLD->getChain(), Addr, 0, 0,
LLD->getMemoryVT(),
LLD->isVolatile(),
LLD->getAlignment());
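
The VISIBILITY_HIDDEN removals here and in the files below rely on the fact
that a class defined inside an anonymous namespace is already confined to its
translation unit, so the explicit hidden-visibility attribute added nothing.
A minimal illustration:

    namespace {            // anonymous namespace: the name never escapes
      class Combiner { };  // this translation unit, so no visibility
    }                      // attribute is needed
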
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index d3ffb2a..da311ed 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -349,6 +349,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
} else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
ES->getTargetFlags()));
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress()));
} else {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Flag &&
@@ -556,7 +558,7 @@ void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
cast<MachineSDNode>(Node)->memoperands_end());
- if (II.usesCustomDAGSchedInsertionHook()) {
+ if (II.usesCustomInsertionHook()) {
// Insert this instruction into the basic block using a target
// specific inserter which may return a new basic block.
MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
@@ -571,6 +573,12 @@ void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
if (Node->hasAnyUseOfValue(i))
EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ // If there are no uses, mark the register as dead now, so that
+ // MachineLICM/Sink can see that it's dead. Don't do this if the
+ // node has a Flag value, for the benefit of targets still using
+ // Flag for values in physregs.
+ else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ MI->addRegisterDead(Reg, TRI);
}
}
return;
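
The guard in the hunk above, restated as a standalone predicate. This is an
illustrative helper, not InstrEmitter API: an unused implicit def may be
marked dead so MachineLICM/MachineSink can see it, unless the node's last
result is MVT::Flag, which some targets still use for values in physregs.

    #include "llvm/CodeGen/SelectionDAGNodes.h"
    using namespace llvm;

    // Hypothetical restatement of the InstrEmitter check above.
    static bool mayMarkImplicitDefDead(const SDNode *Node, unsigned ResNo) {
      return !Node->hasAnyUseOfValue(ResNo) &&
             Node->getValueType(Node->getNumValues() - 1) != MVT::Flag;
    }
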
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7138dd2..f389f7f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -32,7 +32,6 @@
#include "llvm/GlobalVariable.h"
#include "llvm/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -55,7 +54,7 @@ using namespace llvm;
/// will attempt merge setcc and brc instructions into brcc's.
///
namespace {
-class VISIBILITY_HIDDEN SelectionDAGLegalize {
+class SelectionDAGLegalize {
TargetLowering &TLI;
SelectionDAG &DAG;
CodeGenOpt::Level OptLevel;
@@ -2574,16 +2573,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::ConstantFP: {
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
- bool isLegal = false;
- for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(),
- E = TLI.legal_fpimm_end(); I != E; ++I) {
- if (CFP->isExactlyValue(*I)) {
- isLegal = true;
- break;
- }
- }
// If this is a legal constant, turn it into a TargetConstantFP node.
- if (isLegal)
+ if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
Results.push_back(SDValue(Node, 0));
else
Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 5992f5d..c4bd552 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -623,8 +623,7 @@ void DAGTypeLegalizer::RemapValue(SDValue &N) {
namespace {
/// NodeUpdateListener - This class is a DAGUpdateListener that listens for
/// updates to nodes and recomputes their ready state.
- class VISIBILITY_HIDDEN NodeUpdateListener :
- public SelectionDAG::DAGUpdateListener {
+ class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
DAGTypeLegalizer &DTL;
SmallSetVector<SDNode*, 16> &NodesToAnalyze;
public:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index e0f93d8..4045a34 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -19,7 +19,6 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -40,7 +39,7 @@ namespace {
/// FastPriorityQueue - A degenerate priority queue that considers
/// all nodes to have the same priority.
///
- struct VISIBILITY_HIDDEN FastPriorityQueue {
+ struct FastPriorityQueue {
SmallVector<SUnit *, 16> Queue;
bool empty() const { return Queue.empty(); }
@@ -60,7 +59,7 @@ namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
///
-class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAGSDNodes {
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
private:
/// AvailableQueue - The priority queue to use for the available SUnits.
FastPriorityQueue AvailableQueue;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index c8d2158..faf21f7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -28,7 +28,6 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/PriorityQueue.h"
@@ -48,7 +47,7 @@ namespace {
/// ScheduleDAGList - The actual list scheduler implementation. This supports
/// top-down scheduling.
///
-class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAGSDNodes {
+class ScheduleDAGList : public ScheduleDAGSDNodes {
private:
/// AvailableQueue - The priority queue to use for the available SUnits.
///
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index cec24e6..7e1015a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,7 +24,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
@@ -53,7 +52,7 @@ namespace {
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
-class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
/// isBottomUp - This is true if the scheduling problem is bottom-up, false if
/// it is top-down.
@@ -965,8 +964,7 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
namespace {
template<class SF>
- class VISIBILITY_HIDDEN RegReductionPriorityQueue
- : public SchedulingPriorityQueue {
+ class RegReductionPriorityQueue : public SchedulingPriorityQueue {
PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
unsigned currentQueueId;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index c9c36f7..ebb31ac 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -58,6 +58,7 @@ namespace llvm {
if (isa<ConstantPoolSDNode>(Node)) return true;
if (isa<JumpTableSDNode>(Node)) return true;
if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<BlockAddressSDNode>(Node)) return true;
if (Node->getOpcode() == ISD::EntryToken) return true;
return false;
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 37736c0..98e7317 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -460,6 +460,11 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(SVN->getMaskElt(i));
break;
}
+ case ISD::TargetBlockAddress:
+ case ISD::BlockAddress: {
+ ID.AddPointer(cast<BlockAddressSDNode>(N));
+ break;
+ }
} // end switch (N->getOpcode())
}
@@ -1317,6 +1322,23 @@ SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, DebugLoc DL,
+ bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(TLI.getPointerTy()), 0, 0);
+ ID.AddPointer(BA);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>();
+ new (N) BlockAddressSDNode(Opc, DL, TLI.getPointerTy(), BA);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getSrcValue(const Value *V) {
assert((!V || isa<PointerType>(V->getType())) &&
"SrcValue is not a pointer?");
@@ -5307,31 +5329,26 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
return false;
}
-
-static void findPredecessor(SDNode *N, const SDNode *P, bool &found,
- SmallPtrSet<SDNode *, 32> &Visited) {
- if (found || !Visited.insert(N))
- return;
-
- for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) {
- SDNode *Op = N->getOperand(i).getNode();
- if (Op == P) {
- found = true;
- return;
- }
- findPredecessor(Op, P, found, Visited);
- }
-}
-
/// isPredecessorOf - Return true if this node is a predecessor of N. This node
-/// is either an operand of N or it can be reached by recursively traversing
-/// up the operands.
+/// is either an operand of N or it can be reached by traversing up the operands.
/// NOTE: this is an expensive method. Use it carefully.
bool SDNode::isPredecessorOf(SDNode *N) const {
SmallPtrSet<SDNode *, 32> Visited;
- bool found = false;
- findPredecessor(N, this, found, Visited);
- return found;
+ SmallVector<SDNode *, 16> Worklist;
+ Worklist.push_back(N);
+
+ do {
+ N = Worklist.pop_back_val();
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Op = N->getOperand(i).getNode();
+ if (Op == this)
+ return true;
+ if (Visited.insert(Op))
+ Worklist.push_back(Op);
+ }
+ } while (!Worklist.empty());
+
+ return false;
}
uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
@@ -5405,6 +5422,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::ConstantPool: return "ConstantPool";
case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
@@ -5426,6 +5444,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::TargetJumpTable: return "TargetJumpTable";
case ISD::TargetConstantPool: return "TargetConstantPool";
case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
case ISD::CopyToReg: return "CopyToReg";
case ISD::CopyFromReg: return "CopyFromReg";
@@ -5735,9 +5754,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
if (G && R->getReg() &&
TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
- OS << " " << G->getTarget().getRegisterInfo()->getName(R->getReg());
+ OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg());
} else {
- OS << " #" << R->getReg();
+ OS << " %reg" << R->getReg();
}
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
@@ -5753,7 +5772,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ":" << N->getVT().getEVTString();
}
else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
- OS << " <" << *LD->getMemOperand();
+ OS << "<" << *LD->getMemOperand();
bool doExt = true;
switch (LD->getExtensionType()) {
@@ -5771,7 +5790,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ">";
} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
- OS << " <" << *ST->getMemOperand();
+ OS << "<" << *ST->getMemOperand();
if (ST->isTruncatingStore())
OS << ", trunc to " << ST->getMemoryVT().getEVTString();
@@ -5782,7 +5801,14 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ">";
} else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
- OS << " <" << *M->getMemOperand() << ">";
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
}
}
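
The recursive findPredecessor helper becomes an explicit worklist walk, so a
very deep DAG can no longer overflow the call stack. The same traversal on a
stand-in node type (DAGNode is illustrative, not SDNode):

    #include <set>
    #include <vector>

    struct DAGNode { std::vector<DAGNode*> Operands; };

    // Is Target a transitive operand of N? Depth-first over operands with
    // an explicit stack, visiting each node at most once.
    static bool isReachedThroughOperands(const DAGNode *Target, DAGNode *N) {
      std::set<const DAGNode*> Visited;
      std::vector<DAGNode*> Worklist;
      Worklist.push_back(N);
      do {
        DAGNode *Cur = Worklist.back();
        Worklist.pop_back();
        for (size_t i = 0, e = Cur->Operands.size(); i != e; ++i) {
          DAGNode *Op = Cur->Operands[i];
          if (Op == Target)
            return true;
          if (Visited.insert(Op).second)
            Worklist.push_back(Op);
        }
      } while (!Worklist.empty());
      return false;
    }
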
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index adcc532..c0d2a4d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -322,6 +322,12 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
MBBMap[BB] = MBB;
MF->push_back(MBB);
+ // Transfer the address-taken flag. This is necessary because there could
+ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+ // the first one should be marked.
+ if (BB->hasAddressTaken())
+ MBB->setHasAddressTaken();
+
// Create Machine PHI nodes for LLVM PHI nodes, lowering them as
// appropriate.
PHINode *PN;
@@ -895,6 +901,9 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc());
}
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return DAG.getBlockAddress(BA, getCurDebugLoc());
+
const VectorType *VecTy = cast<VectorType>(V->getType());
unsigned NumElements = VecTy->getNumElements();
@@ -2131,6 +2140,16 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
}
}
+void SelectionDAGLowering::visitIndirectBr(IndirectBrInst &I) {
+ // Update machine-CFG edges.
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
+ CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]);
+
+ DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ getValue(I.getAddress())));
+}
+
void SelectionDAGLowering::visitFSub(User &I) {
// -0.0 - X --> fneg
@@ -2666,7 +2685,8 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) {
}
// N = N + Idx * ElementSize;
- uint64_t ElementSize = TD->getTypeAllocSize(Ty);
+ APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+ TD->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend
@@ -2676,13 +2696,13 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) {
// If this is a multiply by a power of two, turn it into a shl
// immediately. This is a very common case.
if (ElementSize != 1) {
- if (isPowerOf2_64(ElementSize)) {
- unsigned Amt = Log2_64(ElementSize);
+ if (ElementSize.isPowerOf2()) {
+ unsigned Amt = ElementSize.logBase2();
IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
N.getValueType(), IdxN,
DAG.getConstant(Amt, TLI.getPointerTy()));
} else {
- SDValue Scale = DAG.getIntPtrConstant(ElementSize);
+ SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
N.getValueType(), IdxN, Scale);
}
@@ -4203,6 +4223,21 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Result);
return 0;
}
+ case Intrinsic::objectsize: {
+ // If we don't know by now, we're never going to know.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+
+ assert(CI && "Non-constant type in __builtin_object_size?");
+
+ SDValue Arg = getValue(I.getOperand(0));
+ EVT Ty = Arg.getValueType();
+
+ if (CI->getZExtValue() < 2)
+ setValue(&I, DAG.getConstant(-1U, Ty));
+ else
+ setValue(&I, DAG.getConstant(0, Ty));
+ return 0;
+ }
case Intrinsic::var_annotation:
// Discard annotate attributes
return 0;
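
The constant fold for llvm.objectsize above, in isolation: once the size can
no longer be determined, a type argument below 2 requests the maximum
remaining size and conservatively folds to -1 (all ones), while the minimum
variant folds to 0. A hypothetical standalone helper:

    #include <stdint.h>

    static uint64_t foldUnknownObjectSize(uint64_t TypeArg) {
      return TypeArg < 2 ? ~(uint64_t)0  // maximum variant: unknown, huge
                         : 0;            // minimum variant: unknown, none
    }
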
@@ -5485,26 +5520,6 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
DAG.setRoot(Chain);
}
-void SelectionDAGLowering::visitFree(FreeInst &I) {
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = getValue(I.getOperand(0));
- Entry.Ty = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
- Args.push_back(Entry);
- EVT IntPtr = TLI.getPointerTy();
- bool isTailCall = PerformTailCallOpt &&
- isInTailCallPosition(&I, Attribute::None, TLI);
- std::pair<SDValue,SDValue> Result =
- TLI.LowerCallTo(getRoot(), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false,
- 0, CallingConv::C, isTailCall,
- /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("free", IntPtr), Args, DAG,
- getCurDebugLoc());
- if (Result.second.getNode())
- DAG.setRoot(Result.second);
-}
-
void SelectionDAGLowering::visitVAStart(CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
MVT::Other, getRoot(),
@@ -5735,8 +5750,7 @@ void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
#include "llvm/CodeGen/SelectionDAGISel.h"
-void SelectionDAGISel::
-LowerArguments(BasicBlock *LLVMBB) {
+void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
// If this is the entry block, emit arguments.
Function &F = *LLVMBB->getParent();
SelectionDAG &DAG = SDL->DAG;
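
The GEP scaling above now keeps the element size in a pointer-width APInt, so
the power-of-two check and the log2 happen at the pointer's exact width. A
sketch of that decision (gepScaleIsShift is a hypothetical helper, not
SelectionDAGLowering API):

    #include "llvm/ADT/APInt.h"

    // Returns true and the shift amount when the multiply by the element
    // size can be emitted as ISD::SHL; otherwise an ISD::MUL is needed.
    static bool gepScaleIsShift(unsigned PointerBits, uint64_t AllocSize,
                                unsigned &ShiftAmt) {
      llvm::APInt ElementSize(PointerBits, AllocSize);
      if (!ElementSize.isPowerOf2())
        return false;
      ShiftAmt = ElementSize.logBase2();
      return true;
    }
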
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
index 722b1d8..a0ec7aa 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -44,12 +44,12 @@ class FPExtInst;
class FPToSIInst;
class FPToUIInst;
class FPTruncInst;
-class FreeInst;
class Function;
class GetElementPtrInst;
class GCFunctionInfo;
class ICmpInst;
class IntToPtrInst;
+class IndirectBrInst;
class InvokeInst;
class InsertElementInst;
class InsertValueInst;
@@ -449,6 +449,7 @@ private:
void visitRet(ReturnInst &I);
void visitBr(BranchInst &I);
void visitSwitch(SwitchInst &I);
+ void visitIndirectBr(IndirectBrInst &I);
void visitUnreachable(UnreachableInst &I) { /* noop */ }
// Helpers for visitSwitch
@@ -528,7 +529,6 @@ private:
void visitGetElementPtr(User &I);
void visitSelect(User &I);
- void visitFree(FreeInst &I);
void visitAlloca(AllocaInst &I);
void visitLoad(LoadInst &I);
void visitStore(StoreInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 72e7f58..b63d5bb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -26,6 +26,7 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -43,6 +44,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -149,16 +151,20 @@ namespace llvm {
}
// EmitInstrWithCustomInserter - This method should be implemented by targets
-// that mark instructions with the 'usesCustomDAGSchedInserter' flag. These
+// that mark instructions with the 'usesCustomInserter' flag. These
// instructions are special in various ways, which require special support to
// insert. The specified MachineInstr is created but not inserted into any
-// basic blocks, and the scheduler passes ownership of it to this method.
+// basic blocks, and this method is called to expand it into a sequence of
+// instructions, potentially also creating new basic blocks and control flow.
+// When new basic blocks are inserted and the edges from MBB to its successors
+// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
+// DenseMap.
MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
#ifndef NDEBUG
errs() << "If a target marks an instruction with "
- "'usesCustomDAGSchedInserter', it must implement "
+ "'usesCustomInserter', it must implement "
"TargetLowering::EmitInstrWithCustomInserter!";
#endif
llvm_unreachable(0);
@@ -1288,5 +1294,56 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
return !isNonImmUse(Root, N, U);
}
+SDNode *SelectionDAGISel::Select_INLINEASM(SDValue N) {
+ std::vector<SDValue> Ops(N.getNode()->op_begin(), N.getNode()->op_end());
+ SelectInlineAsmMemoryOperands(Ops);
+
+ std::vector<EVT> VTs;
+ VTs.push_back(MVT::Other);
+ VTs.push_back(MVT::Flag);
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N.getDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ return New.getNode();
+}
+
+SDNode *SelectionDAGISel::Select_UNDEF(const SDValue &N) {
+ return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::IMPLICIT_DEF,
+ N.getValueType());
+}
+
+SDNode *SelectionDAGISel::Select_DBG_LABEL(const SDValue &N) {
+ SDValue Chain = N.getOperand(0);
+ unsigned C = cast<LabelSDNode>(N)->getLabelID();
+ SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);
+ return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::DBG_LABEL,
+ MVT::Other, Tmp, Chain);
+}
+
+SDNode *SelectionDAGISel::Select_EH_LABEL(const SDValue &N) {
+ SDValue Chain = N.getOperand(0);
+ unsigned C = cast<LabelSDNode>(N)->getLabelID();
+ SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);
+ return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::EH_LABEL,
+ MVT::Other, Tmp, Chain);
+}
+
+void SelectionDAGISel::CannotYetSelect(SDValue N) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot yet select: ";
+ N.getNode()->print(Msg, CurDAG);
+ llvm_report_error(Msg.str());
+}
+
+void SelectionDAGISel::CannotYetSelectIntrinsic(SDValue N) {
+ errs() << "Cannot yet select: ";
+ unsigned iid =
+ cast<ConstantSDNode>(N.getOperand(N.getOperand(0).getValueType() == MVT::Other))->getZExtValue();
+ if (iid < Intrinsic::num_intrinsics)
+ llvm_report_error("Cannot yet select: intrinsic %" + Intrinsic::getName((Intrinsic::ID)iid));
+ else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo())
+ llvm_report_error(Twine("Cannot yet select: target intrinsic %") +
+ tii->getName(iid));
+}
char SelectionDAGISel::ID = 0;
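
The operand subscript in CannotYetSelectIntrinsic converts a bool to an
index: INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes carry the chain as operand
0 (type MVT::Other), pushing the intrinsic ID to operand 1, while chainless
intrinsics keep it at operand 0. A more explicit spelling of the same test
(a hypothetical helper, assuming the SelectionDAGNodes.h API):

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    static unsigned intrinsicIDOperandIndex(const llvm::SDValue &N) {
      bool HasChain = N.getOperand(0).getValueType() == llvm::MVT::Other;
      return HasChain ? 1u : 0u;
    }
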
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a2baee4..9f36b67 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -481,7 +481,7 @@ TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof)
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
// ConstantFP nodes default to expand. Targets can either change this to
- // Legal, in which case all fp constants are legal, or use addLegalFPImmediate
+ // Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
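
With addLegalFPImmediate gone, a target answers a predicate instead of
registering a list up front. A sketch of what an out-of-tree target might
override (MyTargetLowering and its legal set are hypothetical):

    #include "llvm/Target/TargetLowering.h"
    using namespace llvm;

    class MyTargetLowering : public TargetLowering {
    public:
      MyTargetLowering(TargetMachine &TM, TargetLoweringObjectFile *TLOF)
        : TargetLowering(TM, TLOF) {}
      // Accept only +0.0, the one constant this imaginary target can
      // materialize cheaply.
      virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const {
        return Imm.isPosZero();
      }
    };
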
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 25a499b88..0e6d479 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -31,14 +31,13 @@
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN ShadowStackGC : public GCStrategy {
+ class ShadowStackGC : public GCStrategy {
/// RootChain - This is the global linked-list that contains the chain of GC
/// roots.
GlobalVariable *Head;
@@ -84,7 +83,7 @@ namespace {
///
/// It's wrapped up in a state machine using the same transform C# uses for
/// 'yield return' enumerators. This transform allows it to be non-allocating.
- class VISIBILITY_HIDDEN EscapeEnumerator {
+ class EscapeEnumerator {
Function &F;
const char *CleanupBBName;
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 9c283b0..b5d6b47 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -76,6 +76,7 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
@@ -105,7 +106,7 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
LiveInterval &IntB,
MachineInstr *CopyMI) {
- LiveIndex CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
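
The LiveIndex-to-SlotIndex rename that dominates the rest of this file
follows one mechanical shape: index arithmetic moves off LiveIntervals and
onto the SlotIndex value itself, turning nested calls into method chains. An
illustrative helper (not coalescer API) showing the before and after:

    #include "llvm/CodeGen/LiveIntervalAnalysis.h"
    #include "llvm/CodeGen/SlotIndexes.h"
    using namespace llvm;

    static SlotIndex defIndexOf(LiveIntervals &LIS, MachineInstr *MI) {
      // was: LIS.getDefIndex(LIS.getInstructionIndex(MI))
      return LIS.getInstructionIndex(MI).getDefIndex();
    }
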
@@ -120,7 +121,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
// AValNo is the value number in A that defines the copy, A3 in the example.
- LiveIndex CopyUseIdx = li_->getUseIndex(CopyIdx);
+ SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
assert(ALR != IntA.end() && "Live range not found!");
VNInfo *AValNo = ALR->valno;
@@ -158,13 +159,13 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// Get the LiveRange in IntB that this value number starts with.
LiveInterval::iterator ValLR =
- IntB.FindLiveRangeContaining(li_->getPrevSlot(AValNo->def));
+ IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
assert(ValLR != IntB.end() && "Live range not found!");
// Make sure that the end of the live range is inside the same block as
// CopyMI.
MachineInstr *ValLREndInst =
- li_->getInstructionFromIndex(li_->getPrevSlot(ValLR->end));
+ li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
if (!ValLREndInst ||
ValLREndInst->getParent() != CopyMI->getParent()) return false;
@@ -193,7 +194,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
IntB.print(errs(), tri_);
});
- LiveIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+ SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
// We are about to delete CopyMI, so need to remove it as the 'instruction
// that defines this value #'. Update the valnum with the new defining
// instruction #.
@@ -306,8 +307,8 @@ TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
LiveInterval &IntB,
MachineInstr *CopyMI) {
- LiveIndex CopyIdx =
- li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ SlotIndex CopyIdx =
+ li_->getInstructionIndex(CopyMI).getDefIndex();
// FIXME: For now, only eliminate the copy by commuting its def when the
// source register is a virtual register. We want to guard against cases
@@ -330,7 +331,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// AValNo is the value number in A that defines the copy, A3 in the example.
LiveInterval::iterator ALR =
- IntA.FindLiveRangeContaining(li_->getPrevSlot(CopyIdx));
+ IntA.FindLiveRangeContaining(CopyIdx.getUseIndex());
assert(ALR != IntA.end() && "Live range not found!");
VNInfo *AValNo = ALR->valno;
@@ -376,7 +377,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
UE = mri_->use_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
- LiveIndex UseIdx = li_->getInstructionIndex(UseMI);
+ SlotIndex UseIdx = li_->getInstructionIndex(UseMI);
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end())
continue;
@@ -401,7 +402,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
bool BHasPHIKill = BValNo->hasPHIKill();
SmallVector<VNInfo*, 4> BDeadValNos;
VNInfo::KillSet BKills;
- std::map<LiveIndex, LiveIndex> BExtend;
+ std::map<SlotIndex, SlotIndex> BExtend;
// If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, e.g.
// A = or A, B
@@ -428,7 +429,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
++UI;
if (JoinedCopies.count(UseMI))
continue;
- LiveIndex UseIdx= li_->getUseIndex(li_->getInstructionIndex(UseMI));
+ SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex();
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
@@ -439,7 +440,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
if (Extended)
UseMO.setIsKill(false);
else
- BKills.push_back(li_->getNextSlot(UseIdx));
+ BKills.push_back(UseIdx.getDefIndex());
}
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
@@ -448,7 +449,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// This copy will become a noop. If it's defining a new val#,
// remove that val# as well. However this live range is being
// extended to the end of the existing live range defined by the copy.
- LiveIndex DefIdx = li_->getDefIndex(UseIdx);
+ SlotIndex DefIdx = UseIdx.getDefIndex();
const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
BHasPHIKill |= DLR->valno->hasPHIKill();
assert(DLR->valno->def == DefIdx);
@@ -495,8 +496,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
- LiveIndex End = AI->end;
- std::map<LiveIndex, LiveIndex>::iterator
+ SlotIndex End = AI->end;
+ std::map<SlotIndex, SlotIndex>::iterator
EI = BExtend.find(End);
if (EI != BExtend.end())
End = EI->second;
@@ -507,7 +508,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
if (BHasSubRegs) {
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
LiveInterval &SRLI = li_->getInterval(*SR);
- SRLI.MergeInClobberRange(AI->start, End, li_->getVNInfoAllocator());
+ SRLI.MergeInClobberRange(*li_, AI->start, End, li_->getVNInfoAllocator());
}
}
}
@@ -551,7 +552,7 @@ static bool isSameOrFallThroughBB(MachineBasicBlock *MBB,
/// from a physical register live interval as well as from the live intervals
/// of its sub-registers.
static void removeRange(LiveInterval &li,
- LiveIndex Start, LiveIndex End,
+ SlotIndex Start, SlotIndex End,
LiveIntervals *li_, const TargetRegisterInfo *tri_) {
li.removeRange(Start, End, true);
if (TargetRegisterInfo::isPhysicalRegister(li.reg)) {
@@ -559,8 +560,9 @@ static void removeRange(LiveInterval &li,
if (!li_->hasInterval(*SR))
continue;
LiveInterval &sli = li_->getInterval(*SR);
- LiveIndex RemoveStart = Start;
- LiveIndex RemoveEnd = Start;
+ SlotIndex RemoveStart = Start;
+ SlotIndex RemoveEnd = Start;
+
while (RemoveEnd != End) {
LiveInterval::iterator LR = sli.FindLiveRangeContaining(RemoveStart);
if (LR == sli.end())
@@ -577,14 +579,14 @@ static void removeRange(LiveInterval &li,
/// as the copy instruction, trim the live interval to the last use and return
/// true.
bool
-SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(LiveIndex CopyIdx,
+SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
MachineBasicBlock *CopyMBB,
LiveInterval &li,
const LiveRange *LR) {
- LiveIndex MBBStart = li_->getMBBStartIdx(CopyMBB);
- LiveIndex LastUseIdx;
+ SlotIndex MBBStart = li_->getMBBStartIdx(CopyMBB);
+ SlotIndex LastUseIdx;
MachineOperand *LastUse =
- lastRegisterUse(LR->start, li_->getPrevSlot(CopyIdx), li.reg, LastUseIdx);
+ lastRegisterUse(LR->start, CopyIdx.getPrevSlot(), li.reg, LastUseIdx);
if (LastUse) {
MachineInstr *LastUseMI = LastUse->getParent();
if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) {
@@ -603,8 +605,8 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(LiveIndex CopyIdx,
// There are uses before the copy, just shorten the live range to the end
// of the last use.
LastUse->setIsKill();
- removeRange(li, li_->getDefIndex(LastUseIdx), LR->end, li_, tri_);
- LR->valno->addKill(li_->getNextSlot(LastUseIdx));
+ removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_);
+ LR->valno->addKill(LastUseIdx.getDefIndex());
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
DstReg == li.reg) {
@@ -617,7 +619,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(LiveIndex CopyIdx,
// Is it livein?
if (LR->start <= MBBStart && LR->end > MBBStart) {
- if (LR->start == LiveIndex()) {
+ if (LR->start == li_->getZeroIndex()) {
assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
// Live-in to the function but dead. Remove it from entry live-in set.
mf_->begin()->removeLiveIn(li.reg);
@@ -634,7 +636,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
unsigned DstReg,
unsigned DstSubIdx,
MachineInstr *CopyMI) {
- LiveIndex CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI));
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex();
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
VNInfo *ValNo = SrcLR->valno;
@@ -683,7 +685,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
return false;
}
- LiveIndex DefIdx = li_->getDefIndex(CopyIdx);
+ SlotIndex DefIdx = CopyIdx.getDefIndex();
const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx);
DLR->valno->setCopy(0);
// Don't forget to update sub-register intervals.
@@ -716,7 +718,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
// should mark it dead:
if (DefMI->getParent() == MBB) {
DefMI->addRegisterDead(SrcInt.reg, tri_);
- SrcLR->end = li_->getNextSlot(SrcLR->start);
+ SrcLR->end = SrcLR->start.getNextSlot();
}
}
@@ -815,8 +817,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
(TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
allocatableRegs_[CopyDstReg])) {
LiveInterval &LI = li_->getInterval(CopyDstReg);
- LiveIndex DefIdx =
- li_->getDefIndex(li_->getInstructionIndex(UseMI));
+ SlotIndex DefIdx =
+ li_->getInstructionIndex(UseMI).getDefIndex();
if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) {
if (DLR->valno->def == DefIdx)
DLR->valno->setCopy(UseMI);
@@ -835,12 +837,12 @@ void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg,
if (!UseMO.isKill())
continue;
MachineInstr *UseMI = UseMO.getParent();
- LiveIndex UseIdx =
- li_->getUseIndex(li_->getInstructionIndex(UseMI));
+ SlotIndex UseIdx =
+ li_->getInstructionIndex(UseMI).getUseIndex();
const LiveRange *LR = LI.getLiveRangeContaining(UseIdx);
if (!LR ||
- (!LR->valno->isKill(li_->getNextSlot(UseIdx)) &&
- LR->valno->def != li_->getNextSlot(UseIdx))) {
+ (!LR->valno->isKill(UseIdx.getDefIndex()) &&
+ LR->valno->def != UseIdx.getDefIndex())) {
// Interesting problem. After coalescing reg1027's def and kill are both
// at the same point: %reg1027,0.000000e+00 = [56,814:0) 0@70-(814)
//
@@ -881,16 +883,16 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
/// Return true if live interval is removed.
bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li,
MachineInstr *CopyMI) {
- LiveIndex CopyIdx = li_->getInstructionIndex(CopyMI);
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI);
LiveInterval::iterator MLR =
- li.FindLiveRangeContaining(li_->getDefIndex(CopyIdx));
+ li.FindLiveRangeContaining(CopyIdx.getDefIndex());
if (MLR == li.end())
return false; // Already removed by ShortenDeadCopySrcLiveRange.
- LiveIndex RemoveStart = MLR->start;
- LiveIndex RemoveEnd = MLR->end;
- LiveIndex DefIdx = li_->getDefIndex(CopyIdx);
+ SlotIndex RemoveStart = MLR->start;
+ SlotIndex RemoveEnd = MLR->end;
+ SlotIndex DefIdx = CopyIdx.getDefIndex();
// Remove the liverange that's defined by this.
- if (RemoveStart == DefIdx && RemoveEnd == li_->getNextSlot(DefIdx)) {
+ if (RemoveStart == DefIdx && RemoveEnd == DefIdx.getStoreIndex()) {
removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
return removeIntervalIfEmpty(li, li_, tri_);
}
@@ -901,7 +903,7 @@ bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li,
/// the val# it defines. If the live interval becomes empty, remove it as well.
bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
MachineInstr *DefMI) {
- LiveIndex DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI));
+ SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex();
LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
if (DefIdx != MLR->valno->def)
return false;
@@ -912,18 +914,18 @@ bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
/// PropagateDeadness - Propagate the dead marker to the instruction which
/// defines the val#.
static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
- LiveIndex &LRStart, LiveIntervals *li_,
+ SlotIndex &LRStart, LiveIntervals *li_,
const TargetRegisterInfo* tri_) {
MachineInstr *DefMI =
- li_->getInstructionFromIndex(li_->getDefIndex(LRStart));
+ li_->getInstructionFromIndex(LRStart.getDefIndex());
if (DefMI && DefMI != CopyMI) {
int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false);
if (DeadIdx != -1)
DefMI->getOperand(DeadIdx).setIsDead();
else
DefMI->addOperand(MachineOperand::CreateReg(li.reg,
- true, true, false, true));
- LRStart = li_->getNextSlot(LRStart);
+ /*def*/true, /*implicit*/true, /*kill*/false, /*dead*/true));
+ LRStart = LRStart.getNextSlot();
}
}
@@ -934,8 +936,8 @@ static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
bool
SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
MachineInstr *CopyMI) {
- LiveIndex CopyIdx = li_->getInstructionIndex(CopyMI);
- if (CopyIdx == LiveIndex()) {
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI);
+ if (CopyIdx == SlotIndex()) {
// FIXME: special case: function live in. It can be a general case if the
// first instruction index starts at > 0 value.
assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
@@ -948,13 +950,13 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
}
LiveInterval::iterator LR =
- li.FindLiveRangeContaining(li_->getPrevSlot(CopyIdx));
+ li.FindLiveRangeContaining(CopyIdx.getPrevIndex().getStoreIndex());
if (LR == li.end())
// Livein but defined by a phi.
return false;
- LiveIndex RemoveStart = LR->start;
- LiveIndex RemoveEnd = li_->getNextSlot(li_->getDefIndex(CopyIdx));
+ SlotIndex RemoveStart = LR->start;
+ SlotIndex RemoveEnd = CopyIdx.getStoreIndex();
if (LR->end > RemoveEnd)
// More uses past this copy? Nothing to do.
return false;
@@ -974,7 +976,7 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
// If the live range starts in another mbb and the copy mbb is not a fall
// through mbb, then we can only cut the range from the beginning of the
// copy mbb.
- RemoveStart = li_->getNextSlot(li_->getMBBStartIdx(CopyMBB));
+ RemoveStart = li_->getMBBStartIdx(CopyMBB).getNextIndex().getBaseIndex();
if (LR->valno->def == RemoveStart) {
// If the def MI defines the val# and this copy is the only kill of the
@@ -1030,14 +1032,14 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
// If the virtual register live interval extends into a loop, turn down
// aggressiveness.
- LiveIndex CopyIdx =
- li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ SlotIndex CopyIdx =
+ li_->getInstructionIndex(CopyMI).getDefIndex();
const MachineLoop *L = loopInfo->getLoopFor(CopyMBB);
if (!L) {
// Let's see if the virtual register live interval extends into the loop.
LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx);
assert(DLR != DstInt.end() && "Live range not found!");
- DLR = DstInt.FindLiveRangeContaining(li_->getNextSlot(DLR->end));
+ DLR = DstInt.FindLiveRangeContaining(DLR->end.getNextSlot());
if (DLR != DstInt.end()) {
CopyMBB = li_->getMBBFromIndex(DLR->start);
L = loopInfo->getLoopFor(CopyMBB);
@@ -1047,7 +1049,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
if (!L || Length <= Threshold)
return true;
- LiveIndex UseIdx = li_->getUseIndex(CopyIdx);
+ SlotIndex UseIdx = CopyIdx.getUseIndex();
LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
if (loopInfo->getLoopFor(SMBB) != L) {
@@ -1060,7 +1062,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
if (SuccMBB == CopyMBB)
continue;
if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB),
- li_->getNextSlot(li_->getMBBEndIdx(SuccMBB))))
+ li_->getMBBEndIdx(SuccMBB).getNextIndex().getBaseIndex()))
return false;
}
}
@@ -1091,12 +1093,12 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
// If the virtual register live interval is defined or cross a loop, turn
// down aggressiveness.
- LiveIndex CopyIdx =
- li_->getDefIndex(li_->getInstructionIndex(CopyMI));
- LiveIndex UseIdx = li_->getUseIndex(CopyIdx);
+ SlotIndex CopyIdx =
+ li_->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex UseIdx = CopyIdx.getUseIndex();
LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
assert(SLR != SrcInt.end() && "Live range not found!");
- SLR = SrcInt.FindLiveRangeContaining(li_->getPrevSlot(SLR->start));
+ SLR = SrcInt.FindLiveRangeContaining(SLR->start.getPrevSlot());
if (SLR == SrcInt.end())
return true;
MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
@@ -1116,7 +1118,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
if (PredMBB == SMBB)
continue;
if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB),
- li_->getNextSlot(li_->getMBBEndIdx(PredMBB))))
+ li_->getMBBEndIdx(PredMBB).getNextIndex().getBaseIndex()))
return false;
}
}
@@ -1367,7 +1369,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (SrcSubIdx)
SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
assert(SrcSubRC && "Illegal subregister index");
- if (!SrcSubRC->contains(DstReg)) {
+ if (!SrcSubRC->contains(DstSubReg)) {
DEBUG(errs() << "\tIncompatible source regclass: "
<< tri_->getName(DstSubReg) << " not in "
<< SrcSubRC->getName() << ".\n");
@@ -1705,7 +1707,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Update the liveintervals of sub-registers.
for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS)
- li_->getOrCreateInterval(*AS).MergeInClobberRanges(*ResSrcInt,
+ li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, *ResSrcInt,
li_->getVNInfoAllocator());
}
@@ -1832,6 +1834,25 @@ static bool InVector(VNInfo *Val, const SmallVector<VNInfo*, 8> &V) {
return std::find(V.begin(), V.end(), Val) != V.end();
}
+static bool isValNoDefMove(const MachineInstr *MI, unsigned DR, unsigned SR,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ ;
+ else if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ } else if (MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
+ MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(2).getReg();
+ } else
+ return false;
+ return (SrcReg == SR || TRI->isSuperRegister(SR, SrcReg)) &&
+ (DstReg == DR || TRI->isSuperRegister(DR, DstReg));
+}
+
/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
/// the specified live interval is defined by a copy from the specified
/// register.
@@ -1848,12 +1869,9 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
// It's a sub-register live interval, we may not have precise information.
// Re-compute it.
MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start);
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (DefMI &&
- tii_->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- DstReg == li.reg && SrcReg == Reg) {
+ if (DefMI && isValNoDefMove(DefMI, li.reg, Reg, tii_, tri_)) {
// Cache computed info.
- LR->valno->def = LR->start;
+ LR->valno->def = LR->start;
LR->valno->setCopy(DefMI);
return true;
}
@@ -1861,6 +1879,23 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
return false;
}
+
+/// ValueLiveAt - Return true if the LiveRange pointed to by the given
+/// iterator, or any subsequent range with the same value number,
+/// is live at the given point.
+bool SimpleRegisterCoalescing::ValueLiveAt(LiveInterval::iterator LRItr,
+ LiveInterval::iterator LREnd,
+ SlotIndex defPoint) const {
+ for (const VNInfo *valno = LRItr->valno;
+ (LRItr != LREnd) && (LRItr->valno == valno); ++LRItr) {
+ if (LRItr->contains(defPoint))
+ return true;
+ }
+
+ return false;
+}
+
+
/// SimpleJoin - Attempt to joint the specified interval into this one. The
/// caller of this method must guarantee that the RHS only contains a single
/// value number and that the RHS is not defined by a copy from this
@@ -1907,7 +1942,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
return false; // Nope, bail out.
- if (LHSIt->contains(RHSIt->valno->def))
+ if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def))
// Here is an interesting situation:
// BB1:
// vr1025 = copy vr1024
@@ -1945,7 +1980,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
// Otherwise, if this is a copy from the RHS, mark it as being merged
// in.
if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
- if (LHSIt->contains(RHSIt->valno->def))
+ if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def))
// Here is an interesting situation:
// BB1:
// vr1025 = copy vr1024
@@ -2030,7 +2065,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
// Update the liveintervals of sub-registers.
if (TargetRegisterInfo::isPhysicalRegister(LHS.reg))
for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS)
- li_->getOrCreateInterval(*AS).MergeInClobberRanges(LHS,
+ li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, LHS,
li_->getVNInfoAllocator());
return true;
@@ -2131,7 +2166,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
} else {
// It was defined as a copy from the LHS, find out what value # it is.
RHSValNoInfo =
- LHS.getLiveRangeContaining(li_->getPrevSlot(RHSValNoInfo0->def))->valno;
+ LHS.getLiveRangeContaining(RHSValNoInfo0->def.getPrevSlot())->valno;
RHSValID = RHSValNoInfo->id;
RHSVal0DefinedFromLHS = RHSValID;
}
@@ -2195,7 +2230,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
// Figure out the value # from the RHS.
LHSValsDefinedFromRHS[VNI]=
- RHS.getLiveRangeContaining(li_->getPrevSlot(VNI->def))->valno;
+ RHS.getLiveRangeContaining(VNI->def.getPrevSlot())->valno;
}
// Loop over the value numbers of the RHS, seeing if any are defined from
@@ -2213,7 +2248,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
// Figure out the value # from the LHS.
RHSValsDefinedFromLHS[VNI]=
- LHS.getLiveRangeContaining(li_->getPrevSlot(VNI->def))->valno;
+ LHS.getLiveRangeContaining(VNI->def.getPrevSlot())->valno;
}
LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
@@ -2477,11 +2512,11 @@ SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
/// lastRegisterUse - Returns the last use of the specific register between
/// cycles Start and End or NULL if there are no uses.
MachineOperand *
-SimpleRegisterCoalescing::lastRegisterUse(LiveIndex Start,
- LiveIndex End,
+SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
+ SlotIndex End,
unsigned Reg,
- LiveIndex &UseIdx) const{
- UseIdx = LiveIndex();
+ SlotIndex &UseIdx) const{
+ UseIdx = SlotIndex();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineOperand *LastUse = NULL;
for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg),
@@ -2493,22 +2528,24 @@ SimpleRegisterCoalescing::lastRegisterUse(LiveIndex Start,
SrcReg == DstReg)
// Ignore identity copies.
continue;
- LiveIndex Idx = li_->getInstructionIndex(UseMI);
+ SlotIndex Idx = li_->getInstructionIndex(UseMI);
+ // FIXME: Should this be Idx != UseIdx? SlotIndex() will return something
+ // that compares higher than any other index.
if (Idx >= Start && Idx < End && Idx >= UseIdx) {
LastUse = &Use;
- UseIdx = li_->getUseIndex(Idx);
+ UseIdx = Idx.getUseIndex();
}
}
return LastUse;
}
- LiveIndex s = Start;
- LiveIndex e = li_->getBaseIndex(li_->getPrevSlot(End));
+ SlotIndex s = Start;
+ SlotIndex e = End.getPrevSlot().getBaseIndex();
while (e >= s) {
// Skip deleted instructions
MachineInstr *MI = li_->getInstructionFromIndex(e);
- while (e != LiveIndex() && li_->getPrevIndex(e) >= s && !MI) {
- e = li_->getPrevIndex(e);
+ while (e != SlotIndex() && e.getPrevIndex() >= s && !MI) {
+ e = e.getPrevIndex();
MI = li_->getInstructionFromIndex(e);
}
if (e < s || MI == NULL)
@@ -2522,12 +2559,12 @@ SimpleRegisterCoalescing::lastRegisterUse(LiveIndex Start,
MachineOperand &Use = MI->getOperand(i);
if (Use.isReg() && Use.isUse() && Use.getReg() &&
tri_->regsOverlap(Use.getReg(), Reg)) {
- UseIdx = li_->getUseIndex(e);
+ UseIdx = e.getUseIndex();
return &Use;
}
}
- e = li_->getPrevIndex(e);
+ e = e.getPrevIndex();
}
return NULL;
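The pattern running through this hunk (and most of the patch) is that slot arithmetic moves off LiveIntervals and onto the index itself: li_->getUseIndex(Idx) becomes Idx.getUseIndex(), li_->getPrevIndex(e) becomes e.getPrevIndex(), and so on. A toy index type showing that accessor shape, on the assumption (matching the new SlotIndexes.cpp below) that each instruction entry is four sub-slots wide:

    #include <cstdio>

    // Toy SlotIndex: a base index that is a multiple of NUM, plus one of
    // four sub-slots within that instruction entry.
    struct ToySlotIndex {
      unsigned base; // multiple of NUM
      unsigned slot; // 0=LOAD 1=USE 2=DEF 3=STORE
      static const unsigned NUM = 4;

      ToySlotIndex getUseIndex() const { return ToySlotIndex{base, 1}; }
      ToySlotIndex getDefIndex() const { return ToySlotIndex{base, 2}; }
      ToySlotIndex getBaseIndex() const { return ToySlotIndex{base, 0}; }
      ToySlotIndex getPrevIndex() const { return ToySlotIndex{base - NUM, slot}; }
      unsigned value() const { return base + slot; }
    };

    int main() {
      ToySlotIndex idx{8, 0}; // third entry in a list spaced by NUM
      std::printf("use=%u def=%u prev-use=%u\n",
                  idx.getUseIndex().value(),                 // 9
                  idx.getDefIndex().value(),                 // 10
                  idx.getPrevIndex().getUseIndex().value()); // 5
      return 0;
    }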
@@ -2551,24 +2588,30 @@ void SimpleRegisterCoalescing::releaseMemory() {
static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) {
for (LiveInterval::Ranges::const_iterator
i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
- if (li_->getPrevIndex(i->end) > i->start)
+ if (i->end.getPrevIndex() > i->start)
return false;
return true;
}
+
void SimpleRegisterCoalescing::CalculateSpillWeights() {
SmallSet<unsigned, 4> Processed;
for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
mbbi != mbbe; ++mbbi) {
MachineBasicBlock* MBB = mbbi;
- LiveIndex MBBEnd = li_->getMBBEndIdx(MBB);
+ SlotIndex MBBEnd = li_->getMBBEndIdx(MBB);
MachineLoop* loop = loopInfo->getLoopFor(MBB);
unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
- bool isExit = loop ? loop->isLoopExit(MBB) : false;
+ bool isExiting = loop ? loop->isLoopExiting(MBB) : false;
- for (MachineBasicBlock::iterator mii = MBB->begin(), mie = MBB->end();
+ for (MachineBasicBlock::const_iterator mii = MBB->begin(), mie = MBB->end();
mii != mie; ++mii) {
- MachineInstr *MI = mii;
+ const MachineInstr *MI = mii;
+ if (tii_->isIdentityCopy(*MI))
+ continue;
+
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ continue;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &mopi = MI->getOperand(i);
@@ -2596,10 +2639,9 @@ void SimpleRegisterCoalescing::CalculateSpillWeights() {
LiveInterval &RegInt = li_->getInterval(Reg);
float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth);
- if (HasDef && isExit) {
+ if (HasDef && isExiting) {
// Looks like this is a loop count variable update.
- LiveIndex DefIdx =
- li_->getDefIndex(li_->getInstructionIndex(MI));
+ SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
const LiveRange *DLR =
li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
if (DLR->end > MBBEnd)
@@ -2706,7 +2748,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
// registers unless the definition is dead. e.g.
// %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
// or else the scavenger may complain. LowerSubregs will
- // change this to an IMPLICIT_DEF later.
+ // delete them later.
DoDelete = false;
}
if (MI->registerDefIsDead(DstReg)) {
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index 3ebe3a1..78f8a9a 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -146,7 +146,7 @@ namespace llvm {
/// TrimLiveIntervalToLastUse - If there is a last use in the same basic
/// block as the copy instruction, trim the live interval to the last use
/// and return true.
- bool TrimLiveIntervalToLastUse(LiveIndex CopyIdx,
+ bool TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
MachineBasicBlock *CopyMBB,
LiveInterval &li, const LiveRange *LR);
@@ -201,6 +201,12 @@ namespace llvm {
bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
unsigned SubIdx, unsigned &RealDstReg);
+ /// ValueLiveAt - Return true if the LiveRange pointed to by the given
+ /// iterator, or any subsequent range with the same value number,
+ /// is live at the given point.
+ bool ValueLiveAt(LiveInterval::iterator LRItr, LiveInterval::iterator LREnd,
+ SlotIndex defPoint) const;
+
/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
/// the specified live interval is defined by a copy from the specified
/// register.
@@ -235,9 +241,8 @@ namespace llvm {
/// lastRegisterUse - Returns the last use of the specific register between
/// cycles Start and End or NULL if there are no uses.
- MachineOperand *lastRegisterUse(LiveIndex Start,
- LiveIndex End, unsigned Reg,
- LiveIndex &LastUseIdx) const;
+ MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End,
+ unsigned Reg, SlotIndex &LastUseIdx) const;
/// CalculateSpillWeights - Compute spill weights for all virtual register
/// live intervals.
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index e987fa2..6de03e1 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -27,7 +27,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
@@ -38,7 +37,7 @@ STATISTIC(NumUnwinds, "Number of unwinds replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
- class VISIBILITY_HIDDEN SjLjEHPass : public FunctionPass {
+ class SjLjEHPass : public FunctionPass {
const TargetLowering *TLI;
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
new file mode 100644
index 0000000..6b04029
--- /dev/null
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -0,0 +1,189 @@
+//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "slotindexes"
+
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+std::auto_ptr<IndexListEntry> SlotIndex::emptyKeyPtr(0),
+ SlotIndex::tombstoneKeyPtr(0);
+
+char SlotIndexes::ID = 0;
+static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering");
+
+void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void SlotIndexes::releaseMemory() {
+ mi2iMap.clear();
+ mbb2IdxMap.clear();
+ idx2MBBMap.clear();
+ terminatorGaps.clear();
+ clearList();
+}
+
+bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+
+ // Compute numbering as follows:
+ // Grab an iterator to the start of the index list.
+ // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+ // iterator in lock-step (though skipping it over indexes which have
+ // null pointers in the instruction field).
+ // At each iteration assert that the instruction pointed to in the index
+ // is the same one pointed to by the MI iterator.
+
+ // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+ // only need to be set up once after the first numbering is computed.
+
+ mf = &fn;
+ initList();
+
+ const unsigned gap = 1;
+
+ // Check that the list contains only the sentinel.
+ assert(indexListHead->getNext() == 0 &&
+ "Index list non-empty at initial numbering?");
+ assert(idx2MBBMap.empty() &&
+ "Index -> MBB mapping non-empty at initial numbering?");
+ assert(mbb2IdxMap.empty() &&
+ "MBB -> Index mapping non-empty at initial numbering?");
+ assert(mi2iMap.empty() &&
+ "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+ functionSize = 0;
+ /*
+ for (unsigned s = 0; s < SlotIndex::NUM; ++s) {
+ indexList.push_back(createEntry(0, s));
+ }
+
+ unsigned index = gap * SlotIndex::NUM;
+ */
+
+ unsigned index = 0;
+
+ // Iterate over the function.
+ for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ MachineBasicBlock *mbb = &*mbbItr;
+
+ // Insert an index for the MBB start.
+ push_back(createEntry(0, index));
+ SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
+
+ index += gap * SlotIndex::NUM;
+
+ for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ MachineInstr *mi = &*miItr;
+
+ if (miItr == mbb->getFirstTerminator()) {
+ push_back(createEntry(0, index));
+ terminatorGaps.insert(
+ std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
+ index += gap * SlotIndex::NUM;
+ }
+
+ // Insert a store index for the instr.
+ push_back(createEntry(mi, index));
+
+ // Save this base index in the maps.
+ mi2iMap.insert(
+ std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+
+ ++functionSize;
+
+ unsigned Slots = mi->getDesc().getNumDefs();
+ if (Slots == 0)
+ Slots = 1;
+
+ index += (Slots + 1) * gap * SlotIndex::NUM;
+ }
+
+ if (mbb->getFirstTerminator() == mbb->end()) {
+ push_back(createEntry(0, index));
+ terminatorGaps.insert(
+ std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
+ index += gap * SlotIndex::NUM;
+ }
+
+ SlotIndex blockEndIndex(back(), SlotIndex::STORE);
+ mbb2IdxMap.insert(
+ std::make_pair(mbb, std::make_pair(blockStartIndex, blockEndIndex)));
+
+ idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
+ }
+
+ // One blank instruction at the end.
+ push_back(createEntry(0, index));
+
+ // Sort the Idx2MBBMap
+ std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+
+ DEBUG(dump());
+
+ // And we're done!
+ return false;
+}
+
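The arithmetic above spaces consecutive index-list entries gap * SlotIndex::NUM apart, gives instructions that define several registers proportionally more room, and reserves one extra entry at each block start plus one in front of the first terminator (the terminator gap used for PHI values). A standalone sketch of that numbering, assuming the four-slot LOAD/USE/DEF/STORE layout:

    #include <cstdio>

    // Toy model of the numbering above: every index-list entry is NUM
    // sub-slots wide, and an instruction with extra defs gets extra space.
    enum Slot { LOAD, USE, DEF, STORE, NUM }; // assumed SlotIndexes layout

    int main() {
      const unsigned gap = 1;
      unsigned index = 0;

      // Entry for the block start.
      std::printf("block start -> %u\n", index);
      index += gap * NUM;

      // Three instructions; the second defines two registers.
      unsigned defs[] = {1, 2, 1};
      for (unsigned i = 0; i < 3; ++i) {
        std::printf("instr %u     -> LOAD=%u USE=%u DEF=%u STORE=%u\n", i,
                    index + LOAD, index + USE, index + DEF, index + STORE);
        unsigned slots = defs[i] ? defs[i] : 1;
        index += (slots + 1) * gap * NUM;
      }

      // Terminator gap (reserved for PHI values), then the block ends.
      std::printf("term gap    -> %u\n", index);
      index += gap * NUM;
      return 0;
    }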
+void SlotIndexes::renumber() {
+ assert(false && "SlotIndexes::runmuber is not fully implemented yet.");
+
+ // Compute numbering as follows:
+ // Grab an iterator to the start of the index list.
+ // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+ // iterator in lock-step (though skipping it over indexes which have
+ // null pointers in the instruction field).
+ // At each iteration assert that the instruction pointed to in the index
+ // is the same one pointed to by the MI iterator.
+
+ // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+ // only need to be set up once - when the first numbering is computed.
+
+ assert(false && "Renumbering not supported yet.");
+}
+
+void SlotIndexes::dump() const {
+ for (const IndexListEntry *itr = front(); itr != getTail();
+ itr = itr->getNext()) {
+ errs() << itr->getIndex() << " ";
+
+ if (itr->getInstr() != 0) {
+ errs() << *itr->getInstr();
+ } else {
+ errs() << "\n";
+ }
+ }
+
+ for (MBB2IdxMap::iterator itr = mbb2IdxMap.begin();
+ itr != mbb2IdxMap.end(); ++itr) {
+ errs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
+ << itr->second.first << ", " << itr->second.second << "]\n";
+ }
+}
+
+// Print a SlotIndex to a raw_ostream.
+void SlotIndex::print(raw_ostream &os) const {
+ os << getIndex();
+ if (isPHI())
+ os << "*";
+}
+
+// Dump a SlotIndex to stderr.
+void SlotIndex::dump() const {
+ print(errs());
+ errs() << "\n";
+}
+
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 0277d64..95e85be 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -51,13 +51,15 @@ protected:
/// Ensures there is space before the given machine instruction and returns the
/// instruction's new number.
- LiveIndex makeSpaceBefore(MachineInstr *mi) {
+ SlotIndex makeSpaceBefore(MachineInstr *mi) {
if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
- lis->scaleNumbering(2);
- ls->scaleNumbering(2);
+ // FIXME: Should be updated to use rewrite-in-place methods when they're
+ // introduced. Currently broken.
+ //lis->scaleNumbering(2);
+ //ls->scaleNumbering(2);
}
- LiveIndex miIdx = lis->getInstructionIndex(mi);
+ SlotIndex miIdx = lis->getInstructionIndex(mi);
assert(lis->hasGapBeforeInstr(miIdx));
@@ -66,13 +68,15 @@ protected:
/// Ensures there is space after the given machine instruction and returns the
/// instruction's new number.
- LiveIndex makeSpaceAfter(MachineInstr *mi) {
+ SlotIndex makeSpaceAfter(MachineInstr *mi) {
if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
- lis->scaleNumbering(2);
- ls->scaleNumbering(2);
+ // FIXME: Should be updated to use rewrite-in-place methods when they're
+ // introduced. Currently broken.
+ // lis->scaleNumbering(2);
+ // ls->scaleNumbering(2);
}
- LiveIndex miIdx = lis->getInstructionIndex(mi);
+ SlotIndex miIdx = lis->getInstructionIndex(mi);
assert(lis->hasGapAfterInstr(miIdx));
@@ -83,19 +87,19 @@ protected:
/// after the given instruction. Returns the base index of the inserted
/// instruction. The caller is responsible for adding an appropriate
/// LiveInterval to the LiveIntervals analysis.
- LiveIndex insertStoreAfter(MachineInstr *mi, unsigned ss,
+ SlotIndex insertStoreAfter(MachineInstr *mi, unsigned ss,
unsigned vreg,
const TargetRegisterClass *trc) {
MachineBasicBlock::iterator nextInstItr(next(mi));
- LiveIndex miIdx = makeSpaceAfter(mi);
+ SlotIndex miIdx = makeSpaceAfter(mi);
tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg,
true, ss, trc);
MachineBasicBlock::iterator storeInstItr(next(mi));
MachineInstr *storeInst = &*storeInstItr;
- LiveIndex storeInstIdx = lis->getNextIndex(miIdx);
+ SlotIndex storeInstIdx = miIdx.getNextIndex();
assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
"Store inst index already in use.");
@@ -108,15 +112,15 @@ protected:
/// Insert a store of the given vreg to the given stack slot immediately
/// before the given instruction. Returns the base index of the inserted
/// instruction.
- LiveIndex insertStoreBefore(MachineInstr *mi, unsigned ss,
+ SlotIndex insertStoreBefore(MachineInstr *mi, unsigned ss,
unsigned vreg,
const TargetRegisterClass *trc) {
- LiveIndex miIdx = makeSpaceBefore(mi);
+ SlotIndex miIdx = makeSpaceBefore(mi);
tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc);
MachineBasicBlock::iterator storeInstItr(prior(mi));
MachineInstr *storeInst = &*storeInstItr;
- LiveIndex storeInstIdx = lis->getPrevIndex(miIdx);
+ SlotIndex storeInstIdx = miIdx.getPrevIndex();
assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
"Store inst index already in use.");
@@ -131,9 +135,9 @@ protected:
unsigned vreg,
const TargetRegisterClass *trc) {
- LiveIndex storeInstIdx = insertStoreAfter(mi, ss, vreg, trc);
- LiveIndex start = lis->getDefIndex(lis->getInstructionIndex(mi)),
- end = lis->getUseIndex(storeInstIdx);
+ SlotIndex storeInstIdx = insertStoreAfter(mi, ss, vreg, trc);
+ SlotIndex start = lis->getInstructionIndex(mi).getDefIndex(),
+ end = storeInstIdx.getUseIndex();
VNInfo *vni =
li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator());
@@ -149,18 +153,18 @@ protected:
/// after the given instruction. Returns the base index of the inserted
/// instruction. The caller is responsible for adding/removing an appropriate
/// range to/from the vreg's LiveInterval.
- LiveIndex insertLoadAfter(MachineInstr *mi, unsigned ss,
+ SlotIndex insertLoadAfter(MachineInstr *mi, unsigned ss,
unsigned vreg,
const TargetRegisterClass *trc) {
MachineBasicBlock::iterator nextInstItr(next(mi));
- LiveIndex miIdx = makeSpaceAfter(mi);
+ SlotIndex miIdx = makeSpaceAfter(mi);
tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(next(mi));
MachineInstr *loadInst = &*loadInstItr;
- LiveIndex loadInstIdx = lis->getNextIndex(miIdx);
+ SlotIndex loadInstIdx = miIdx.getNextIndex();
assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
"Store inst index already in use.");
@@ -174,15 +178,15 @@ protected:
/// before the given instruction. Returns the base index of the inserted
/// instruction. The caller is responsible for adding an appropriate
/// LiveInterval to the LiveIntervals analysis.
- LiveIndex insertLoadBefore(MachineInstr *mi, unsigned ss,
+ SlotIndex insertLoadBefore(MachineInstr *mi, unsigned ss,
unsigned vreg,
const TargetRegisterClass *trc) {
- LiveIndex miIdx = makeSpaceBefore(mi);
+ SlotIndex miIdx = makeSpaceBefore(mi);
tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(prior(mi));
MachineInstr *loadInst = &*loadInstItr;
- LiveIndex loadInstIdx = lis->getPrevIndex(miIdx);
+ SlotIndex loadInstIdx = miIdx.getPrevIndex();
assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
"Load inst index already in use.");
@@ -197,9 +201,9 @@ protected:
unsigned vreg,
const TargetRegisterClass *trc) {
- LiveIndex loadInstIdx = insertLoadBefore(mi, ss, vreg, trc);
- LiveIndex start = lis->getDefIndex(loadInstIdx),
- end = lis->getUseIndex(lis->getInstructionIndex(mi));
+ SlotIndex loadInstIdx = insertLoadBefore(mi, ss, vreg, trc);
+ SlotIndex start = loadInstIdx.getDefIndex(),
+ end = lis->getInstructionIndex(mi).getUseIndex();
VNInfo *vni =
li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator());
@@ -321,21 +325,21 @@ public:
vrm->assignVirt2StackSlot(li->reg, ss);
MachineInstr *mi = 0;
- LiveIndex storeIdx = LiveIndex();
+ SlotIndex storeIdx = SlotIndex();
if (valno->isDefAccurate()) {
// If we have an accurate def we can just grab an iterator to the instr
// after the def.
mi = lis->getInstructionFromIndex(valno->def);
- storeIdx = lis->getDefIndex(insertStoreAfter(mi, ss, li->reg, trc));
+ storeIdx = insertStoreAfter(mi, ss, li->reg, trc).getDefIndex();
} else {
// if we get here we have a PHI def.
mi = &lis->getMBBFromIndex(valno->def)->front();
- storeIdx = lis->getDefIndex(insertStoreBefore(mi, ss, li->reg, trc));
+ storeIdx = insertStoreBefore(mi, ss, li->reg, trc).getDefIndex();
}
MachineBasicBlock *defBlock = mi->getParent();
- LiveIndex loadIdx = LiveIndex();
+ SlotIndex loadIdx = SlotIndex();
// Now we need to find the load...
MachineBasicBlock::iterator useItr(mi);
@@ -343,11 +347,11 @@ public:
if (useItr != defBlock->end()) {
MachineInstr *loadInst = useItr;
- loadIdx = lis->getUseIndex(insertLoadBefore(loadInst, ss, li->reg, trc));
+ loadIdx = insertLoadBefore(loadInst, ss, li->reg, trc).getUseIndex();
}
else {
MachineInstr *loadInst = &defBlock->back();
- loadIdx = lis->getUseIndex(insertLoadAfter(loadInst, ss, li->reg, trc));
+ loadIdx = insertLoadAfter(loadInst, ss, li->reg, trc).getUseIndex();
}
li->removeRange(storeIdx, loadIdx, true);
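Taken together, the trivial spiller brackets the value's time on the stack: the interval keeps the def slot of the store and the use slot of the reload, and everything strictly between them is carved out of the register's live interval. A toy version of that removeRange carve-out (plain structs, not LLVM's types):

    #include <cstdio>
    #include <vector>

    struct Range { unsigned start, end; }; // half-open [start, end)

    // Toy version of LiveInterval::removeRange(storeIdx, loadIdx): carve
    // [lo, hi) out of a sorted, non-overlapping range list, splitting any
    // range that spans the removed stretch.
    static void removeRange(std::vector<Range> &rs, unsigned lo, unsigned hi) {
      std::vector<Range> out;
      for (const Range &r : rs) {
        if (r.end <= lo || r.start >= hi) { out.push_back(r); continue; }
        if (r.start < lo) out.push_back({r.start, lo}); // keep the prefix
        if (r.end > hi)   out.push_back({hi, r.end});   // keep the suffix
      }
      rs.swap(out);
    }

    int main() {
      // vreg live over [4, 40); store def slot at 10, reload use slot at 30.
      std::vector<Range> li = {{4, 40}};
      removeRange(li, 10, 30);
      for (const Range &r : li)
        std::printf("[%u, %u)\n", r.start, r.end); // [4, 10) then [30, 40)
      return 0;
    }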
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 0204969..e8ee822 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -37,7 +37,7 @@ SSPBufferSize("stack-protector-buffer-size", cl::init(8),
"stack protection"));
namespace {
- class VISIBILITY_HIDDEN StackProtector : public FunctionPass {
+ class StackProtector : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// target type sizes.
const TargetLowering *TLI;
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index fad0808..c299192 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -53,7 +52,7 @@ STATISTIC(NumStoreElim, "Number of stores eliminated");
STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
namespace {
- class VISIBILITY_HIDDEN StackSlotColoring : public MachineFunctionPass {
+ class StackSlotColoring : public MachineFunctionPass {
bool ColorWithRegs;
LiveStacks* LS;
VirtRegMap* VRM;
@@ -99,6 +98,8 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveStacks>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 48d6dc1..3c13906 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -32,12 +32,11 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
- struct VISIBILITY_HIDDEN StrongPHIElimination : public MachineFunctionPass {
+ struct StrongPHIElimination : public MachineFunctionPass {
static char ID; // Pass identification, replacement for typeid
StrongPHIElimination() : MachineFunctionPass(&ID) {}
@@ -73,6 +72,8 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveIntervals>();
// TODO: Actually make this true.
@@ -295,7 +296,7 @@ StrongPHIElimination::computeDomForest(
static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
LiveIntervals& LI) {
LiveInterval& I = LI.getOrCreateInterval(r);
- LiveIndex idx = LI.getMBBStartIdx(MBB);
+ SlotIndex idx = LI.getMBBStartIdx(MBB);
return I.liveAt(idx);
}
@@ -428,7 +429,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
}
LiveInterval& PI = LI.getOrCreateInterval(DestReg);
- LiveIndex pIdx = LI.getDefIndex(LI.getInstructionIndex(P));
+ SlotIndex pIdx = LI.getInstructionIndex(P).getDefIndex();
VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
@@ -748,7 +749,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
LiveInterval& I = LI.getInterval(curr.second);
MachineBasicBlock::iterator term = MBB->getFirstTerminator();
- LiveIndex endIdx = LiveIndex();
+ SlotIndex endIdx = SlotIndex();
if (term != MBB->end())
endIdx = LI.getInstructionIndex(term);
else
@@ -772,7 +773,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
// Renumber the instructions so that we can perform the index computations
// needed to create new live intervals.
- LI.computeNumbering();
+ LI.renumber();
// For copies that we inserted at the ends of predecessors, we construct
// live intervals. This is pretty easy, since we know that the destination
@@ -784,15 +785,15 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) {
if (RegHandled.insert(I->first).second) {
LiveInterval& Int = LI.getOrCreateInterval(I->first);
- LiveIndex instrIdx = LI.getInstructionIndex(I->second);
- if (Int.liveAt(LI.getDefIndex(instrIdx)))
- Int.removeRange(LI.getDefIndex(instrIdx),
- LI.getNextSlot(LI.getMBBEndIdx(I->second->getParent())),
+ SlotIndex instrIdx = LI.getInstructionIndex(I->second);
+ if (Int.liveAt(instrIdx.getDefIndex()))
+ Int.removeRange(instrIdx.getDefIndex(),
+ LI.getMBBEndIdx(I->second->getParent()).getNextSlot(),
true);
LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second);
R.valno->setCopy(I->second);
- R.valno->def = LI.getDefIndex(LI.getInstructionIndex(I->second));
+ R.valno->def = LI.getInstructionIndex(I->second).getDefIndex();
}
}
}
@@ -817,8 +818,8 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
Stacks[I->getOperand(i).getReg()].size()) {
// Remove the live range for the old vreg.
LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg());
- LiveInterval::iterator OldLR = OldInt.FindLiveRangeContaining(
- LI.getUseIndex(LI.getInstructionIndex(I)));
+ LiveInterval::iterator OldLR =
+ OldInt.FindLiveRangeContaining(LI.getInstructionIndex(I).getUseIndex());
if (OldLR != OldInt.end())
OldInt.removeRange(*OldLR, true);
@@ -830,11 +831,10 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
VNInfo* FirstVN = *Int.vni_begin();
FirstVN->setHasPHIKill(false);
if (I->getOperand(i).isKill())
- FirstVN->addKill(
- LI.getUseIndex(LI.getInstructionIndex(I)));
+ FirstVN->addKill(LI.getInstructionIndex(I).getUseIndex());
LiveRange LR (LI.getMBBStartIdx(I->getParent()),
- LI.getNextSlot(LI.getUseIndex(LI.getInstructionIndex(I))),
+ LI.getInstructionIndex(I).getUseIndex().getNextSlot(),
FirstVN);
Int.addRange(LR);
@@ -863,14 +863,14 @@ bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
LiveInterval& LHS = LI.getOrCreateInterval(primary);
LiveInterval& RHS = LI.getOrCreateInterval(secondary);
- LI.computeNumbering();
+ LI.renumber();
DenseMap<VNInfo*, VNInfo*> VNMap;
for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
LiveRange R = *I;
- LiveIndex Start = R.start;
- LiveIndex End = R.end;
+ SlotIndex Start = R.start;
+ SlotIndex End = R.end;
if (LHS.getLiveRangeContaining(Start))
return false;
@@ -964,19 +964,19 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(),
I->first, SI->first, RC, RC);
- LI.computeNumbering();
+ LI.renumber();
LiveInterval& Int = LI.getOrCreateInterval(I->first);
- LiveIndex instrIdx =
+ SlotIndex instrIdx =
LI.getInstructionIndex(--SI->second->getFirstTerminator());
- if (Int.liveAt(LI.getDefIndex(instrIdx)))
- Int.removeRange(LI.getDefIndex(instrIdx),
- LI.getNextSlot(LI.getMBBEndIdx(SI->second)), true);
+ if (Int.liveAt(instrIdx.getDefIndex()))
+ Int.removeRange(instrIdx.getDefIndex(),
+ LI.getMBBEndIdx(SI->second).getNextSlot(), true);
LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
--SI->second->getFirstTerminator());
R.valno->setCopy(--SI->second->getFirstTerminator());
- R.valno->def = LI.getDefIndex(instrIdx);
+ R.valno->def = instrIdx.getDefIndex();
DEBUG(errs() << "Renaming failed: " << SI->first << " -> "
<< I->first << "\n");
@@ -1011,7 +1011,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
if (PI.containsOneValue()) {
LI.removeInterval(DestReg);
} else {
- LiveIndex idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
+ SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
PI.removeRange(*PI.getLiveRangeContaining(idx), true);
}
} else {
@@ -1025,7 +1025,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
LiveInterval& InputI = LI.getInterval(reg);
if (MBB != PInstr->getParent() &&
InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
- InputI.expiredAt(LI.getNextIndex(LI.getInstructionIndex(PInstr))))
+ InputI.expiredAt(LI.getInstructionIndex(PInstr).getNextIndex()))
InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
LI.getInstructionIndex(PInstr),
true);
@@ -1033,7 +1033,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
// If the PHI is not dead, then the valno defined by the PHI
// now has an unknown def.
- LiveIndex idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
+ SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
const LiveRange* PLR = PI.getLiveRangeContaining(idx);
PLR->valno->setIsPHIDef(true);
LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
@@ -1045,7 +1045,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
PInstr->eraseFromParent();
}
- LI.computeNumbering();
+ LI.renumber();
return true;
}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index a5a0f5b..0a6a0d7 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -39,7 +39,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
@@ -57,8 +56,7 @@ STATISTIC(NumReMats, "Number of instructions re-materialized");
STATISTIC(NumDeletes, "Number of dead instructions deleted");
namespace {
- class VISIBILITY_HIDDEN TwoAddressInstructionPass
- : public MachineFunctionPass {
+ class TwoAddressInstructionPass : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index e7c3412..6ab5db2 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -33,14 +33,13 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN UnreachableBlockElim : public FunctionPass {
+ class UnreachableBlockElim : public FunctionPass {
virtual bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
@@ -95,8 +94,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
namespace {
- class VISIBILITY_HIDDEN UnreachableMachineBlockElim :
- public MachineFunctionPass {
+ class UnreachableMachineBlockElim : public MachineFunctionPass {
virtual bool runOnMachineFunction(MachineFunction &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
MachineModuleInfo *MMI;
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index cac098b..ce3eed1 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -56,7 +56,7 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
TII = mf.getTarget().getInstrInfo();
TRI = mf.getTarget().getRegisterInfo();
MF = &mf;
-
+
ReMatId = MAX_STACK_SLOT+1;
LowSpillSlot = HighSpillSlot = NO_STACK_SLOT;
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index bdc2d1f..a5599f6 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -80,7 +80,7 @@ namespace llvm {
/// Virt2SplitKillMap - This maps a split virtual register to its last use
/// (kill) index.
- IndexedMap<LiveIndex> Virt2SplitKillMap;
+ IndexedMap<SlotIndex> Virt2SplitKillMap;
/// ReMatMap - This is virtual register to re-materialized instruction
/// mapping. Each virtual register whose definition is going to be
@@ -142,7 +142,7 @@ namespace llvm {
VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG),
Virt2StackSlotMap(NO_STACK_SLOT),
Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
- Virt2SplitKillMap(LiveIndex()), ReMatMap(NULL),
+ Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
ReMatId(MAX_STACK_SLOT+1),
LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -266,17 +266,17 @@ namespace llvm {
}
/// @brief record the last use (kill) of a split virtual register.
- void addKillPoint(unsigned virtReg, LiveIndex index) {
+ void addKillPoint(unsigned virtReg, SlotIndex index) {
Virt2SplitKillMap[virtReg] = index;
}
- LiveIndex getKillPoint(unsigned virtReg) const {
+ SlotIndex getKillPoint(unsigned virtReg) const {
return Virt2SplitKillMap[virtReg];
}
/// @brief remove the last use (kill) of a split virtual register.
void removeKillPoint(unsigned virtReg) {
- Virt2SplitKillMap[virtReg] = LiveIndex();
+ Virt2SplitKillMap[virtReg] = SlotIndex();
}
/// @brief returns true if the specified MachineInstr is a spill point.
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 401bcb6..fd80f46 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -13,7 +13,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -66,7 +65,7 @@ namespace {
/// This class is intended for use with the new spilling framework only. It
/// rewrites vreg def/uses to use the assigned preg, but does not insert any
/// spill code.
-struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter {
+struct TrivialRewriter : public VirtRegRewriter {
bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
LiveIntervals* LIs) {
@@ -125,7 +124,7 @@ namespace {
/// on a per-stack-slot / remat id basis as the low bit in the value of the
/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
/// this bit and addAvailable sets it.
-class VISIBILITY_HIDDEN AvailableSpills {
+class AvailableSpills {
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
@@ -340,7 +339,7 @@ struct ReusedOp {
/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that
/// is reused instead of reloaded.
-class VISIBILITY_HIDDEN ReuseInfo {
+class ReuseInfo {
MachineInstr &MI;
std::vector<ReusedOp> Reuses;
BitVector PhysRegsClobbered;
@@ -614,7 +613,7 @@ static void ReMaterialize(MachineBasicBlock &MBB,
assert(MO.isUse());
unsigned SubIdx = MO.getSubReg();
unsigned Phys = VRM.getPhys(VirtReg);
- assert(Phys);
+ assert(Phys && "Virtual register is not assigned a register?");
unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
MO.setReg(RReg);
MO.setSubReg(0);
@@ -857,7 +856,7 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
Spills.ClobberPhysReg(NewPhysReg);
Spills.ClobberPhysReg(NewOp.PhysRegReused);
- unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg;
+ unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg;
MI->getOperand(NewOp.Operand).setReg(RReg);
MI->getOperand(NewOp.Operand).setSubReg(0);
@@ -995,7 +994,7 @@ namespace {
namespace {
-class VISIBILITY_HIDDEN LocalRewriter : public VirtRegRewriter {
+class LocalRewriter : public VirtRegRewriter {
MachineRegisterInfo *RegInfo;
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
@@ -1431,8 +1430,9 @@ private:
std::vector<MachineOperand*> &KillOps,
VirtRegMap &VRM) {
+ MachineBasicBlock::iterator oldNextMII = next(MII);
TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC);
- MachineInstr *StoreMI = next(MII);
+ MachineInstr *StoreMI = prior(oldNextMII);
VRM.addSpillSlotUse(StackSlot, StoreMI);
DEBUG(errs() << "Store:\t" << *StoreMI);
@@ -1467,7 +1467,9 @@ private:
}
}
- LastStore = next(MII);
+ // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume
+ // the last of multiple instructions is the actual store.
+ LastStore = prior(oldNextMII);
// If the stack slot value was previously available in some other
// register, change it now. Otherwise, make the register available,
@@ -1749,8 +1751,9 @@ private:
const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
unsigned Phys = VRM.getPhys(VirtReg);
int StackSlot = VRM.getStackSlot(VirtReg);
+ MachineBasicBlock::iterator oldNextMII = next(MII);
TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC);
- MachineInstr *StoreMI = next(MII);
+ MachineInstr *StoreMI = prior(oldNextMII);
VRM.addSpillSlotUse(StackSlot, StoreMI);
DEBUG(errs() << "Store:\t" << *StoreMI);
VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
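Both hunks fix the same latent bug: storeRegToStackSlot may expand into more than one instruction (the PPC Altivec case the comment mentions), so next(MII) taken after the call points at the first emitted instruction rather than the last. Capturing the old successor first and stepping back from it always lands on the final instruction of the sequence. The same trick sketched on a plain std::list, with std::next/std::prev standing in for LLVM's ilist helpers:

    #include <cstdio>
    #include <iterator>
    #include <list>

    int main() {
      std::list<int> block = {1, 2, 3};             // toy "instructions"
      std::list<int>::iterator MII = block.begin(); // spill after *MII == 1

      // Capture the old successor BEFORE inserting anything.
      std::list<int>::iterator oldNextMII = std::next(MII);

      // A "multi-instruction spill sequence": two instructions, 10 then 11.
      block.insert(oldNextMII, 10);
      block.insert(oldNextMII, 11);

      // prior(oldNextMII) is the LAST inserted instruction, whereas
      // next(MII) only reaches the first one.
      std::printf("last store instr = %d\n", *std::prev(oldNextMII)); // 11
      std::printf("next(MII)        = %d\n", *std::next(MII));        // 10
      return 0;
    }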
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 053d960..21499e5 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -49,7 +49,7 @@ ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0;
ExecutionEngine::ExecutionEngine(ModuleProvider *P)
: EEState(*this),
LazyFunctionCreator(0) {
- LazyCompilationDisabled = false;
+ CompilingLazily = false;
GVCompilationDisabled = false;
SymbolSearchingDisabled = false;
DlsymStubsEnabled = false;
@@ -117,8 +117,7 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
void *ExecutionEngineState::RemoveMapping(
const MutexGuard &, const GlobalValue *ToUnmap) {
- std::map<MapUpdatingCVH, void *>::iterator I =
- GlobalAddressMap.find(getVH(ToUnmap));
+ GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap);
void *OldVal;
if (I == GlobalAddressMap.end())
OldVal = 0;
@@ -141,7 +140,7 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
DEBUG(errs() << "JIT: Map \'" << GV->getName()
<< "\' to [" << Addr << "]\n";);
- void *&CurVal = EEState.getGlobalAddressMap(locked)[EEState.getVH(GV)];
+ void *&CurVal = EEState.getGlobalAddressMap(locked)[GV];
assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!");
CurVal = Addr;
@@ -183,7 +182,7 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
MutexGuard locked(lock);
- std::map<ExecutionEngineState::MapUpdatingCVH, void *> &Map =
+ ExecutionEngineState::GlobalAddressMapTy &Map =
EEState.getGlobalAddressMap(locked);
// Deleting from the mapping?
@@ -191,7 +190,7 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
return EEState.RemoveMapping(locked, GV);
}
- void *&CurVal = Map[EEState.getVH(GV)];
+ void *&CurVal = Map[GV];
void *OldVal = CurVal;
if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty())
@@ -214,8 +213,8 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
MutexGuard locked(lock);
- std::map<ExecutionEngineState::MapUpdatingCVH, void*>::iterator I =
- EEState.getGlobalAddressMap(locked).find(EEState.getVH(GV));
+ ExecutionEngineState::GlobalAddressMapTy::iterator I =
+ EEState.getGlobalAddressMap(locked).find(GV);
return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0;
}
@@ -227,7 +226,7 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
// If we haven't computed the reverse mapping yet, do so first.
if (EEState.getGlobalAddressReverseMap(locked).empty()) {
- for (std::map<ExecutionEngineState::MapUpdatingCVH, void *>::iterator
+ for (ExecutionEngineState::GlobalAddressMapTy::iterator
I = EEState.getGlobalAddressMap(locked).begin(),
E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I)
EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(I->second,
@@ -476,7 +475,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
return getPointerToFunction(F);
MutexGuard locked(lock);
- void *p = EEState.getGlobalAddressMap(locked)[EEState.getVH(GV)];
+ void *p = EEState.getGlobalAddressMap(locked)[GV];
if (p)
return p;
@@ -486,7 +485,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
EmitGlobalVariable(GVar);
else
llvm_unreachable("Global hasn't had an address allocated yet!");
- return EEState.getGlobalAddressMap(locked)[EEState.getVH(GV)];
+ return EEState.getGlobalAddressMap(locked)[GV];
}
/// This function converts a Constant* into a GenericValue. The interesting
@@ -761,8 +760,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
Result.PointerVal = 0;
else if (const Function *F = dyn_cast<Function>(C))
Result = PTOGV(getPointerToFunctionOrStub(const_cast<Function*>(F)));
- else if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
+ else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
Result = PTOGV(getOrEmitGlobalVariable(const_cast<GlobalVariable*>(GV)));
+ else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ Result = PTOGV(getPointerToBasicBlock(const_cast<BasicBlock*>(
+ BA->getBasicBlock())));
else
llvm_unreachable("Unknown constant pointer type!");
break;
@@ -1072,17 +1074,22 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
++NumGlobals;
}
-ExecutionEngineState::MapUpdatingCVH::MapUpdatingCVH(
- ExecutionEngineState &EES, const GlobalValue *GV)
- : CallbackVH(const_cast<GlobalValue*>(GV)), EES(EES) {}
+ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE)
+ : EE(EE), GlobalAddressMap(this) {
+}
-void ExecutionEngineState::MapUpdatingCVH::deleted() {
- MutexGuard locked(EES.EE.lock);
- EES.RemoveMapping(locked, *this); // Destroys *this.
+sys::Mutex *ExecutionEngineState::AddressMapConfig::getMutex(
+ ExecutionEngineState *EES) {
+ return &EES->EE.lock;
+}
+void ExecutionEngineState::AddressMapConfig::onDelete(
+ ExecutionEngineState *EES, const GlobalValue *Old) {
+ void *OldVal = EES->GlobalAddressMap.lookup(Old);
+ EES->GlobalAddressReverseMap.erase(OldVal);
}
-void ExecutionEngineState::MapUpdatingCVH::allUsesReplacedWith(
- Value *new_value) {
+void ExecutionEngineState::AddressMapConfig::onRAUW(
+ ExecutionEngineState *, const GlobalValue *, const GlobalValue *) {
assert(false && "The ExecutionEngine doesn't know how to handle a"
" RAUW on a value it has a global mapping for.");
}
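The config struct is the whole point of the ValueMap migration: instead of each key being a CallbackVH that knows how to update the map, the map consults a policy type for its lock (getMutex), key deletion (onDelete), and replaceAllUsesWith (onRAUW). A toy model of that callback shape, using an ordinary std::map and string keys in place of llvm::ValueMap and GlobalValue pointers:

    #include <cstdio>
    #include <map>
    #include <string>

    struct EngineState;

    // Toy model of the ValueMapConfig pattern: the map's policy for dying
    // or replaced keys lives in a config type, parameterized by ExtraData.
    struct AddressMapConfig {
      typedef EngineState *ExtraData;
      static void onDelete(EngineState *ees, const std::string &oldKey);
      static void onRAUW(EngineState *, const std::string &,
                         const std::string &) {
        std::printf("RAUW of a mapped global is not supported\n");
      }
    };

    struct EngineState {
      std::map<std::string, void *> globalAddressMap;
      std::map<void *, std::string> reverseMap;
    };

    // Mirrors the onDelete above: drop the reverse mapping for the dying
    // key's address (the real ValueMap erases the key's own entry itself;
    // the toy does it by hand).
    void AddressMapConfig::onDelete(EngineState *ees,
                                    const std::string &oldKey) {
      void *oldVal = ees->globalAddressMap[oldKey];
      ees->reverseMap.erase(oldVal);
      ees->globalAddressMap.erase(oldKey);
    }

    int main() {
      EngineState ees;
      int slot = 0;
      ees.globalAddressMap["g"] = &slot;
      ees.reverseMap[&slot] = "g";

      AddressMapConfig::onDelete(&ees, "g"); // simulate the key's death
      std::printf("entries left: %zu\n", ees.globalAddressMap.size()); // 0
      return 0;
    }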
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index f8c775e..01bd2c7 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -572,9 +572,9 @@ void Interpreter::exitCalled(GenericValue GV) {
// runAtExitHandlers() assumes there are no stack frames, but
// if exit() was called, then it had a stack frame. Blow away
// the stack before interpreting atexit handlers.
- ECStack.clear ();
- runAtExitHandlers ();
- exit (GV.IntVal.zextOrTrunc(32).getZExtValue());
+ ECStack.clear();
+ runAtExitHandlers();
+ exit(GV.IntVal.zextOrTrunc(32).getZExtValue());
}
/// Pop the last stack frame off of ECStack and then copy the result
@@ -585,8 +585,8 @@ void Interpreter::exitCalled(GenericValue GV) {
/// care of switching to the normal destination BB, if we are returning
/// from an invoke.
///
-void Interpreter::popStackAndReturnValueToCaller (const Type *RetTy,
- GenericValue Result) {
+void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy,
+ GenericValue Result) {
// Pop the current stack frame.
ECStack.pop_back();
@@ -629,15 +629,15 @@ void Interpreter::visitUnwindInst(UnwindInst &I) {
// Unwind stack
Instruction *Inst;
do {
- ECStack.pop_back ();
- if (ECStack.empty ())
+ ECStack.pop_back();
+ if (ECStack.empty())
llvm_report_error("Empty stack during unwind!");
- Inst = ECStack.back ().Caller.getInstruction ();
- } while (!(Inst && isa<InvokeInst> (Inst)));
+ Inst = ECStack.back().Caller.getInstruction();
+ } while (!(Inst && isa<InvokeInst>(Inst)));
// Return from invoke
- ExecutionContext &InvokingSF = ECStack.back ();
- InvokingSF.Caller = CallSite ();
+ ExecutionContext &InvokingSF = ECStack.back();
+ InvokingSF.Caller = CallSite();
// Go to exceptional destination BB of invoke instruction
SwitchToNewBasicBlock(cast<InvokeInst>(Inst)->getUnwindDest(), InvokingSF);
@@ -678,6 +678,13 @@ void Interpreter::visitSwitchInst(SwitchInst &I) {
SwitchToNewBasicBlock(Dest, SF);
}
+void Interpreter::visitIndirectBrInst(IndirectBrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ void *Dest = GVTOP(getOperandValue(I.getAddress(), SF));
+ SwitchToNewBasicBlock((BasicBlock*)Dest, SF);
+}
+
+
// SwitchToNewBasicBlock - This method is used to jump to a new basic block.
// This function handles the actual updating of block and instruction iterators
// as well as execution of all of the PHI nodes in the destination block.
@@ -720,7 +727,7 @@ void Interpreter::SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF){
// Memory Instruction Implementations
//===----------------------------------------------------------------------===//
-void Interpreter::visitAllocationInst(AllocationInst &I) {
+void Interpreter::visitAllocaInst(AllocaInst &I) {
ExecutionContext &SF = ECStack.back();
const Type *Ty = I.getType()->getElementType(); // Type to be allocated
@@ -749,14 +756,6 @@ void Interpreter::visitAllocationInst(AllocationInst &I) {
ECStack.back().Allocas.add(Memory);
}
-void Interpreter::visitFreeInst(FreeInst &I) {
- ExecutionContext &SF = ECStack.back();
- assert(isa<PointerType>(I.getOperand(0)->getType()) && "Freeing nonptr?");
- GenericValue Value = getOperandValue(I.getOperand(0), SF);
- // TODO: Check to make sure memory is allocated
- free(GVTOP(Value)); // Free memory
-}
-
// getElementOffset - The workhorse for getelementptr.
//
GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
@@ -835,7 +834,7 @@ void Interpreter::visitCallSite(CallSite CS) {
// Check to see if this is an intrinsic function call...
Function *F = CS.getCalledFunction();
- if (F && F->isDeclaration ())
+ if (F && F->isDeclaration())
switch (F->getIntrinsicID()) {
case Intrinsic::not_intrinsic:
break;
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index e026287..038830c 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -19,7 +19,7 @@
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/raw_ostream.h"
@@ -135,12 +135,12 @@ public:
void visitReturnInst(ReturnInst &I);
void visitBranchInst(BranchInst &I);
void visitSwitchInst(SwitchInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
void visitBinaryOperator(BinaryOperator &I);
void visitICmpInst(ICmpInst &I);
void visitFCmpInst(FCmpInst &I);
- void visitAllocationInst(AllocationInst &I);
- void visitFreeInst(FreeInst &I);
+ void visitAllocaInst(AllocaInst &I);
void visitLoadInst(LoadInst &I);
void visitStoreInst(StoreInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
@@ -203,6 +203,7 @@ private: // Helper functions
void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF);
void *getPointerToFunction(Function *F) { return (void*)F; }
+ void *getPointerToBasicBlock(BasicBlock *BB) { return (void*)BB; }
void initializeExecutionEngine() { }
void initializeExternalFunctions();
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index b2a268b..e21d760 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -556,10 +556,10 @@ void JIT::NotifyFunctionEmitted(
}
}
-void JIT::NotifyFreeingMachineCode(const Function &F, void *OldPtr) {
+void JIT::NotifyFreeingMachineCode(void *OldPtr) {
MutexGuard locked(lock);
for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
- EventListeners[I]->NotifyFreeingMachineCode(F, OldPtr);
+ EventListeners[I]->NotifyFreeingMachineCode(OldPtr);
}
}
@@ -599,7 +599,7 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
isAlreadyCodeGenerating = false;
// If the function referred to another function that had not yet been
- // read from bitcode, but we are jitting non-lazily, emit it now.
+ // read from bitcode, and we are jitting non-lazily, emit it now.
while (!jitstate->getPendingFunctions(locked).empty()) {
Function *PF = jitstate->getPendingFunctions(locked).back();
jitstate->getPendingFunctions(locked).pop_back();
@@ -616,7 +616,7 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
// If the JIT is configured to emit info so that dlsym can be used to
// rewrite stubs to external globals, do so now.
- if (areDlsymStubsEnabled() && isLazyCompilationDisabled())
+ if (areDlsymStubsEnabled() && !isCompilingLazily())
updateDlsymStubTable();
}
@@ -659,7 +659,7 @@ void *JIT::getPointerToFunction(Function *F) {
return Addr;
}
- if (F->isDeclaration()) {
+ if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
bool AbortOnFailure =
!areDlsymStubsEnabled() && !F->hasExternalWeakLinkage();
void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 525cc84..fb3cb24 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -128,6 +128,11 @@ public:
///
void *getPointerToFunction(Function *F);
+ void *getPointerToBasicBlock(BasicBlock *BB) {
+ assert(0 && "JIT does not support address-of-label yet!");
+ return 0;
+ }
+
/// getOrEmitGlobalVariable - Return the address of the specified global
/// variable, possibly emitting it to memory if needed. This is used by the
/// Emitter.
@@ -183,7 +188,7 @@ public:
void NotifyFunctionEmitted(
const Function &F, void *Code, size_t Size,
const JITEvent_EmittedFunctionDetails &Details);
- void NotifyFreeingMachineCode(const Function &F, void *OldPtr);
+ void NotifyFreeingMachineCode(void *OldPtr);
private:
static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
index dce506b..7e53d78 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
@@ -16,7 +16,7 @@
#define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <string>
// This must be kept in sync with gdb/gdb/jit.h.
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 073d6fb..79f1eb4 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -46,6 +46,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
#include <algorithm>
#ifndef NDEBUG
#include <iomanip>
@@ -62,12 +63,29 @@ static JIT *TheJIT = 0;
// JIT lazy compilation code.
//
namespace {
+ class JITResolverState;
+
+ template<typename ValueTy>
+ struct NoRAUWValueMapConfig : public ValueMapConfig<ValueTy> {
+ typedef JITResolverState *ExtraData;
+ static void onRAUW(JITResolverState *, Value *Old, Value *New) {
+ assert(false && "The JIT doesn't know how to handle a"
+ " RAUW on a value it has emitted.");
+ }
+ };
+
+ struct CallSiteValueMapConfig : public NoRAUWValueMapConfig<Function*> {
+ typedef JITResolverState *ExtraData;
+ static void onDelete(JITResolverState *JRS, Function *F);
+ };
+
class JITResolverState {
public:
- typedef DenseMap<AssertingVH<Function>, void*> FunctionToStubMapTy;
+ typedef ValueMap<Function*, void*, NoRAUWValueMapConfig<Function*> >
+ FunctionToStubMapTy;
typedef std::map<void*, AssertingVH<Function> > CallSiteToFunctionMapTy;
- typedef DenseMap<AssertingVH<Function>, SmallPtrSet<void*, 1> >
- FunctionToCallSitesMapTy;
+ typedef ValueMap<Function *, SmallPtrSet<void*, 1>,
+ CallSiteValueMapConfig> FunctionToCallSitesMapTy;
typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy;
private:
/// FunctionToStubMap - Keep track of the stub created for a particular
@@ -84,6 +102,9 @@ namespace {
GlobalToIndirectSymMapTy GlobalToIndirectSymMap;
public:
+ JITResolverState() : FunctionToStubMap(this),
+ FunctionToCallSitesMap(this) {}
+
FunctionToStubMapTy& getFunctionToStubMap(const MutexGuard& locked) {
assert(locked.holds(TheJIT->lock));
return FunctionToStubMap;
@@ -111,8 +132,10 @@ namespace {
void AddCallSite(const MutexGuard &locked, void *CallSite, Function *F) {
assert(locked.holds(TheJIT->lock));
- assert(CallSiteToFunctionMap.insert(std::make_pair(CallSite, F)).second &&
- "Pair was already in CallSiteToFunctionMap");
+ bool Inserted = CallSiteToFunctionMap.insert(
+ std::make_pair(CallSite, F)).second;
+ (void)Inserted;
+ assert(Inserted && "Pair was already in CallSiteToFunctionMap");
FunctionToCallSitesMap[F].insert(CallSite);
}
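The Inserted/(void)Inserted dance exists because assert bodies compile away under NDEBUG: with the insert call written inside the assert, release builds would never populate the map at all. Hoisting the side effect out and asserting only on the saved flag keeps both build modes correct, as this small reproduction shows (a toy std::set, not the JIT's maps):

    #include <cassert>
    #include <cstdio>
    #include <set>

    int main() {
      std::set<int> s;

      // BAD: under -DNDEBUG the whole expression vanishes, and 1 is
      // never inserted:
      //   assert(s.insert(1).second && "already present");

      // GOOD: the side effect always runs; only the check compiles away.
      bool inserted = s.insert(1).second;
      (void)inserted; // silences "unused variable" when asserts are off
      assert(inserted && "already present");

      std::printf("size = %zu\n", s.size()); // always 1
      return 0;
    }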
@@ -142,8 +165,9 @@ namespace {
FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F);
assert(F2C_I != FunctionToCallSitesMap.end() &&
"FunctionToCallSitesMap broken");
- assert(F2C_I->second.erase(Stub) &&
- "FunctionToCallSitesMap broken");
+ bool Erased = F2C_I->second.erase(Stub);
+ (void)Erased;
+ assert(Erased && "FunctionToCallSitesMap broken");
if (F2C_I->second.empty())
FunctionToCallSitesMap.erase(F2C_I);
@@ -152,13 +176,17 @@ namespace {
void EraseAllCallSites(const MutexGuard &locked, Function *F) {
assert(locked.holds(TheJIT->lock));
+ EraseAllCallSitesPrelocked(F);
+ }
+ void EraseAllCallSitesPrelocked(Function *F) {
FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
if (F2C == FunctionToCallSitesMap.end())
return;
for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(),
E = F2C->second.end(); I != E; ++I) {
- assert(CallSiteToFunctionMap.erase(*I) == 1 &&
- "Missing call site->function mapping");
+ bool Erased = CallSiteToFunctionMap.erase(*I);
+ (void)Erased;
+ assert(Erased && "Missing call site->function mapping");
}
FunctionToCallSitesMap.erase(F2C);
}
@@ -245,6 +273,10 @@ namespace {
JITResolver *JITResolver::TheJITResolver = 0;
+void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
+ JRS->EraseAllCallSitesPrelocked(F);
+}
+
/// getFunctionStubIfAvailable - This returns a pointer to a function stub
/// if it has already been created.
void *JITResolver::getFunctionStubIfAvailable(Function *F) {
@@ -263,11 +295,11 @@ void *JITResolver::getFunctionStub(Function *F) {
void *&Stub = state.getFunctionToStubMap(locked)[F];
if (Stub) return Stub;
- // Call the lazy resolver function unless we are JIT'ing non-lazily, in which
- // case we must resolve the symbol now.
- void *Actual = TheJIT->isLazyCompilationDisabled()
- ? (void *)0 : (void *)(intptr_t)LazyResolverFn;
-
+ // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we
+ // must resolve the symbol now.
+ void *Actual = TheJIT->isCompilingLazily()
+ ? (void *)(intptr_t)LazyResolverFn : (void *)0;
+
// If this is an external declaration, attempt to resolve the address now
// to place in the stub.
if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode()) {
@@ -302,7 +334,7 @@ void *JITResolver::getFunctionStub(Function *F) {
// If we are JIT'ing non-lazily but need to call a function that does not
// exist yet, add it to the JIT's work list so that we can fill in the stub
// address later.
- if (!Actual && TheJIT->isLazyCompilationDisabled())
+ if (!Actual && !TheJIT->isCompilingLazily())
if (!F->isDeclaration() || F->hasNotBeenReadFromBitcode())
TheJIT->addPendingFunction(F);
@@ -439,7 +471,7 @@ void *JITResolver::JITCompilerFn(void *Stub) {
// Otherwise we don't have it, do lazy compilation now.
// If lazy compilation is disabled, emit a useful error message and abort.
- if (TheJIT->isLazyCompilationDisabled()) {
+ if (!TheJIT->isCompilingLazily()) {
llvm_report_error("LLVM JIT requested to do lazy compilation of function '"
+ F->getName() + "' when lazy compiles are disabled!");
}
@@ -550,17 +582,24 @@ namespace {
JITEvent_EmittedFunctionDetails EmissionDetails;
struct EmittedCode {
- void *FunctionBody;
+ void *FunctionBody; // Beginning of the function's allocation.
+ void *Code; // The address the function's code actually starts at.
void *ExceptionTable;
- EmittedCode() : FunctionBody(0), ExceptionTable(0) {}
+ EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {}
+ };
+ struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
+ typedef JITEmitter *ExtraData;
+ static void onDelete(JITEmitter *, const Function*);
+ static void onRAUW(JITEmitter *, const Function*, const Function*);
};
- DenseMap<const Function *, EmittedCode> EmittedFunctions;
+ ValueMap<const Function *, EmittedCode,
+ EmittedFunctionConfig> EmittedFunctions;
// CurFnStubUses - For a given Function, a vector of stubs that it
// references. This facilitates the JIT detecting that a stub is no
// longer used, so that it may be deallocated.
- DenseMap<const Function *, SmallVector<void*, 1> > CurFnStubUses;
-
+ DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses;
+
// StubFnRefs - For a given pointer to a stub, a set of Functions which
// reference the stub. When the count of a stub's references drops to zero,
// the stub is unused.
@@ -574,7 +613,8 @@ namespace {
public:
JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
- : SizeEstimate(0), Resolver(jit), MMI(0), CurFn(0) {
+ : SizeEstimate(0), Resolver(jit), MMI(0), CurFn(0),
+ EmittedFunctions(this) {
MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
if (jit.getJITInfo().needsGOT()) {
MemMgr->AllocateGOT();
@@ -729,7 +769,7 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
// mechanism is capable of rewriting the instruction directly, prefer to do
// that instead of emitting a stub. This uses the lazy resolver, so is not
// legal if lazy compilation is disabled.
- if (DoesntNeedStub && !TheJIT->isLazyCompilationDisabled())
+ if (DoesntNeedStub && TheJIT->isCompilingLazily())
return Resolver.AddCallbackAtLocation(F, Reference);
// Otherwise, we have to emit a stub.
@@ -1030,6 +1070,7 @@ void JITEmitter::startFunction(MachineFunction &F) {
// About to start emitting the machine code for the function.
emitAlignment(std::max(F.getFunction()->getAlignment(), 8U));
TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr);
+ EmittedFunctions[F.getFunction()].Code = CurBufferPtr;
MBBLocations.clear();
@@ -1253,12 +1294,15 @@ void JITEmitter::retryWithMoreMemory(MachineFunction &F) {
/// deallocateMemForFunction - Deallocate all memory for the specified
/// function body. Also drop any references the function has to stubs.
+/// May be called while the Function is being destroyed inside ~Value().
void JITEmitter::deallocateMemForFunction(const Function *F) {
- DenseMap<const Function *, EmittedCode>::iterator Emitted =
- EmittedFunctions.find(F);
+ ValueMap<const Function *, EmittedCode, EmittedFunctionConfig>::iterator
+ Emitted = EmittedFunctions.find(F);
if (Emitted != EmittedFunctions.end()) {
MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody);
MemMgr->deallocateExceptionTable(Emitted->second.ExceptionTable);
+ TheJIT->NotifyFreeingMachineCode(Emitted->second.Code);
+
EmittedFunctions.erase(Emitted);
}
@@ -1487,6 +1531,17 @@ uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
return (uintptr_t)((char *)JumpTableBase + Offset);
}
+void JITEmitter::EmittedFunctionConfig::onDelete(
+ JITEmitter *Emitter, const Function *F) {
+ Emitter->deallocateMemForFunction(F);
+}
+void JITEmitter::EmittedFunctionConfig::onRAUW(
+ JITEmitter *, const Function*, const Function*) {
+ llvm_unreachable("The JIT doesn't know how to handle a"
+ " RAUW on a value it has emitted.");
+}
+
+
//===----------------------------------------------------------------------===//
// Public interface to this file
//===----------------------------------------------------------------------===//
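
The EmittedFunctionConfig and CallSiteValueMapConfig hunks above use the same idiom: a ValueMap whose Config struct routes Value lifetime events (deletion and RAUW) back to the owning object through an ExtraData pointer supplied at construction. A minimal sketch of that wiring, assuming only the ValueMap interface this update relies on; Cache and CacheConfig are illustrative names, not part of the patch:

#include "llvm/ADT/ValueMap.h"
#include "llvm/Function.h"

using namespace llvm;

struct Cache; // owner of the map; receives the callbacks

struct CacheConfig : public ValueMapConfig<const Function*> {
  typedef Cache *ExtraData; // handed to the map's constructor
  static void onDelete(Cache *C, const Function *F);
  static void onRAUW(Cache *C, const Function *Old, const Function *New);
};

struct Cache {
  ValueMap<const Function*, void*, CacheConfig> Entries;
  Cache() : Entries(this) {} // pass ourselves in as ExtraData

  // May fire while F is being destroyed inside ~Value(), so only the
  // map itself is touched here, mirroring deallocateMemForFunction.
  void drop(const Function *F) {
    ValueMap<const Function*, void*, CacheConfig>::iterator I =
        Entries.find(F);
    if (I != Entries.end())
      Entries.erase(I);
  }
};

void CacheConfig::onDelete(Cache *C, const Function *F) { C->drop(F); }
void CacheConfig::onRAUW(Cache *, const Function *, const Function *) {}
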
@@ -1625,13 +1680,9 @@ void JIT::updateDlsymStubTable() {
/// freeMachineCodeForFunction - release machine code memory for given Function.
///
void JIT::freeMachineCodeForFunction(Function *F) {
-
// Delete translation for this from the ExecutionEngine, so it will get
// retranslated next time it is used.
- void *OldPtr = updateGlobalMapping(F, 0);
-
- if (OldPtr)
- TheJIT->NotifyFreeingMachineCode(*F, OldPtr);
+ updateGlobalMapping(F, 0);
// Free the actual memory for the function body and related stuff.
assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
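
Several hunks in the JITEmitter changes above (AddCallSite, EraseStub, EraseAllCallSitesPrelocked) rewrite asserts whose argument performed the actual insert or erase; under NDEBUG the whole assert disappears, taking the side effect with it. The fix keeps the operation unconditional and casts the result to void so release builds do not warn. A standalone sketch of the pattern, with illustrative names:

#include <cassert>
#include <set>

void addUnique(std::set<int> &S, int V) {
  bool Inserted = S.insert(V).second; // side effect runs in all builds
  (void)Inserted;                     // silence unused-variable warnings
  assert(Inserted && "value was already present");
}
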
diff --git a/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp b/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp
deleted file mode 100644
index 53585b8..0000000
--- a/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-//===-- MacOSJITEventListener.cpp - Save symbol table for OSX perf tools --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a JITEventListener object that records JITted functions to
-// a global __jitSymbolTable linked list. Apple's performance tools use this to
-// determine a symbol name and accurate code range for a PC value. Because
-// performance tools are generally asynchronous, the code below is written with
-// the hope that it could be interrupted at any time and have useful answers.
-// However, we don't go crazy with atomic operations, we just do a "reasonable
-// effort".
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "macos-jit-event-listener"
-#include "llvm/Function.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
-#include <stddef.h>
-using namespace llvm;
-
-#ifdef __APPLE__
-#define ENABLE_JIT_SYMBOL_TABLE 0
-#endif
-
-#if ENABLE_JIT_SYMBOL_TABLE
-
-namespace {
-
-/// JITSymbolEntry - Each function that is JIT compiled results in one of these
-/// being added to an array of symbols. This indicates the name of the function
-/// as well as the address range it occupies. This allows the client to map
-/// from a PC value to the name of the function.
-struct JITSymbolEntry {
- const char *FnName; // FnName - a strdup'd string.
- void *FnStart;
- intptr_t FnSize;
-};
-
-
-struct JITSymbolTable {
- /// NextPtr - This forms a linked list of JitSymbolTable entries. This
- /// pointer is not used right now, but might be used in the future. Consider
- /// it reserved for future use.
- JITSymbolTable *NextPtr;
-
- /// Symbols - This is an array of JitSymbolEntry entries. Only the first
- /// 'NumSymbols' symbols are valid.
- JITSymbolEntry *Symbols;
-
- /// NumSymbols - This indicates the number entries in the Symbols array that
- /// are valid.
- unsigned NumSymbols;
-
- /// NumAllocated - This indicates the amount of space we have in the Symbols
- /// array. This is a private field that should not be read by external tools.
- unsigned NumAllocated;
-};
-
-class MacOSJITEventListener : public JITEventListener {
-public:
- virtual void NotifyFunctionEmitted(const Function &F,
- void *FnStart, size_t FnSize,
- const EmittedFunctionDetails &Details);
- virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr);
-};
-
-} // anonymous namespace.
-
-// This is a public symbol so the performance tools can find it.
-JITSymbolTable *__jitSymbolTable;
-
-namespace llvm {
-JITEventListener *createMacOSJITEventListener() {
- return new MacOSJITEventListener;
-}
-}
-
-// Adds the just-emitted function to the symbol table.
-void MacOSJITEventListener::NotifyFunctionEmitted(
- const Function &F, void *FnStart, size_t FnSize,
- const EmittedFunctionDetails &) {
- assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
- JITSymbolTable **SymTabPtrPtr = 0;
- SymTabPtrPtr = &__jitSymbolTable;
-
- // If this is the first entry in the symbol table, add the JITSymbolTable
- // index.
- if (*SymTabPtrPtr == 0) {
- JITSymbolTable *New = new JITSymbolTable();
- New->NextPtr = 0;
- New->Symbols = 0;
- New->NumSymbols = 0;
- New->NumAllocated = 0;
- *SymTabPtrPtr = New;
- }
-
- JITSymbolTable *SymTabPtr = *SymTabPtrPtr;
-
- // If we have space in the table, reallocate the table.
- if (SymTabPtr->NumSymbols >= SymTabPtr->NumAllocated) {
- // If we don't have space, reallocate the table.
- unsigned NewSize = std::max(64U, SymTabPtr->NumAllocated*2);
- JITSymbolEntry *NewSymbols = new JITSymbolEntry[NewSize];
- JITSymbolEntry *OldSymbols = SymTabPtr->Symbols;
-
- // Copy the old entries over.
- memcpy(NewSymbols, OldSymbols, SymTabPtr->NumSymbols*sizeof(OldSymbols[0]));
-
- // Swap the new symbols in, delete the old ones.
- SymTabPtr->Symbols = NewSymbols;
- SymTabPtr->NumAllocated = NewSize;
- delete [] OldSymbols;
- }
-
- // Otherwise, we have enough space, just tack it onto the end of the array.
- JITSymbolEntry &Entry = SymTabPtr->Symbols[SymTabPtr->NumSymbols];
- Entry.FnName = strdup(F.getName().data());
- Entry.FnStart = FnStart;
- Entry.FnSize = FnSize;
- ++SymTabPtr->NumSymbols;
-}
-
-// Removes the to-be-deleted function from the symbol table.
-void MacOSJITEventListener::NotifyFreeingMachineCode(
- const Function &, void *FnStart) {
- assert(FnStart && "Invalid function pointer");
- JITSymbolTable **SymTabPtrPtr = 0;
- SymTabPtrPtr = &__jitSymbolTable;
-
- JITSymbolTable *SymTabPtr = *SymTabPtrPtr;
- JITSymbolEntry *Symbols = SymTabPtr->Symbols;
-
- // Scan the table to find its index. The table is not sorted, so do a linear
- // scan.
- unsigned Index;
- for (Index = 0; Symbols[Index].FnStart != FnStart; ++Index)
- assert(Index != SymTabPtr->NumSymbols && "Didn't find function!");
-
- // Once we have an index, we know to nuke this entry, overwrite it with the
- // entry at the end of the array, making the last entry redundant.
- const char *OldName = Symbols[Index].FnName;
- Symbols[Index] = Symbols[SymTabPtr->NumSymbols-1];
- free((void*)OldName);
-
- // Drop the number of symbols in the table.
- --SymTabPtr->NumSymbols;
-
- // Finally, if we deleted the final symbol, deallocate the table itself.
- if (SymTabPtr->NumSymbols != 0)
- return;
-
- *SymTabPtrPtr = 0;
- delete [] Symbols;
- delete SymTabPtr;
-}
-
-#else // !ENABLE_JIT_SYMBOL_TABLE
-
-namespace llvm {
-// By defining this to return NULL, we can let clients call it unconditionally,
-// even if they aren't on an Apple system.
-JITEventListener *createMacOSJITEventListener() {
- return NULL;
-}
-} // namespace llvm
-
-#endif // ENABLE_JIT_SYMBOL_TABLE
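
The removal path of the deleted listener illustrates a classic O(1) unordered-array deletion: copy the last entry over the doomed slot and shrink the array by one. A generic sketch of the same move, illustrative only:

#include <cassert>
#include <cstddef>
#include <vector>

template <typename T>
void swapRemove(std::vector<T> &Data, std::size_t Index) {
  assert(Index < Data.size() && "index out of range");
  Data[Index] = Data.back(); // element order is not preserved
  Data.pop_back();
}
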
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
index 00c4af7..b45c71f 100644
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -43,7 +43,7 @@ public:
virtual void NotifyFunctionEmitted(const Function &F,
void *FnStart, size_t FnSize,
const EmittedFunctionDetails &Details);
- virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr);
+ virtual void NotifyFreeingMachineCode(void *OldPtr);
};
OProfileJITEventListener::OProfileJITEventListener()
@@ -147,13 +147,13 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
}
}
-// Removes the to-be-deleted function from the symbol table.
-void OProfileJITEventListener::NotifyFreeingMachineCode(
- const Function &F, void *FnStart) {
+// Removes the being-deleted function from the symbol table.
+void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
assert(FnStart && "Invalid function pointer");
if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) {
- DEBUG(errs() << "Failed to tell OProfile about unload of native function "
- << F.getName() << " at " << FnStart << "\n");
+ DEBUG(errs()
+ << "Failed to tell OProfile about unload of native function at "
+ << FnStart << "\n");
}
}
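
With the interface change above, NotifyFreeingMachineCode no longer receives the Function, since it can now fire while the Function is mid-destruction inside ~Value(). A minimal listener against the revised signatures; this is an untested sketch based only on the declarations shown in this patch:

#include "llvm/ExecutionEngine/JITEventListener.h"
#include <cstddef>
#include <cstdio>

using namespace llvm;

class LoggingJITEventListener : public JITEventListener {
public:
  virtual void NotifyFunctionEmitted(const Function &, void *Start,
                                     size_t Size,
                                     const EmittedFunctionDetails &) {
    fprintf(stderr, "emitted %lu bytes at %p\n", (unsigned long)Size, Start);
  }
  // Note: no Function& parameter any more, only the code address.
  virtual void NotifyFreeingMachineCode(void *Start) {
    fprintf(stderr, "freeing code at %p\n", Start);
  }
};
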
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index e64c200..8ece835 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -349,8 +349,7 @@ static void PrintMap(const std::map<const Value*, Value*> &M) {
// RemapOperand - Use ValueMap to convert constants from one module to another.
static Value *RemapOperand(const Value *In,
- std::map<const Value*, Value*> &ValueMap,
- LLVMContext &Context) {
+ std::map<const Value*, Value*> &ValueMap) {
std::map<const Value*,Value*>::const_iterator I = ValueMap.find(In);
if (I != ValueMap.end())
return I->second;
@@ -365,31 +364,29 @@ static Value *RemapOperand(const Value *In,
if (const ConstantArray *CPA = dyn_cast<ConstantArray>(CPV)) {
std::vector<Constant*> Operands(CPA->getNumOperands());
for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
- Operands[i] =cast<Constant>(RemapOperand(CPA->getOperand(i), ValueMap,
- Context));
- Result =
- ConstantArray::get(cast<ArrayType>(CPA->getType()), Operands);
+ Operands[i] =cast<Constant>(RemapOperand(CPA->getOperand(i), ValueMap));
+ Result = ConstantArray::get(cast<ArrayType>(CPA->getType()), Operands);
} else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(CPV)) {
std::vector<Constant*> Operands(CPS->getNumOperands());
for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
- Operands[i] =cast<Constant>(RemapOperand(CPS->getOperand(i), ValueMap,
- Context));
- Result =
- ConstantStruct::get(cast<StructType>(CPS->getType()), Operands);
+ Operands[i] =cast<Constant>(RemapOperand(CPS->getOperand(i), ValueMap));
+ Result = ConstantStruct::get(cast<StructType>(CPS->getType()), Operands);
} else if (isa<ConstantPointerNull>(CPV) || isa<UndefValue>(CPV)) {
Result = const_cast<Constant*>(CPV);
} else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CPV)) {
std::vector<Constant*> Operands(CP->getNumOperands());
for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- Operands[i] = cast<Constant>(RemapOperand(CP->getOperand(i), ValueMap,
- Context));
+ Operands[i] = cast<Constant>(RemapOperand(CP->getOperand(i), ValueMap));
Result = ConstantVector::get(Operands);
} else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
std::vector<Constant*> Ops;
for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
- Ops.push_back(cast<Constant>(RemapOperand(CE->getOperand(i),ValueMap,
- Context)));
+ Ops.push_back(cast<Constant>(RemapOperand(CE->getOperand(i),ValueMap)));
Result = CE->getWithOperands(Ops);
+ } else if (const BlockAddress *CE = dyn_cast<BlockAddress>(CPV)) {
+ Result = BlockAddress::get(
+ cast<Function>(RemapOperand(CE->getFunction(), ValueMap)),
+ CE->getBasicBlock());
} else {
assert(!isa<GlobalValue>(CPV) && "Unmapped global?");
llvm_unreachable("Unknown type of derived type constant value!");
@@ -896,8 +893,7 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src,
if (SGV->hasInitializer()) { // Only process initialized GV's
// Figure out what the initializer looks like in the dest module...
Constant *SInit =
- cast<Constant>(RemapOperand(SGV->getInitializer(), ValueMap,
- Dest->getContext()));
+ cast<Constant>(RemapOperand(SGV->getInitializer(), ValueMap));
// Grab destination global variable or alias.
GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts());
@@ -1084,7 +1080,7 @@ static bool LinkFunctionBody(Function *Dest, Function *Src,
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
if (!isa<Instruction>(*OI) && !isa<BasicBlock>(*OI))
- *OI = RemapOperand(*OI, ValueMap, Dest->getContext());
+ *OI = RemapOperand(*OI, ValueMap);
// There is no need to map the arguments anymore.
for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
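
RemapOperand above is a memoized recursion: look the value up in the cross-module map first, otherwise rebuild it from remapped operands (now including BlockAddress) and cache the result so shared subexpressions are translated once. A simplified standalone analogue over a toy expression tree; Node and remap are illustrative names, not the patch's types:

#include <map>

struct Node {
  Node *Op0, *Op1; // null for leaves
  int Id;
  Node(Node *A, Node *B, int I) : Op0(A), Op1(B), Id(I) {}
};

Node *remap(Node *In, std::map<Node*, Node*> &ValueMap) {
  std::map<Node*, Node*>::const_iterator I = ValueMap.find(In);
  if (I != ValueMap.end())
    return I->second;                           // already translated
  Node *Result;
  if (!In->Op0)
    Result = In;                                // leaf: nothing to rewrite
  else
    Result = new Node(remap(In->Op0, ValueMap), // rebuild from remapped
                      remap(In->Op1, ValueMap), // operands
                      In->Id);
  ValueMap[In] = Result;                        // memoize for sharing
  return Result;
}
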
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 74fb930..3e5c97d 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <cctype>
#include <cstring>
using namespace llvm;
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 7e42e2d..b9b323c 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -421,7 +421,7 @@ ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
unsigned int count, partBits;
integerPart part, boundary;
- assert (bits != 0);
+ assert(bits != 0);
bits--;
count = bits / integerPartWidth;
@@ -537,7 +537,7 @@ partAsHex (char *dst, integerPart part, unsigned int count,
{
unsigned int result = count;
- assert (count != 0 && count <= integerPartWidth / 4);
+ assert(count != 0 && count <= integerPartWidth / 4);
part >>= (integerPartWidth - 4 * count);
while (count--) {
@@ -760,7 +760,7 @@ APFloat::significandParts()
{
assert(category == fcNormal || category == fcNaN);
- if(partCount() > 1)
+ if (partCount() > 1)
return significand.parts;
else
return &significand.part;
@@ -2289,8 +2289,8 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
/* Both multiplySignificand and divideSignificand return the
result with the integer bit set. */
- assert (APInt::tcExtractBit
- (decSig.significandParts(), calcSemantics.precision - 1) == 1);
+ assert(APInt::tcExtractBit
+ (decSig.significandParts(), calcSemantics.precision - 1) == 1);
HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
powHUerr);
@@ -2593,7 +2593,7 @@ APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
q--;
*q = hexDigitChars[hexDigitValue (*q) + 1];
} while (*q == '0');
- assert (q >= p);
+ assert(q >= p);
} else {
/* Add trailing zeroes. */
memset (dst, '0', outputDigits);
@@ -2645,7 +2645,7 @@ APInt
APFloat::convertF80LongDoubleAPFloatToAPInt() const
{
assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
- assert (partCount()==2);
+ assert(partCount()==2);
uint64_t myexponent, mysignificand;
@@ -2677,7 +2677,7 @@ APInt
APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
{
assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
- assert (partCount()==2);
+ assert(partCount()==2);
uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
@@ -2722,7 +2722,7 @@ APInt
APFloat::convertQuadrupleAPFloatToAPInt() const
{
assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
- assert (partCount()==2);
+ assert(partCount()==2);
uint64_t myexponent, mysignificand, mysignificand2;
@@ -2758,7 +2758,7 @@ APInt
APFloat::convertDoubleAPFloatToAPInt() const
{
assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
- assert (partCount()==1);
+ assert(partCount()==1);
uint64_t myexponent, mysignificand;
@@ -2788,7 +2788,7 @@ APInt
APFloat::convertFloatAPFloatToAPInt() const
{
assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
- assert (partCount()==1);
+ assert(partCount()==1);
uint32_t myexponent, mysignificand;
@@ -2817,7 +2817,7 @@ APInt
APFloat::convertHalfAPFloatToAPInt() const
{
assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
- assert (partCount()==1);
+ assert(partCount()==1);
uint32_t myexponent, mysignificand;
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index 7a3fd87..31b45c8 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/Recycler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Memory.h"
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index 71ff411..d4954b6 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -57,7 +57,18 @@ DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
bool llvm::isCurrentDebugType(const char *DebugType) {
return CurrentDebugType.empty() || DebugType == CurrentDebugType;
}
+
+/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X
+/// option were specified. Note that DebugFlag also needs to be set to true for
+/// debug output to be produced.
+///
+void SetCurrentDebugType(const char *Type) {
+ CurrentDebugType = Type;
+}
+
#else
// Avoid "has no symbols" warning.
+namespace llvm {
int Debug_dummy = 0;
+}
#endif
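
The new hook gives programmatic access to what -debug-only=X does on the command line. A hedged usage sketch, assuming the matching declaration lands in Debug.h in namespace llvm and the build has assertions enabled (DEBUG() compiles away otherwise); "regalloc" is just an example tag:

#include "llvm/Support/Debug.h"

void enableRegAllocDebugOutput() {
  llvm::DebugFlag = true;                  // master switch, as with -debug
  llvm::SetCurrentDebugType("regalloc");   // filter, as with -debug-only=
}
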
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index e35c626..d8b6b9f 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -226,7 +226,7 @@ MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
size_t BytesLeft = FileSize;
while (BytesLeft) {
ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
- if (NumRead != -1) {
+ if (NumRead > 0) {
BytesLeft -= NumRead;
BufPtr += NumRead;
} else if (errno == EINTR) {
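
The NumRead > 0 test above matters because read(2) can legitimately return 0 at end of file; treating only -1 as failure would leave BytesLeft unchanged and spin forever on a truncated file. The loop's full shape, restated as a standalone sketch with illustrative names:

#include <cerrno>
#include <unistd.h>

bool readAll(int FD, char *Buf, size_t Len) {
  while (Len) {
    ssize_t N = ::read(FD, Buf, Len);
    if (N > 0) {                   // made progress
      Buf += N;
      Len -= N;
    } else if (N == 0) {           // EOF: fewer bytes than promised
      return false;
    } else if (errno != EINTR) {   // real error; EINTR just retries
      return false;
    }
  }
  return true;
}
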
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
index 6efab94..7eb9f5f 100644
--- a/lib/System/DynamicLibrary.cpp
+++ b/lib/System/DynamicLibrary.cpp
@@ -15,7 +15,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/System/DynamicLibrary.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Config/config.h"
#include <cstdio>
#include <cstring>
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
index df33574..8e1fa53 100644
--- a/lib/System/Path.cpp
+++ b/lib/System/Path.cpp
@@ -13,7 +13,6 @@
#include "llvm/System/Path.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstring>
#include <ostream>
diff --git a/lib/System/Unix/Memory.inc b/lib/System/Unix/Memory.inc
index a80f56f..1b038f9 100644
--- a/lib/System/Unix/Memory.inc
+++ b/lib/System/Unix/Memory.inc
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Unix.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/System/Process.h"
#ifdef HAVE_SYS_MMAN_H
diff --git a/lib/System/Unix/Process.inc b/lib/System/Unix/Process.inc
index 94e4c1b..911b8c3 100644
--- a/lib/System/Unix/Process.inc
+++ b/lib/System/Unix/Process.inc
@@ -18,7 +18,9 @@
#ifdef HAVE_SYS_RESOURCE_H
#include <sys/resource.h>
#endif
-#ifdef HAVE_MALLOC_H
+// DragonFly BSD has deprecated <malloc.h> in favor of <stdlib.h>,
+// which Unix.h already includes for us.
+#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__)
#include <malloc.h>
#endif
#ifdef HAVE_MALLOC_MALLOC_H
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
index d39e1e9..676e1e5 100644
--- a/lib/System/Unix/Signals.inc
+++ b/lib/System/Unix/Signals.inc
@@ -209,7 +209,7 @@ static void PrintStackTrace(void *) {
Dl_info dlinfo;
dladdr(StackTrace[i], &dlinfo);
- fprintf(stderr, "%-3d", i);
+ fprintf(stderr, "%-2d", i);
const char* name = strrchr(dlinfo.dli_fname, '/');
if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname);
diff --git a/lib/System/Win32/Memory.inc b/lib/System/Win32/Memory.inc
index 7611ecd..19fccbd 100644
--- a/lib/System/Win32/Memory.inc
+++ b/lib/System/Win32/Memory.inc
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "Win32.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/System/Process.h"
namespace llvm {
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 487ce1d..76cc06e 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -105,6 +105,7 @@ FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM,
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createNEONPreAllocPass();
+FunctionPass *createNEONMoveFixPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 8851fbb..cb9bd6a 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -125,12 +125,16 @@ def ARMInstrInfo : InstrInfo {
"SizeFlag",
"IndexModeBits",
"Form",
- "isUnaryDataProc"];
+ "isUnaryDataProc",
+ "canXformTo16Bit",
+ "Dom"];
let TSFlagsShifts = [0,
4,
7,
9,
- 15];
+ 15,
+ 16,
+ 17];
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index ecdf5a0..7c5b0f0 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -16,6 +16,7 @@
#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -25,6 +26,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -32,8 +34,9 @@ static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
-ARMBaseInstrInfo::ARMBaseInstrInfo()
- : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) {
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
+ : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
+ Subtarget(STI) {
}
MachineInstr *
@@ -249,7 +252,8 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
// ...likewise if it ends with a branch table followed by an unconditional
// branch. The branch folder can create these, and we must get rid of them for
// correctness of Thumb constant islands.
- if (isJumpTableBranchOpcode(SecondLastOpc) &&
+ if ((isJumpTableBranchOpcode(SecondLastOpc) ||
+ isIndirectBranchOpcode(SecondLastOpc)) &&
isUncondBranchOpcode(LastOpc)) {
I = LastInst;
if (AllowModify)
@@ -444,7 +448,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case ARM::Int_eh_sjlj_setjmp:
return 24;
case ARM::t2Int_eh_sjlj_setjmp:
- return 20;
+ return 22;
case ARM::BR_JTr:
case ARM::BR_JTm:
case ARM::BR_JTadd:
@@ -503,7 +507,7 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
case ARM::FCPYS:
case ARM::FCPYD:
case ARM::VMOVD:
- case ARM::VMOVQ: {
+ case ARM::VMOVQ: {
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
return true;
@@ -615,28 +619,12 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
if (I != MBB.end()) DL = I->getDebugLoc();
if (DestRC != SrcRC) {
- // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies
- // Allow QPR / QPR_VFP2 cross-class copies
- if (DestRC == ARM::DPRRegisterClass) {
- if (SrcRC == ARM::DPR_VFP2RegisterClass ||
- SrcRC == ARM::DPR_8RegisterClass) {
- } else
- return false;
- } else if (DestRC == ARM::DPR_VFP2RegisterClass) {
- if (SrcRC == ARM::DPRRegisterClass ||
- SrcRC == ARM::DPR_8RegisterClass) {
- } else
- return false;
- } else if (DestRC == ARM::DPR_8RegisterClass) {
- if (SrcRC == ARM::DPRRegisterClass ||
- SrcRC == ARM::DPR_VFP2RegisterClass) {
- } else
- return false;
- } else if ((DestRC == ARM::QPRRegisterClass &&
- SrcRC == ARM::QPR_VFP2RegisterClass) ||
- (DestRC == ARM::QPR_VFP2RegisterClass &&
- SrcRC == ARM::QPRRegisterClass)) {
- } else
+ if (DestRC->getSize() != SrcRC->getSize())
+ return false;
+
+ // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies.
+ // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies.
+ if (DestRC->getSize() != 8 && DestRC->getSize() != 16)
return false;
}
@@ -646,13 +634,18 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
} else if (DestRC == ARM::SPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
.addReg(SrcReg));
- } else if ((DestRC == ARM::DPRRegisterClass) ||
- (DestRC == ARM::DPR_VFP2RegisterClass) ||
- (DestRC == ARM::DPR_8RegisterClass)) {
+ } else if (DestRC == ARM::DPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
.addReg(SrcReg));
+ } else if (DestRC == ARM::DPR_VFP2RegisterClass ||
+ DestRC == ARM::DPR_8RegisterClass ||
+ SrcRC == ARM::DPR_VFP2RegisterClass ||
+ SrcRC == ARM::DPR_8RegisterClass) {
+ // Always use a NEON reg-reg move if the source or dest is a NEON-only regclass.
+ BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg);
} else if (DestRC == ARM::QPRRegisterClass ||
- DestRC == ARM::QPR_VFP2RegisterClass) {
+ DestRC == ARM::QPR_VFP2RegisterClass ||
+ DestRC == ARM::QPR_8RegisterClass) {
BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
} else {
return false;
@@ -727,9 +720,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else {
assert((RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
+ RC == ARM::QPR_VFP2RegisterClass ||
+ RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
// FIXME: Neon instructions should support predicates
- BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).
+ addMemOperand(MMO);
}
}
@@ -749,18 +744,24 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned SrcSubReg = MI->getOperand(1).getSubReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
if (Opc == ARM::MOVr)
NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addReg(SrcReg,
+ getKillRegState(isKill) | getUndefRegState(isUndef),
+ SrcSubReg)
.addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
else // ARM::t2MOVr
NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addReg(SrcReg,
+ getKillRegState(isKill) | getUndefRegState(isUndef),
+ SrcSubReg)
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
} else { // move -> load
unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned DstSubReg = MI->getOperand(0).getSubReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
if (Opc == ARM::MOVr)
@@ -768,14 +769,14 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
- getUndefRegState(isUndef))
+ getUndefRegState(isUndef), DstSubReg)
.addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
else // ARM::t2MOVr
NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
- getUndefRegState(isUndef))
+ getUndefRegState(isUndef), DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
}
} else if (Opc == ARM::tMOVgpr2gpr ||
@@ -783,20 +784,25 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
Opc == ARM::tMOVgpr2tgpr) {
if (OpNum == 0) { // move -> store
unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned SrcSubReg = MI->getOperand(1).getSubReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addReg(SrcReg,
+ getKillRegState(isKill) | getUndefRegState(isUndef),
+ SrcSubReg)
.addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
} else { // move -> load
unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned DstSubReg = MI->getOperand(0).getSubReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
- getUndefRegState(isUndef))
+ getUndefRegState(isUndef),
+ DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
}
} else if (Opc == ARM::FCPYS) {
@@ -804,21 +810,25 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned SrcSubReg = MI->getOperand(1).getSubReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef),
+ SrcSubReg)
.addFrameIndex(FI)
.addImm(0).addImm(Pred).addReg(PredReg);
} else { // move -> load
unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned DstSubReg = MI->getOperand(0).getSubReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS))
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
- getUndefRegState(isUndef))
+ getUndefRegState(isUndef),
+ DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
}
}
@@ -827,20 +837,25 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned SrcSubReg = MI->getOperand(1).getSubReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addReg(SrcReg,
+ getKillRegState(isKill) | getUndefRegState(isUndef),
+ SrcSubReg)
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
} else { // move -> load
unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned DstSubReg = MI->getOperand(0).getSubReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD))
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
- getUndefRegState(isUndef))
+ getUndefRegState(isUndef),
+ DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
}
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index a13155b..2ba3774 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -131,6 +131,14 @@ namespace ARMII {
Xform16Bit = 1 << 16,
//===------------------------------------------------------------------===//
+ // Code domain.
+ DomainShift = 17,
+ DomainMask = 3 << DomainShift,
+ DomainGeneral = 0 << DomainShift,
+ DomainVFP = 1 << DomainShift,
+ DomainNEON = 2 << DomainShift,
+
+ //===------------------------------------------------------------------===//
// Field shifts - such shifts are used to set field while generating
// machine instructions.
M_BitShift = 5,
@@ -157,9 +165,10 @@ namespace ARMII {
}
class ARMBaseInstrInfo : public TargetInstrInfoImpl {
+ const ARMSubtarget& Subtarget;
protected:
// Can be only subclassed.
- explicit ARMBaseInstrInfo();
+ explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
public:
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
@@ -173,6 +182,7 @@ public:
LiveVariables *LV) const;
virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+ const ARMSubtarget &getSubtarget() const { return Subtarget; }
// Branch analysis.
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
@@ -293,6 +303,11 @@ bool isJumpTableBranchOpcode(int Opc) {
Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT;
}
+static inline
+bool isIndirectBranchOpcode(int Opc) {
+ return Opc == ARM::BRIND || Opc == ARM::tBRIND;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
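
The new Dom field occupies two bits of the packed TSFlags word (shift 17, matching the ARM.td hunk earlier in this patch). Decoding is a mask against the constants above, as in this sketch; getInstrDomain is an illustrative helper, not part of the patch:

enum {
  DomainShift   = 17,
  DomainMask    = 3 << DomainShift,
  DomainGeneral = 0 << DomainShift,
  DomainVFP     = 1 << DomainShift,
  DomainNEON    = 2 << DomainShift
};

// Extract the code domain of an instruction from its packed flag word;
// compare the result against the Domain* values above.
static inline unsigned getInstrDomain(unsigned TSFlags) {
  return TSFlags & DomainMask;
}
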
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 00e7531..c1c531c 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameInfo.h"
@@ -40,13 +41,12 @@
using namespace llvm;
static cl::opt<bool>
-ScavengeFrameIndexVals("arm-virtual-frame-index-vals", cl::Hidden,
- cl::init(false),
- cl::desc("Resolve frame index values via scavenging in PEI"));
+ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true),
+ cl::desc("Reuse repeated frame index values"));
static cl::opt<bool>
-ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(false),
- cl::desc("Reuse repeated frame index values"));
+ARMDynamicStackAlign("arm-dynamic-stack-alignment", cl::Hidden, cl::init(false),
+ cl::desc("Dynamically re-align the stack as needed"));
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
bool *isSPVFP) {
@@ -254,6 +254,42 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
}
const TargetRegisterClass *
+ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ switch (SubIdx) {
+ default: return 0;
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ // S sub-registers.
+ if (A->getSize() == 8) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::DPR_8RegClass;
+ assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
+ if (A == &ARM::DPR_8RegClass)
+ return A;
+ return &ARM::DPR_VFP2RegClass;
+ }
+
+ assert(A->getSize() == 16 && "Expecting a Q register class!");
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::QPR_8RegClass;
+ return &ARM::QPR_VFP2RegClass;
+ case 5:
+ case 6:
+ // D sub-registers.
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return &ARM::QPR_8RegClass;
+ return A;
+ }
+ return 0;
+}
+
+const TargetRegisterClass *
ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
return ARM::GPRRegisterClass;
}
@@ -439,6 +475,21 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
}
}
+static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
+ unsigned MaxAlign = 0;
+
+ for (int i = FFI->getObjectIndexBegin(),
+ e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+
+ unsigned Align = FFI->getObjectAlignment(i);
+ MaxAlign = std::max(MaxAlign, Align);
+ }
+
+ return MaxAlign;
+}
+
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -446,10 +497,27 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return (NoFramePointerElim ||
+ needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
+bool ARMBaseRegisterInfo::
+needsStackRealignment(const MachineFunction &MF) const {
+ // Only do this for ARM if explicitly enabled
+ // FIXME: Once it's passing all the tests, enable by default
+ if (!ARMDynamicStackAlign)
+ return false;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ return (RealignStack &&
+ !AFI->isThumb1OnlyFunction() &&
+ (MFI->getMaxAlignment() > StackAlign) &&
+ !MFI->hasVarSizedObjects());
+}
+
bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
if (NoFramePointerElim && MFI->hasCalls())
@@ -525,6 +593,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
SmallVector<unsigned, 4> UnspilledCS2GPRs;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Calculate and set max stack object alignment early, so we can decide
+ // whether we will need stack realignment (and thus FP).
+ if (ARMDynamicStackAlign) {
+ unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
+ calculateMaxStackAlignment(MFI));
+ MFI->setMaxAlignment(MaxAlign);
+ }
+
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const unsigned *CSRegs = getCalleeSavedRegs();
@@ -947,7 +1025,7 @@ requiresRegisterScavenging(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
requiresFrameIndexScavenging(const MachineFunction &MF) const {
- return ScavengeFrameIndexVals;
+ return true;
}
// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -1025,17 +1103,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-/// findScratchRegister - Find a 'free' ARM register. If register scavenger
-/// is not being used, R12 is available. Otherwise, try for a call-clobbered
-/// register first and then a spilled callee-saved register if that fails.
-static
-unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC,
- ARMFunctionInfo *AFI) {
- unsigned Reg = RS ? RS->FindUnusedReg(RC) : (unsigned) ARM::R12;
- assert(!AFI->isThumb1OnlyFunction());
- return Reg;
-}
-
unsigned
ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, int *Value,
@@ -1057,22 +1124,44 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned FrameReg = ARM::SP;
int FrameIndex = MI.getOperand(i).getIndex();
int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj;
+ bool isFixed = MFI->isFixedObjectIndex(FrameIndex);
+ // When doing dynamic stack realignment, all of these need to change(?)
if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
Offset -= AFI->getGPRCalleeSavedArea1Offset();
else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
Offset -= AFI->getGPRCalleeSavedArea2Offset();
else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
Offset -= AFI->getDPRCalleeSavedAreaOffset();
- else if (hasFP(MF) && AFI->hasStackFrame()) {
+ else if (needsStackRealignment(MF)) {
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack pointer for locals.
+ assert(hasFP(MF) && "dynamic stack realignment without a FP!");
+ if (isFixed) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ // When referencing from the frame pointer, stack pointer adjustments
+ // don't matter.
+ SPAdj = 0;
+ }
+ } else if (hasFP(MF) && AFI->hasStackFrame()) {
assert(SPAdj == 0 && "Unexpected stack offset!");
- // Use frame pointer to reference fixed objects unless this is a
- // frameless function,
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
+ if (isFixed || MFI->hasVarSizedObjects()) {
+ // Use frame pointer to reference fixed objects unless this is a
+ // frameless function.
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else if (AFI->isThumb2Function()) {
+ // In Thumb2 mode, the negative offset is very limited.
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = getFrameRegister(MF);
+ Offset = FPOffset;
+ }
+ }
}
- // modify MI as necessary to handle as much of 'Offset' as possible
+ // Modify MI as necessary to handle as much of 'Offset' as possible
bool Done = false;
if (!AFI->isThumbFunction())
Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
@@ -1099,19 +1188,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Must be addrmode4.
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else {
- if (!ScavengeFrameIndexVals) {
- // Insert a set of r12 with the full address: r12 = sp + offset
- // If the offset we have is too large to fit into the instruction, we need
- // to form it with a series of ADDri's. Do this by taking 8-bit chunks
- // out of 'Offset'.
- ScratchReg = findScratchRegister(RS, ARM::GPRRegisterClass, AFI);
- if (ScratchReg == 0)
- // No register is "free". Scavenge a register.
- ScratchReg = RS->scavengeRegister(ARM::GPRRegisterClass, II, SPAdj);
- } else {
- ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
- *Value = Offset;
- }
+ ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
+ if (Value) *Value = Offset;
if (!AFI->isThumbFunction())
emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
Offset, Pred, PredReg, TII);
@@ -1121,7 +1199,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, Pred, PredReg, TII);
}
MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
- if (!ReuseFrameIndexVals || !ScavengeFrameIndexVals)
+ if (!ReuseFrameIndexVals)
ScratchReg = 0;
}
return ScratchReg;
@@ -1276,6 +1354,18 @@ emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need dynamic stack realignment, do it here.
+ if (needsStackRealignment(MF)) {
+ unsigned Opc;
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert(!AFI->isThumb1OnlyFunction());
+ Opc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
+
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ }
}
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
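
The prologue hunk realigns by emitting BIC sp, sp, #MaxAlign-1, which clears the low bits of SP. Since the stack grows downward, rounding down moves SP away from the caller's frame, never into it, and the alignment must be a power of two for the mask to work. The arithmetic, as a small sketch (alignDown is an illustrative name):

#include <cassert>
#include <stdint.h>

uintptr_t alignDown(uintptr_t SP, uintptr_t MaxAlign) {
  assert((MaxAlign & (MaxAlign - 1)) == 0 && "alignment not a power of 2");
  return SP & ~(MaxAlign - 1);  // BIC sp, sp, #MaxAlign-1 does the same
}
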
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index f7d38e5..029e468 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -74,6 +74,13 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const;
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
@@ -89,6 +96,8 @@ public:
bool hasFP(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
bool cannotEliminateFrame(const MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 7161639..8fdb07f 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -68,6 +68,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStack<4, 8>>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[f64], CCAssignToStack<8, 8>>,
CCIfType<[v2f64], CCAssignToStack<16, 8>>
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 6f1c624..13cf676 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -34,7 +34,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -56,8 +55,7 @@ namespace {
};
template<class CodeEmitter>
- class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
- public ARMCodeEmitter {
+ class Emitter : public MachineFunctionPass, public ARMCodeEmitter {
ARMJITInfo *JTI;
const ARMInstrInfo *II;
const TargetData *TD;
@@ -430,6 +428,7 @@ void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) {
DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ "
<< (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
+ assert(ACPV->isGlobalValue() && "unsupported constant pool value");
GlobalValue *GV = ACPV->getGV();
if (GV) {
Reloc::Model RelocM = TM.getRelocationModel();
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index c995ff2..9819625 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -42,6 +41,7 @@ STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
STATISTIC(NumTBs, "Number of table branches generated");
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
+STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
@@ -55,7 +55,7 @@ namespace {
/// Water - Potential places where an island could be formed.
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
- class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
+ class ARMConstantIslands : public MachineFunctionPass {
/// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
/// by MBB Number. The two-byte pads required for Thumb alignment are
/// counted as part of the following block (i.e., the offset and size for
@@ -1487,24 +1487,65 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
Bits = 11;
Scale = 2;
break;
- case ARM::t2Bcc:
+ case ARM::t2Bcc: {
NewOpc = ARM::tBcc;
Bits = 8;
- Scale = 2;
+ Scale = 2;
break;
}
- if (!NewOpc)
+ }
+ if (NewOpc) {
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
+ Br.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2BrShrunk;
+ MadeChange = true;
+ }
+ }
+
+ Opcode = Br.MI->getOpcode();
+ if (Opcode != ARM::tBcc)
continue;
- unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ NewOpc = 0;
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ if (Pred == ARMCC::EQ)
+ NewOpc = ARM::tCBZ;
+ else if (Pred == ARMCC::NE)
+ NewOpc = ARM::tCBNZ;
+ if (!NewOpc)
+ continue;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
- if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
- Br.MI->setDesc(TII->get(NewOpc));
- MachineBasicBlock *MBB = Br.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
- ++NumT2BrShrunk;
- MadeChange = true;
+ // Check if the distance is within 126 bytes. Subtract 2 from the starting
+ // offset because the cmp will be eliminated.
+ unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
+ MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPzi8) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ if (Pred == ARMCC::AL &&
+ CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg)) {
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineInstr *NewBR =
+ BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
+ CmpMI->eraseFromParent();
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumCBZ;
+ MadeChange = true;
+ }
+ }
}
}
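
The CBZ/CBNZ transformation above is only legal for short forward branches: the Thumb-2 encodings reach 0 to 126 bytes ahead, and deleting the feeding cmp shifts the branch 2 bytes earlier, which the offset math folds in. The check, restated as a standalone sketch (cbzInRange is an illustrative name; the +4 is the Thumb PC read-ahead, per the code above):

static bool cbzInRange(unsigned BrStart, unsigned DestOffset) {
  unsigned BrOffset = BrStart + 4 - 2;  // PC bias, minus the dropped cmp
  return BrOffset < DestOffset && (DestOffset - BrOffset) <= 126;
}
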
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 7170089..efa941a 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -13,19 +13,21 @@
#include "ARMConstantPoolValue.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
#include "llvm/GlobalValue.h"
#include "llvm/Type.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
-ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ARMConstantPoolValue::ARMConstantPoolValue(Constant *cval, unsigned id,
ARMCP::ARMCPKind K,
unsigned char PCAdj,
const char *Modif,
bool AddCA)
- : MachineConstantPoolValue((const Type*)gv->getType()),
- GV(gv), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj),
+ : MachineConstantPoolValue((const Type*)cval->getType()),
+ CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj),
Modifier(Modif), AddCurrentAddress(AddCA) {}
ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
@@ -34,14 +36,22 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
const char *Modif,
bool AddCA)
: MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)),
- GV(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPValue), PCAdjust(PCAdj),
- Modifier(Modif), AddCurrentAddress(AddCA) {}
+ CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol),
+ PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {}
ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, const char *Modif)
: MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())),
- GV(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
+ CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
Modifier(Modif) {}
+GlobalValue *ARMConstantPoolValue::getGV() const {
+ return dyn_cast_or_null<GlobalValue>(CVal);
+}
+
+BlockAddress *ARMConstantPoolValue::getBlockAddress() const {
+ return dyn_cast_or_null<BlockAddress>(CVal);
+}
+
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
@@ -51,7 +61,7 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
(Constants[i].getAlignment() & AlignMask) == 0) {
ARMConstantPoolValue *CPV =
(ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- if (CPV->GV == GV &&
+ if (CPV->CVal == CVal &&
CPV->S == S &&
CPV->LabelId == LabelId &&
CPV->PCAdjust == PCAdjust)
@@ -68,7 +78,7 @@ ARMConstantPoolValue::~ARMConstantPoolValue() {
void
ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
- ID.AddPointer(GV);
+ ID.AddPointer(CVal);
ID.AddPointer(S);
ID.AddInteger(LabelId);
ID.AddInteger(PCAdjust);
@@ -80,8 +90,8 @@ void ARMConstantPoolValue::dump() const {
void ARMConstantPoolValue::print(raw_ostream &O) const {
- if (GV)
- O << GV->getName();
+ if (CVal)
+ O << CVal->getName();
else
O << S;
if (Modifier) O << "(" << Modifier << ")";
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 00c4808..8fb3f92 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -18,31 +18,35 @@
namespace llvm {
+class Constant;
+class BlockAddress;
class GlobalValue;
class LLVMContext;
namespace ARMCP {
enum ARMCPKind {
CPValue,
+ CPExtSymbol,
+ CPBlockAddress,
CPLSDA
};
}
/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
-/// represent PC relative displacement between the address of the load
-/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)).
+/// represent PC-relative displacement between the address of the load
+/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
class ARMConstantPoolValue : public MachineConstantPoolValue {
- GlobalValue *GV; // GlobalValue being loaded.
+ Constant *CVal; // Constant being loaded.
const char *S; // ExtSymbol being loaded.
unsigned LabelId; // Label id of the load.
- ARMCP::ARMCPKind Kind; // Value or LSDA?
- unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative.
+ ARMCP::ARMCPKind Kind; // Kind of constant.
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative.
// 8 for ARM, 4 for Thumb.
const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
bool AddCurrentAddress;
public:
- ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ ARMConstantPoolValue(Constant *cval, unsigned id,
ARMCP::ARMCPKind Kind = ARMCP::CPValue,
unsigned char PCAdj = 0, const char *Modifier = NULL,
bool AddCurrentAddress = false);
@@ -53,14 +57,17 @@ public:
ARMConstantPoolValue();
~ARMConstantPoolValue();
-
- GlobalValue *getGV() const { return GV; }
+ GlobalValue *getGV() const;
const char *getSymbol() const { return S; }
+ BlockAddress *getBlockAddress() const;
const char *getModifier() const { return Modifier; }
bool hasModifier() const { return Modifier != NULL; }
bool mustAddCurrentAddress() const { return AddCurrentAddress; }
unsigned getLabelId() const { return LabelId; }
unsigned char getPCAdjustment() const { return PCAdjust; }
+ bool isGlobalValue() const { return Kind == ARMCP::CPValue; }
+ bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; }
+ bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; }
bool isLSDA() { return Kind == ARMCP::CPLSDA; }
virtual unsigned getRelocationInfo() const {
@@ -69,7 +76,6 @@ public:
return 2;
}
-
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment);
@@ -80,7 +86,6 @@ public:
void dump() const;
};
-
inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
V.print(O);
return O;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 5c1835b..1489cab 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -13,7 +13,6 @@
#include "ARM.h"
#include "ARMAddressingModes.h"
-#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMTargetMachine.h"
#include "llvm/CallingConv.h"
@@ -284,7 +283,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
}
}
- // Otherwise this is R +/- [possibly shifted] R
+ // Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
unsigned ShAmt = 0;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 6a264fd..b6ce5dd 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -330,9 +330,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::MULHS, MVT::i32, Expand);
}
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
@@ -363,6 +363,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
// Use the default implementation.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -495,6 +496,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FMRRD: return "ARMISD::FMRRD";
case ARMISD::FMDRR: return "ARMISD::FMDRR";
+ case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
+ case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
@@ -1017,7 +1021,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr, NULL, 0);
+ DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
@@ -1036,7 +1041,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr, NULL, 0);
+ DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
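The recurring change in this file replaces the null memory operand on constant-pool loads with PseudoSourceValue::getConstantPool(). A load tagged this way is known to read immutable, non-stack memory, so alias analysis and scheduling no longer have to treat it as a load from unknown memory. The pattern, as used in the hunks above and below:

    // Tagged form used throughout this patch:
    Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr,
                         PseudoSourceValue::getConstantPool(), 0);
    // The old form, DAG.getLoad(..., NULL, 0), pessimistically aliased
    // every store in the function.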
@@ -1201,6 +1207,30 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
+SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
+ } else {
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
+ ARMCP::CPBlockAddress,
+ PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
+ if (RelocM == Reloc::Static)
+ return Result;
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
+}
+
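In the non-static case the pool entry is PC-relative, so the loaded value has to be rebased with PIC_ADD on the label's PC. A scalar sketch of the arithmetic; it assumes ARM mode, where the pipeline offset PCAdj is 8 (Thumb uses 4):

    #include <cstdint>
    // PoolEntry was emitted as &BA - (LabelPC + PCAdj); adding the label's
    // PC plus the pipeline offset back recovers the absolute address.
    uint32_t materializeBlockAddress(uint32_t PoolEntry, uint32_t LabelPC,
                                     uint32_t PCAdj) {
      return PoolEntry + (LabelPC + PCAdj); // == &BA
    }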
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
@@ -1213,7 +1243,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ARMCP::CPValue, PCAdj, "tlsgd", true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
- Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0);
+ Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+ PseudoSourceValue::getConstantPool(), 0);
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
@@ -1255,19 +1286,22 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
ARMCP::CPValue, PCAdj, "gottpoff", true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ PseudoSourceValue::getConstantPool(), 0);
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ PseudoSourceValue::getConstantPool(), 0);
} else {
// local exec model
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ PseudoSourceValue::getConstantPool(), 0);
}
// The address of the thread local variable is the add of the thread
@@ -1336,7 +1370,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
}
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
@@ -1345,7 +1380,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
}
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
- Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result,
+ PseudoSourceValue::getGOT(), 0);
return Result;
}
@@ -1392,7 +1428,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result =
- DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
@@ -1489,7 +1526,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0);
+ ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0);
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -1602,7 +1640,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0));
}
}
@@ -1618,13 +1657,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
unsigned VARegSize = (4 - NumGPRs) * 4;
unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
- unsigned ArgOffset = 0;
+ unsigned ArgOffset = CCInfo.getNextStackOffset();
if (VARegSaveSize) {
// If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
// the result of va_next.
AFI->setVarArgsRegSaveSize(VARegSaveSize);
- ArgOffset = CCInfo.getNextStackOffset();
VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset +
VARegSaveSize - VARegSize);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
@@ -1639,7 +1677,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
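The VARegSaveSize computation a few lines up is the standard power-of-two align-up. Worked through: with one GPR already consumed by formal arguments, VARegSize is (4 - 1) * 4 = 12, and an 8-byte stack alignment rounds the save area to 16 bytes:

    // Equivalent helper: (Size + Align - 1) & ~(Align - 1), with Align a
    // power of two. alignUp(12, 8) == 16, alignUp(8, 8) == 8.
    static unsigned alignUp(unsigned Size, unsigned Align) {
      return (Size + Align - 1) & ~(Align - 1);
    }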
@@ -1839,12 +1878,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
Addr, Op.getOperand(2), JTI, UId);
}
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, NULL, 0);
+ Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
+ PseudoSourceValue::getJumpTable(), 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
} else {
- Addr = DAG.getLoad(PTy, dl, Chain, Addr, NULL, 0);
+ Addr = DAG.getLoad(PTy, dl, Chain, Addr,
+ PseudoSourceValue::getJumpTable(), 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
}
@@ -2055,7 +2096,7 @@ static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- // Always build ones vectors as <16 x i32> or <8 x i32> bitcasted to their
+ // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
// dest type. This ensures they get CSE'd.
SDValue Vec;
SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
@@ -2072,6 +2113,76 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
+/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
+/// i32 values and take a 2 x i32 value to shift plus a shift amount.
+static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMCC;
+ unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
+
+ assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMCC, DAG, ST->isThumb1Only(), dl);
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
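Both *_PARTS lowerings compute the two candidate results and let a CMOV on ExtraShAmt >= 0 pick one. A branchy scalar model of the low word (a sketch, not the DAG itself; Amt is assumed in [1, 63] so neither C shift is undefined):

    #include <cstdint>
    static uint32_t shr64PartsLo(uint32_t Lo, uint32_t Hi, unsigned Amt,
                                 bool Arithmetic /* SRA_PARTS? */) {
      if (Amt >= 32) {                        // TrueVal: ExtraShAmt >= 0
        unsigned Extra = Amt - 32;
        return Arithmetic ? (uint32_t)((int32_t)Hi >> Extra) : Hi >> Extra;
      }
      return (Lo >> Amt) | (Hi << (32 - Amt)); // FalseVal: OR of partials
    }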
+/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
+/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
+static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMCC;
+
+ assert(Op.getOpcode() == ISD::SHL_PARTS);
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMCC, DAG, ST->isThumb1Only(), dl);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
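The matching model for the high word of SHL_PARTS, under the same assumptions:

    static uint32_t shl64PartsHi(uint32_t Lo, uint32_t Hi, unsigned Amt) {
      if (Amt >= 32)                           // Tmp3 leg chosen by the CMOV
        return Lo << (Amt - 32);
      return (Hi << Amt) | (Lo >> (32 - Amt)); // FalseVal: OR of partials
    }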
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
@@ -2641,6 +2752,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
int Lane = SVN->getSplatIndex();
+    // For an undef splat any lane works; use lane 0 so a plain VDUP suffices.
+ if (Lane == -1) Lane = 0;
+
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
}
@@ -2741,6 +2855,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
@@ -2763,6 +2878,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG, Subtarget);
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, Subtarget);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
@@ -3990,3 +4108,60 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
return false;
}
+
+int ARM::getVFPf32Imm(const APFloat &FPImm) {
+ APInt Imm = FPImm.bitcastToAPInt();
+ uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
+ int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x7ffff)
+ return -1;
+ Mantissa >>= 19;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+int ARM::getVFPf64Imm(const APFloat &FPImm) {
+ APInt Imm = FPImm.bitcastToAPInt();
+ uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+ int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
+ uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0xffffffffffffLL)
+ return -1;
+ Mantissa >>= 48;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
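Both encoders accept exactly the VFPv3 immediate family (16+m)/16 * 2^e with an optional sign: four mantissa bits and a three-bit exponent in [-3, 4]. A hypothetical self-check working two values through the f32 path (APFloat's float constructor is the only extra API assumed):

    #include <cassert>
    #include "llvm/ADT/APFloat.h"
    // 1.0f: sign 0, exponent 0, mantissa 0 -> Exp = ((0+3)&7)^4 = 7 -> 0x70
    // 0.5f: exponent -1                    -> Exp = ((2)&7)^4   = 6 -> 0x60
    void checkVFPEncodings() {
      assert(llvm::ARM::getVFPf32Imm(llvm::APFloat(1.0f)) == 0x70);
      assert(llvm::ARM::getVFPf32Imm(llvm::APFloat(0.5f)) == 0x60);
      // 0.1f needs more than 4 mantissa bits, so it is rejected and will be
      // materialized as a constant-pool load instead.
      assert(llvm::ARM::getVFPf32Imm(llvm::APFloat(0.1f)) == -1);
    }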
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (!Subtarget->hasVFP3())
+ return false;
+ if (VT == MVT::f32)
+ return ARM::getVFPf32Imm(Imm) != -1;
+ if (VT == MVT::f64)
+ return ARM::getVFPf64Imm(Imm) != -1;
+ return false;
+}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 7d85f45..9c7a91d 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -137,6 +137,13 @@ namespace llvm {
/// return the constant being splatted. The ByteSize field indicates the
/// number of bytes of each element [1248].
SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+
+ /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
+ /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
+ /// instruction, returns its 8-bit integer representation. Otherwise,
+ /// returns -1.
+ int getVFPf32Imm(const APFloat &FPImm);
+ int getVFPf64Imm(const APFloat &FPImm);
}
//===--------------------------------------------------------------------===//
@@ -224,6 +231,12 @@ namespace llvm {
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -255,6 +268,7 @@ namespace llvm {
ISD::ArgFlagsTy Flags);
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG);
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 8225fd7..83b5cb4 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -108,6 +108,15 @@ def IndexModeNone : IndexMode<0>;
def IndexModePre : IndexMode<1>;
def IndexModePost : IndexMode<2>;
+// Instruction execution domain.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def VFPDomain : Domain<1>; // Instructions in VFP domain only
+def NeonDomain : Domain<2>; // Instructions in Neon domain only
+def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+
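The two Dom bits are meant to land in each instruction's target-specific flag word so later passes can ask which execution pipeline (VFP, NEON, or both) an opcode uses; this patch only defines the field. A hypothetical reader, where the bit position inside TSFlags is an assumption and not something this patch establishes:

    // Hypothetical decoder for the 2-bit domain field.
    enum { DomainShift = 0 };                // assumed position in TSFlags
    static unsigned getInstrDomain(unsigned TSFlags) {
      return (TSFlags >> DomainShift) & 0x3; // 0=Generic 1=VFP 2=Neon 3=both
    }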
//===----------------------------------------------------------------------===//
// ARM special operands.
@@ -136,7 +145,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
//
class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
- Format f, string cstr, InstrItinClass itin>
+ Format f, Domain d, string cstr, InstrItinClass itin>
: Instruction {
field bits<32> Inst;
@@ -155,6 +164,9 @@ class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
Format F = f;
bits<5> Form = F.Value;
+ Domain D = d;
+ bits<2> Dom = D.Value;
+
//
// Attributes specific to ARM instructions...
//
@@ -167,7 +179,8 @@ class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
class PseudoInst<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, "", itin> {
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
+ "", itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
@@ -179,7 +192,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
list<dag> pattern>
- : InstARM<am, sz, im, f, cstr, itin> {
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
@@ -194,7 +207,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
list<dag> pattern>
- : InstARM<am, sz, im, f, cstr, itin> {
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
@@ -206,7 +219,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, im, f, cstr, itin> {
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
@@ -807,7 +820,7 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin, string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
@@ -833,7 +846,7 @@ class TJTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> patter
// Thumb1 only
class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin, string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
@@ -861,7 +874,7 @@ class T1It<dag oops, dag iops, InstrItinClass itin,
class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = !con(oops, (ops s_cc_out:$s));
let InOperandList = !con(iops, (ops pred:$p));
let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
@@ -883,7 +896,7 @@ class T1sIt<dag oops, dag iops, InstrItinClass itin,
class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
@@ -918,7 +931,7 @@ class T1pIs<dag oops, dag iops,
class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
@@ -934,7 +947,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
@@ -946,7 +959,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> {
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
@@ -993,7 +1006,7 @@ class T2Ix2<dag oops, dag iops, InstrItinClass itin,
class T2Iidxldst<dag oops, dag iops, AddrMode am, IndexMode im,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, Size4Bytes, im, ThumbFrm, cstr, itin> {
+ : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
@@ -1026,7 +1039,7 @@ class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, im, f, cstr, itin> {
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ops pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
@@ -1038,7 +1051,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : InstARM<am, sz, im, f, cstr, itin> {
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
@@ -1061,6 +1074,9 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1011;
+
+ // 64-bit loads & stores operate on both NEON and VFP pipelines.
+ let Dom = VFPNeonDomain.Value;
}
class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
@@ -1082,6 +1098,9 @@ class AXDI5<dag oops, dag iops, InstrItinClass itin,
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-8} = 0b1011;
+
+ // 64-bit loads & stores operate on both NEON and VFP pipelines.
+ let Dom = VFPNeonDomain.Value;
}
class AXSI5<dag oops, dag iops, InstrItinClass itin,
@@ -1125,8 +1144,8 @@ class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
// Single precision unary, if no NEON
// Same as ASuI except not available if NEON is enabled
class ASuIn<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
- InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : ASuI<opcod1, opcod2, opcod2, oops, iops, itin, opc, asm, pattern> {
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : ASuI<opcod1, opcod2, opcod3, oops, iops, itin, opc, asm, pattern> {
list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
}
@@ -1199,7 +1218,7 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : InstARM<am, Size4Bytes, im, NEONFrm, cstr, itin> {
+ : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index dd4123b..86bbe2a 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -25,7 +25,7 @@
using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
- : RI(*this, STI), Subtarget(STI) {
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
}
unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
@@ -67,6 +67,7 @@ bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
case ARM::BX_RET: // Return.
case ARM::LDM_RET:
case ARM::B:
+ case ARM::BRIND:
case ARM::BR_JTr: // Jumptable branch.
case ARM::BR_JTm: // Jumptable branch through mem.
case ARM::BR_JTadd: // Jumptable branch add to pc.
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index c616949..5d1678d 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -25,7 +25,6 @@ namespace llvm {
class ARMInstrInfo : public ARMBaseInstrInfo {
ARMRegisterInfo RI;
- const ARMSubtarget &Subtarget;
public:
explicit ARMInstrInfo(const ARMSubtarget &STI);
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 384b98c..cbe80b4 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -370,19 +370,19 @@ include "ARMInstrFormats.td"
multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, opc, " $dst, $a, $b",
+ IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
let Inst{25} = 1;
}
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- IIC_iALUr, opc, " $dst, $a, $b",
+ IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
let Inst{4} = 0;
let Inst{25} = 0;
let isCommutable = Commutable;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, opc, " $dst, $a, $b",
+ IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
let Inst{4} = 1;
let Inst{7} = 0;
@@ -396,22 +396,25 @@ let Defs = [CPSR] in {
multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, opc, "s $dst, $a, $b",
+ IIC_iALUi, opc, "s\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
+ let Inst{20} = 1;
let Inst{25} = 1;
}
def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- IIC_iALUr, opc, "s $dst, $a, $b",
+ IIC_iALUr, opc, "s\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
let isCommutable = Commutable;
let Inst{4} = 0;
+ let Inst{20} = 1;
let Inst{25} = 0;
}
def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, opc, "s $dst, $a, $b",
+ IIC_iALUsr, opc, "s\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
let Inst{4} = 1;
let Inst{7} = 0;
+ let Inst{20} = 1;
let Inst{25} = 0;
}
}
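The new 'let Inst{20} = 1' lines make the 's' variants encode the S (flag-setting) bit explicitly, alongside the existing bit 25 that selects the immediate form. For reference, a sketch of the classic ARM data-processing word these fields live in; encodeDP is illustrative, not part of the patch:

    #include <cstdint>
    // cond[31:28] 00 I[25] opcode[24:21] S[20] Rn[19:16] Rd[15:12] op2[11:0]
    static uint32_t encodeDP(unsigned Cond, unsigned Opcod, bool Imm, bool S,
                             unsigned Rn, unsigned Rd, unsigned Operand2) {
      return (Cond << 28) | ((unsigned)Imm << 25) | (Opcod << 21) |
             ((unsigned)S << 20) | (Rn << 16) | (Rd << 12) |
             (Operand2 & 0xfff);
    }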
@@ -424,13 +427,13 @@ let Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi,
- opc, " $a, $b",
+ opc, "\t$a, $b",
[(opnode GPR:$a, so_imm:$b)]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
- opc, " $a, $b",
+ opc, "\t$a, $b",
[(opnode GPR:$a, GPR:$b)]> {
let Inst{4} = 0;
let Inst{20} = 1;
@@ -438,7 +441,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
let isCommutable = Commutable;
}
def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
- opc, " $a, $b",
+ opc, "\t$a, $b",
[(opnode GPR:$a, so_reg:$b)]> {
let Inst{4} = 1;
let Inst{7} = 0;
@@ -453,28 +456,31 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
- IIC_iUNAr, opc, " $dst, $src",
+ IIC_iUNAr, opc, "\t$dst, $src",
[(set GPR:$dst, (opnode GPR:$src))]>,
Requires<[IsARM, HasV6]> {
- let Inst{19-16} = 0b1111;
- }
+ let Inst{11-10} = 0b00;
+ let Inst{19-16} = 0b1111;
+ }
def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
- IIC_iUNAsi, opc, " $dst, $src, ror $rot",
+ IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
[(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
Requires<[IsARM, HasV6]> {
- let Inst{19-16} = 0b1111;
- }
+ let Inst{19-16} = 0b1111;
+ }
}
/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
- IIC_iALUr, opc, " $dst, $LHS, $RHS",
+ IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
[(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]> {
+ let Inst{11-10} = 0b00;
+ }
def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
- IIC_iALUsi, opc, " $dst, $LHS, $RHS, ror $rot",
+ IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
[(set GPR:$dst, (opnode GPR:$LHS,
(rotr GPR:$RHS, rot_imm:$rot)))]>,
Requires<[IsARM, HasV6]>;
@@ -485,13 +491,13 @@ let Uses = [CPSR] in {
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, opc, " $dst, $a, $b",
+ DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
let Inst{25} = 1;
}
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, IIC_iALUr, opc, " $dst, $a, $b",
+ DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
let isCommutable = Commutable;
@@ -499,7 +505,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{25} = 0;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, opc, " $dst, $a, $b",
+ DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
let Inst{4} = 1;
@@ -508,27 +514,30 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
}
// Carry setting variants
def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, !strconcat(opc, "s $dst, $a, $b"),
+ DPFrm, IIC_iALUi, !strconcat(opc, "s\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
+ let Inst{20} = 1;
let Inst{25} = 1;
}
def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, IIC_iALUr, !strconcat(opc, "s $dst, $a, $b"),
+ DPFrm, IIC_iALUr, !strconcat(opc, "s\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
let Inst{4} = 0;
+ let Inst{20} = 1;
let Inst{25} = 0;
}
def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "s $dst, $a, $b"),
+ DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "s\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
let Inst{4} = 1;
let Inst{7} = 0;
+ let Inst{20} = 1;
let Inst{25} = 0;
}
}
@@ -573,42 +582,42 @@ PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), NoItinerary,
// Address computation and loads and stores in PIC mode.
let isNotDuplicable = 1 in {
def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
- Pseudo, IIC_iALUr, "\n$cp:\n\tadd$p $dst, pc, $a",
+ Pseudo, IIC_iALUr, "\n$cp:\n\tadd$p\t$dst, pc, $a",
[(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
let AddedComplexity = 10 in {
let canFoldAsLoad = 1 in
def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p $dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr",
[(set GPR:$dst, (load addrmodepc:$addr))]>;
def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}h $dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}h\t$dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}b $dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}b\t$dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sh $dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sh\t$dst, $addr",
[(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sb $dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sb\t$dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
}
let AddedComplexity = 10 in {
def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p $src, $addr",
+ Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p\t$src, $addr",
[(store GPR:$src, addrmodepc:$addr)]>;
def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}h $src, $addr",
+ Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}h\t$src, $addr",
[(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}b $src, $addr",
+ Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}b\t$src, $addr",
[(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
}
} // isNotDuplicable = 1
@@ -618,10 +627,10 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
// assembler.
def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
Pseudo, IIC_iALUi,
- !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(",
- "${:private}PCRELL${:uid}+8))\n"),
- !strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p $dst, pc, #${:private}PCRELV${:uid}")),
+ !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
[]>;
def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
@@ -631,7 +640,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
"(${label}_${id}-(",
"${:private}PCRELL${:uid}+8))\n"),
!strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p $dst, pc, #${:private}PCRELV${:uid}")),
+ "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
[]> {
let Inst{25} = 1;
}
@@ -642,19 +651,29 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
let isReturn = 1, isTerminator = 1, isBarrier = 1 in
def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
- "bx", " lr", [(ARMretflag)]> {
+ "bx", "\tlr", [(ARMretflag)]> {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
}
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def BRIND : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
+ [(brind GPR:$dst)]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+ }
+}
+
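BRIND gives instruction selection a target for the generic 'brind' node, and the BlockHasNoFallThrough change in ARMInstrInfo.cpp above teaches branch analysis that it never falls through. Together with LowerBlockAddress, this is what makes indirectbr work on ARM. A small source-level trigger, using the GNU address-of-label extension (how any particular compiler lowers it is an assumption):

    // The label addresses become BlockAddress constants in the constant
    // pool; the computed goto becomes an indirect branch selected to 'bx'.
    int dispatch(int i) {
      static void *targets[] = { &&a, &&b };
      goto *targets[i & 1];
    a: return 1;
    b: return 2;
    }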
// FIXME: remove when we have a way to marking a MI with these properties.
// FIXME: Should pc be an implicit operand like PICADD, etc?
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def LDM_RET : AXI4ld<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_Br, "ldm${p}${addr:submode} $addr, $wb",
+ LdStMulFrm, IIC_Br, "ldm${p}${addr:submode}\t$addr, $wb",
[]>;
// On non-Darwin platforms R9 is callee-saved.
@@ -664,18 +683,20 @@ let isCall = 1,
D16, D17, D18, D19, D20, D21, D22, D23,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl ${func:call}",
+ IIC_Br, "bl\t${func:call}",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsARM, IsNotDarwin]>;
+ Requires<[IsARM, IsNotDarwin]> {
+ let Inst{31-28} = 0b1110;
+ }
def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl", " ${func:call}",
+ IIC_Br, "bl", "\t${func:call}",
[(ARMcall_pred tglobaladdr:$func)]>,
Requires<[IsARM, IsNotDarwin]>;
// ARMv5T and above
def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
- IIC_Br, "blx $func",
+ IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>,
Requires<[IsARM, HasV5T, IsNotDarwin]> {
let Inst{7-4} = 0b0011;
@@ -685,7 +706,7 @@ let isCall = 1,
// ARMv4T
def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops),
- IIC_Br, "mov lr, pc\n\tbx $func",
+ IIC_Br, "mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink GPR:$func)]>,
Requires<[IsARM, IsNotDarwin]> {
let Inst{7-4} = 0b0001;
@@ -701,17 +722,19 @@ let isCall = 1,
D16, D17, D18, D19, D20, D21, D22, D23,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
def BLr9 : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl ${func:call}",
- [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>;
+ IIC_Br, "bl\t${func:call}",
+ [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> {
+ let Inst{31-28} = 0b1110;
+ }
def BLr9_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl", " ${func:call}",
+ IIC_Br, "bl", "\t${func:call}",
[(ARMcall_pred tglobaladdr:$func)]>,
Requires<[IsARM, IsDarwin]>;
// ARMv5T and above
def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
- IIC_Br, "blx $func",
+ IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> {
let Inst{7-4} = 0b0011;
let Inst{19-8} = 0b111111111111;
@@ -720,7 +743,7 @@ let isCall = 1,
// ARMv4T
def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops),
- IIC_Br, "mov lr, pc\n\tbx $func",
+ IIC_Br, "mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink GPR:$func)]>, Requires<[IsARM, IsDarwin]> {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
@@ -733,11 +756,11 @@ let isBranch = 1, isTerminator = 1 in {
let isBarrier = 1 in {
let isPredicable = 1 in
def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br,
- "b $target", [(br bb:$target)]>;
+ "b\t$target", [(br bb:$target)]>;
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov pc, $target \n$jt",
+ IIC_Br, "mov\tpc, $target \n$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
let Inst{20} = 0; // S Bit
let Inst{24-21} = 0b1101;
@@ -745,7 +768,7 @@ let isBranch = 1, isTerminator = 1 in {
}
def BR_JTm : JTI<(outs),
(ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "ldr pc, $target \n$jt",
+ IIC_Br, "ldr\tpc, $target \n$jt",
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
imm:$id)]> {
let Inst{20} = 1; // L bit
@@ -756,7 +779,7 @@ let isBranch = 1, isTerminator = 1 in {
}
def BR_JTadd : JTI<(outs),
(ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "add pc, $target, $idx \n$jt",
+ IIC_Br, "add\tpc, $target, $idx \n$jt",
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
imm:$id)]> {
let Inst{20} = 0; // S bit
@@ -769,7 +792,7 @@ let isBranch = 1, isTerminator = 1 in {
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
def Bcc : ABI<0b1010, (outs), (ins brtarget:$target),
- IIC_Br, "b", " $target",
+ IIC_Br, "b", "\t$target",
[/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>;
}
@@ -780,140 +803,140 @@ let isBranch = 1, isTerminator = 1 in {
// Load
let canFoldAsLoad = 1, isReMaterializable = 1 in
def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
- "ldr", " $dst, $addr",
+ "ldr", "\t$dst, $addr",
[(set GPR:$dst, (load addrmode2:$addr))]>;
// Special LDR for loads from non-pc-relative constpools.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
- "ldr", " $dst, $addr", []>;
+ "ldr", "\t$dst, $addr", []>;
// Loads with zero extension
def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "h $dst, $addr",
+ IIC_iLoadr, "ldr", "h\t$dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
- IIC_iLoadr, "ldr", "b $dst, $addr",
+ IIC_iLoadr, "ldr", "b\t$dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
// Loads with sign extension
def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "sh $dst, $addr",
+ IIC_iLoadr, "ldr", "sh\t$dst, $addr",
[(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "sb $dst, $addr",
+ IIC_iLoadr, "ldr", "sb\t$dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "d $dst1, $addr",
+ IIC_iLoadr, "ldr", "d\t$dst1, $addr",
[]>, Requires<[IsARM, HasV5TE]>;
// Indexed loads
def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode2:$addr), LdFrm, IIC_iLoadru,
- "ldr", " $dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldr", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
- "ldr", " $dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldr", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "h $dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldr", "h\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "h $dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldr", "h\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode2:$addr), LdFrm, IIC_iLoadru,
- "ldr", "b $dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldr", "b\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
- "ldr", "b $dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldr", "b\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "sh $dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldr", "sh\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "sh $dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldr", "sh\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "sb $dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldr", "sb\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldr", "sb\t$dst, [$base], $offset", "$base = $base_wb", []>;
}
// Store
def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
- "str", " $src, $addr",
+ "str", "\t$src, $addr",
[(store GPR:$src, addrmode2:$addr)]>;
// Stores with truncate
def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer,
- "str", "h $src, $addr",
+ "str", "h\t$src, $addr",
[(truncstorei16 GPR:$src, addrmode3:$addr)]>;
def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
- "str", "b $src, $addr",
+ "str", "b\t$src, $addr",
[(truncstorei8 GPR:$src, addrmode2:$addr)]>;
// Store doubleword
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
StMiscFrm, IIC_iStorer,
- "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
+ "str", "d\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
// Indexed stores
def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, am2offset:$offset),
StFrm, IIC_iStoreru,
- "str", " $src, [$base, $offset]!", "$base = $base_wb",
+ "str", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
def STR_POST : AI2stwpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
- "str", " $src, [$base], $offset", "$base = $base_wb",
+ "str", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
- "str", "h $src, [$base, $offset]!", "$base = $base_wb",
+ "str", "h\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
- "str", "h $src, [$base], $offset", "$base = $base_wb",
+ "str", "h\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti16 GPR:$src,
GPR:$base, am3offset:$offset))]>;
def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
- "str", "b $src, [$base, $offset]!", "$base = $base_wb",
+ "str", "b\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
- "str", "b $src, [$base], $offset", "$base = $base_wb",
+ "str", "b\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
@@ -924,13 +947,13 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def LDM : AXI4ld<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode} $addr, $wb",
+ LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode}\t$addr, $wb",
[]>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def STM : AXI4st<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode} $addr, $wb",
+ LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode}\t$addr, $wb",
[]>;
//===----------------------------------------------------------------------===//
@@ -939,14 +962,14 @@ def STM : AXI4st<(outs),
let neverHasSideEffects = 1 in
def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
- "mov", " $dst, $src", []>, UnaryDP {
+ "mov", "\t$dst, $src", []>, UnaryDP {
let Inst{4} = 0;
let Inst{25} = 0;
}
def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
- "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
+ "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
let Inst{4} = 1;
let Inst{7} = 0;
let Inst{25} = 0;
@@ -954,14 +977,14 @@ def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi,
- "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP {
+ "mov", "\t$dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP {
let Inst{25} = 1;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
DPFrm, IIC_iMOVi,
- "movw", " $dst, $src",
+ "movw", "\t$dst, $src",
[(set GPR:$dst, imm0_65535:$src)]>,
Requires<[IsARM, HasV6T2]> {
let Inst{20} = 0;
@@ -971,7 +994,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
let Constraints = "$src = $dst" in
def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
DPFrm, IIC_iMOVi,
- "movt", " $dst, $imm",
+ "movt", "\t$dst, $imm",
[(set GPR:$dst,
(or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
@@ -985,7 +1008,7 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
let Uses = [CPSR] in
def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
- "mov", " $dst, $src, rrx",
+ "mov", "\t$dst, $src, rrx",
[(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP;
// These aren't really mov instructions, but we have to define them this way
@@ -993,10 +1016,10 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
let Defs = [CPSR] in {
def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "mov", "s $dst, $src, lsr #1",
+ IIC_iMOVsi, "mov", "s\t$dst, $src, lsr #1",
[(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "mov", "s $dst, $src, asr #1",
+ IIC_iMOVsi, "mov", "s\t$dst, $src, asr #1",
[(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP;
}
@@ -1047,7 +1070,7 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
def SBFX : I<(outs GPR:$dst),
(ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
- "sbfx", " $dst, $src, $lsb, $width", "", []>,
+ "sbfx", "\t$dst, $src, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-21} = 0b0111101;
let Inst{6-4} = 0b101;
@@ -1056,7 +1079,7 @@ def SBFX : I<(outs GPR:$dst),
def UBFX : I<(outs GPR:$dst),
(ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
- "ubfx", " $dst, $src, $lsb, $width", "", []>,
+ "ubfx", "\t$dst, $src, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-21} = 0b0111111;
let Inst{6-4} = 0b101;
@@ -1084,52 +1107,72 @@ defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", " $dst, $a, $b",
+ IIC_iALUi, "rsb", "\t$dst, $a, $b",
[(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
let Inst{25} = 1;
}
def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", " $dst, $a, $b",
- [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]>;
+ IIC_iALUsr, "rsb", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
+ let Inst{25} = 0;
+}
// RSB with 's' bit set.
let Defs = [CPSR] in {
def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", "s $dst, $a, $b",
+ IIC_iALUi, "rsb", "s\t$dst, $a, $b",
[(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> {
+ let Inst{20} = 1;
let Inst{25} = 1;
}
def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", "s $dst, $a, $b",
- [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>;
+ IIC_iALUsr, "rsb", "s\t$dst, $a, $b",
+ [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
+ let Inst{20} = 1;
+ let Inst{25} = 0;
+}
}
let Uses = [CPSR] in {
def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, "rsc", " $dst, $a, $b",
+ DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b",
[(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
Requires<[IsARM, CarryDefIsUnused]> {
let Inst{25} = 1;
}
def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, "rsc", " $dst, $a, $b",
+ DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
[(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]>;
+ Requires<[IsARM, CarryDefIsUnused]> {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
+ let Inst{25} = 0;
+}
}
// FIXME: Allow these to be predicated.
let Defs = [CPSR], Uses = [CPSR] in {
def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, "rscs $dst, $a, $b",
+ DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b",
[(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
Requires<[IsARM, CarryDefIsUnused]> {
+ let Inst{20} = 1;
let Inst{25} = 1;
}
def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, "rscs $dst, $a, $b",
+ DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b",
[(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]>;
+ Requires<[IsARM, CarryDefIsUnused]> {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
+ let Inst{20} = 1;
+ let Inst{25} = 0;
+}
}
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
@@ -1162,8 +1205,8 @@ defm BIC : AsI1_bin_irs<0b1110, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
- "bfc", " $dst, $imm", "$src = $dst",
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfc", "\t$dst, $imm", "$src = $dst",
[(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-21} = 0b0111110;
@@ -1171,19 +1214,19 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
}
def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
- "mvn", " $dst, $src",
+ "mvn", "\t$dst, $src",
[(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
let Inst{4} = 0;
}
def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
- IIC_iMOVsr, "mvn", " $dst, $src",
+ IIC_iMOVsr, "mvn", "\t$dst, $src",
[(set GPR:$dst, (not so_reg:$src))]>, UnaryDP {
let Inst{4} = 1;
let Inst{7} = 0;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
- IIC_iMOVi, "mvn", " $dst, $imm",
+ IIC_iMOVi, "mvn", "\t$dst, $imm",
[(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP {
let Inst{25} = 1;
}
@@ -1197,15 +1240,15 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm),
let isCommutable = 1 in
def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, "mul", " $dst, $a, $b",
+ IIC_iMUL32, "mul", "\t$dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "mla", " $dst, $a, $b, $c",
+ IIC_iMAC32, "mla", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "mls", " $dst, $a, $b, $c",
+ IIC_iMAC32, "mls", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6T2]>;
@@ -1214,31 +1257,31 @@ let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iMUL64,
- "smull", " $ldst, $hdst, $a, $b", []>;
+ "smull", "\t$ldst, $hdst, $a, $b", []>;
def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iMUL64,
- "umull", " $ldst, $hdst, $a, $b", []>;
+ "umull", "\t$ldst, $hdst, $a, $b", []>;
}
// Multiply + accumulate
def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iMAC64,
- "smlal", " $ldst, $hdst, $a, $b", []>;
+ "smlal", "\t$ldst, $hdst, $a, $b", []>;
def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iMAC64,
- "umlal", " $ldst, $hdst, $a, $b", []>;
+ "umlal", "\t$ldst, $hdst, $a, $b", []>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iMAC64,
- "umaal", " $ldst, $hdst, $a, $b", []>,
+ "umaal", "\t$ldst, $hdst, $a, $b", []>,
Requires<[IsARM, HasV6]>;
} // neverHasSideEffects
// Most significant word multiply
def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, "smmul", " $dst, $a, $b",
+ IIC_iMUL32, "smmul", "\t$dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001;
@@ -1246,7 +1289,7 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
}
def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "smmla", " $dst, $a, $b, $c",
+ IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001;
@@ -1254,7 +1297,7 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "smmls", " $dst, $a, $b, $c",
+ IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b1101;
@@ -1262,7 +1305,7 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
multiclass AI_smul<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "bb"), " $dst, $a, $b",
+ IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1271,7 +1314,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "bt"), " $dst, $a, $b",
+ IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1280,7 +1323,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "tb"), " $dst, $a, $b",
+ IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1289,7 +1332,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "tt"), " $dst, $a, $b",
+ IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1298,7 +1341,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL16, !strconcat(opc, "wb"), " $dst, $a, $b",
+ IIC_iMUL16, !strconcat(opc, "wb"), "\t$dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1307,7 +1350,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL16, !strconcat(opc, "wt"), " $dst, $a, $b",
+ IIC_iMUL16, !strconcat(opc, "wt"), "\t$dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1319,7 +1362,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
multiclass AI_smla<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>,
@@ -1329,7 +1372,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1338,7 +1381,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1347,16 +1390,16 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16)))))]>,
+ IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
+ (sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
let Inst{5} = 1;
let Inst{6} = 1;
}
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1365,7 +1408,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
+ IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@@ -1385,7 +1428,7 @@ defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
//
def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "clz", " $dst, $src",
+ "clz", "\t$dst, $src",
[(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> {
let Inst{7-4} = 0b0001;
let Inst{11-8} = 0b1111;
@@ -1393,7 +1436,7 @@ def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
}
def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev", " $dst, $src",
+ "rev", "\t$dst, $src",
[(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0011;
let Inst{11-8} = 0b1111;
@@ -1401,7 +1444,7 @@ def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
}
def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev16", " $dst, $src",
+ "rev16", "\t$dst, $src",
[(set GPR:$dst,
(or (and (srl GPR:$src, (i32 8)), 0xFF),
(or (and (shl GPR:$src, (i32 8)), 0xFF00),
@@ -1414,7 +1457,7 @@ def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
}
def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "revsh", " $dst, $src",
+ "revsh", "\t$dst, $src",
[(set GPR:$dst,
(sext_inreg
(or (srl (and GPR:$src, 0xFF00), (i32 8)),
@@ -1427,7 +1470,7 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
(ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", " $dst, $src1, $src2, LSL $shamt",
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
(and (shl GPR:$src2, (i32 imm:$shamt)),
0xFFFF0000)))]>,
@@ -1444,7 +1487,7 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
(ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", " $dst, $src1, $src2, ASR $shamt",
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
(and (sra GPR:$src2, imm16_31:$shamt),
0xFFFF)))]>, Requires<[IsARM, HasV6]> {
@@ -1490,7 +1533,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
- IIC_iCMOVr, "mov", " $dst, $true",
+ IIC_iCMOVr, "mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP {
let Inst{4} = 0;
@@ -1499,7 +1542,7 @@ def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
def MOVCCs : AI1<0b1101, (outs GPR:$dst),
(ins GPR:$false, so_reg:$true), DPSoRegFrm, IIC_iCMOVsr,
- "mov", " $dst, $true",
+ "mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP {
let Inst{4} = 1;
@@ -1509,7 +1552,7 @@ def MOVCCs : AI1<0b1101, (outs GPR:$dst),
def MOVCCi : AI1<0b1101, (outs GPR:$dst),
(ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi,
- "mov", " $dst, $true",
+ "mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP {
let Inst{25} = 1;
@@ -1524,7 +1567,7 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
let isCall = 1,
Defs = [R0, R12, LR, CPSR] in {
def TPsoft : ABXI<0b1011, (outs), (ins), IIC_Br,
- "bl __aeabi_read_tp",
+ "bl\t__aeabi_read_tp",
[(set R0, ARMthread_pointer)]>;
}
@@ -1548,12 +1591,12 @@ let Defs =
def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str sp, [$src, #+8] @ eh_setjmp begin\n\t"
- "add r12, pc, #8\n\t"
- "str r12, [$src, #+4]\n\t"
- "mov r0, #0\n\t"
- "add pc, pc, #0\n\t"
- "mov r0, #1 @ eh_setjmp end", "",
+ "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t"
+ "add\tr12, pc, #8\n\t"
+ "str\tr12, [$src, #+4]\n\t"
+ "mov\tr0, #0\n\t"
+ "add\tpc, pc, #0\n\t"
+ "mov\tr0, #1 @ eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>;
}
@@ -1573,7 +1616,7 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
let isReMaterializable = 1 in
def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src),
Pseudo, IIC_iMOVi,
- "mov", " $dst, $src",
+ "mov", "\t$dst, $src",
[(set GPR:$dst, so_imm2part:$src)]>,
Requires<[IsARM, NoV6T2]>;
@@ -1596,7 +1639,7 @@ def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS),
// FIXME: Remove this when we can do generalized remat.
let isReMaterializable = 1 in
def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
- "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
+ "movw", "\t$dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>,
Requires<[IsARM, HasV6T2]>;
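
Two edits recur throughout the ARMInstrInfo.td hunks above: the assembly format strings replace the leading space with \t, so the printed operands land in a tab column after the mnemonic, and the register-shifted (DPSoRegFrm) variants such as RSBrs, RSBSrs, RSCrs, and RSCSrs gain explicit `let Inst{...}` bits that were previously left unset. The Python sketch below is illustrative only, not LLVM's actual code emitter; it shows how such per-bit assignments compose into a 32-bit ARM data-processing word, using the same field positions the patch pins down (bit 25 set for an immediate operand, bits 4 and 7 for a register shift, bit 20 for the flag-setting 's' forms).

# Illustrative sketch only -- not LLVM's MC emitter. It mimics how the
# "let Inst{N} = ..." assignments in the hunks above compose a 32-bit
# ARM data-processing encoding.

def encode_dp(opcode, rd, rn, s=0, imm_form=False, reg_shift=False):
    word = 0b1110 << 28          # bits 31-28: AL condition
    word |= opcode << 21         # bits 24-21: data-processing opcode
    word |= s << 20              # bit 20: S flag (cf. RSBSrs/RSCSrs hunks)
    word |= rn << 16             # bits 19-16: first source register
    word |= rd << 12             # bits 15-12: destination register
    if imm_form:
        word |= 1 << 25          # bit 25 = 1: so_imm operand (DPFrm)
    if reg_shift:
        word |= 1 << 4           # bit 4 = 1 (bit 7 stays 0): so_reg shift
    return word

# RSB, register-shifted form (opcode 0b0011), as fixed by the patch:
print(hex(encode_dp(0b0011, rd=0, rn=1, reg_shift=True)))
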
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 822950c..25c4acd 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -2409,10 +2409,10 @@ def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane))>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
- (EXTRACT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
+ (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
- (EXTRACT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
+ (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
@@ -2459,11 +2459,11 @@ def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
(DSubReg_i32_reg imm:$lane)))>;
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
- (INSERT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
- SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+ (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
- (INSERT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
- SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+ (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
@@ -2841,13 +2841,16 @@ def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>;
def : N3VDsPat<fmul, VMULfd_sfp>;
// Vector Multiply-Accumulate/Subtract used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
-def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
+// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
-let neverHasSideEffects = 1 in
-def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
-def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
+//let neverHasSideEffects = 1 in
+//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
+//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+
+//let neverHasSideEffects = 1 in
+//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
+//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
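
The VMLA/VMLS hunk above disables the single-precision patterns rather than deleting them: as the new comment explains, vmla.f32/vmls.f32 can stall following ASIMD instructions by 4-8 cycles, so instruction selection should fall back to separate vmul/vadd pairs for now. A toy latency model (assumed round numbers, and it ignores whether the following instruction actually depends on the multiply-accumulate result -- real pipelines differ) makes the trade-off concrete:

# Toy model of the stall described in the comment above; the 6-cycle
# penalty is an assumed midpoint of the quoted 4-8 cycle range.

def cycles(seq, stall=6):
    total, prev = 0, None
    for op in seq:
        total += 1                       # one issue cycle per instruction
        if prev in ("vmla", "vmls") and op not in ("vmla", "vmls"):
            total += stall               # MAC result leaves its pipeline late
        prev = op
    return total

print(cycles(["vmla", "vadd", "vmla", "vadd"]))  # alternating: 16 cycles
print(cycles(["vmul", "vadd", "vmul", "vadd"]))  # split form:    4 cycles
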
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 9816add..5d02925 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -130,61 +130,67 @@ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
// For both thumb1 and thumb2.
let isNotDuplicable = 1 in
def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr,
- "\n$cp:\n\tadd $dst, pc",
+ "\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;
// PC relative add.
def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALUi,
- "add $dst, pc, $rhs * 4", []>;
+ "add\t$dst, pc, $rhs * 4", []>;
// ADD rd, sp, #imm8
def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALUi,
- "add $dst, $sp, $rhs * 4", []>;
+ "add\t$dst, $sp, $rhs * 4", []>;
// ADD sp, sp, #imm7
def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "add $dst, $rhs * 4", []>;
+ "add\t$dst, $rhs * 4", []>;
// SUB sp, sp, #imm7
def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "sub $dst, $rhs * 4", []>;
+ "sub\t$dst, $rhs * 4", []>;
// ADD rm, sp
def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- "add $dst, $rhs", []>;
+ "add\t$dst, $rhs", []>;
// ADD sp, rm
def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- "add $dst, $rhs", []>;
+ "add\t$dst, $rhs", []>;
// Pseudo instruction that will expand into a tSUBspi + a copy.
-let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
- NoItinerary, "@ sub $dst, $rhs * 4", []>;
+ NoItinerary, "@ sub\t$dst, $rhs * 4", []>;
def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- NoItinerary, "@ add $dst, $rhs", []>;
+ NoItinerary, "@ add\t$dst, $rhs", []>;
let Defs = [CPSR] in
def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- NoItinerary, "@ and $dst, $rhs", []>;
-} // usesCustomDAGSchedInserter
+ NoItinerary, "@ and\t$dst, $rhs", []>;
+} // usesCustomInserter
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def tBX_RET : TI<(outs), (ins), IIC_Br, "bx lr", [(ARMretflag)]>;
+ def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr", [(ARMretflag)]>;
// Alternative return instruction used by vararg functions.
- def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx $target", []>;
+ def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target", []>;
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBRIND : TI<(outs), (ins GPR:$dst), IIC_Br, "mov\tpc, $dst",
+ [(brind GPR:$dst)]>;
}
// FIXME: remove when we have a way of marking an MI with these properties.
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br,
- "pop${p} $wb", []>;
+ "pop${p}\t$wb", []>;
let isCall = 1,
Defs = [R0, R1, R2, R3, R12, LR,
@@ -193,25 +199,25 @@ let isCall = 1,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
// Also used for Thumb2
def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "bl ${func:call}",
+ "bl\t${func:call}",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb, IsNotDarwin]>;
// ARMv5T and above, also used for Thumb2
def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "blx ${func:call}",
+ "blx\t${func:call}",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]>;
// Also used for Thumb2
def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
- "blx $func",
+ "blx\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]>;
// ARMv4T
def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br,
- "mov lr, pc\n\tbx $func",
+ "mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb1Only, IsNotDarwin]>;
}
@@ -224,25 +230,25 @@ let isCall = 1,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
// Also used for Thumb2
def tBLr9 : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "bl ${func:call}",
+ "bl\t${func:call}",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb, IsDarwin]>;
// ARMv5T and above, also used for Thumb2
def tBLXi_r9 : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "blx ${func:call}",
+ "blx\t${func:call}",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T, IsDarwin]>;
// Also used for Thumb2
def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
- "blx $func",
+ "blx\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T, IsDarwin]>;
// ARMv4T
def tBXr9 : TIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br,
- "mov lr, pc\n\tbx $func",
+ "mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb1Only, IsDarwin]>;
}
@@ -251,16 +257,16 @@ let isBranch = 1, isTerminator = 1 in {
let isBarrier = 1 in {
let isPredicable = 1 in
def tB : T1I<(outs), (ins brtarget:$target), IIC_Br,
- "b $target", [(br bb:$target)]>;
+ "b\t$target", [(br bb:$target)]>;
// Far jump
let Defs = [LR] in
def tBfar : TIx2<(outs), (ins brtarget:$target), IIC_Br,
- "bl $target\t@ far jump",[]>;
+ "bl\t$target\t@ far jump",[]>;
def tBR_JTr : T1JTI<(outs),
(ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov pc, $target\n\t.align\t2\n$jt",
+ IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt",
[(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>;
}
}
@@ -269,79 +275,89 @@ let isBranch = 1, isTerminator = 1 in {
// a two-value operand where a dag node expects two operands. :(
let isBranch = 1, isTerminator = 1 in
def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), IIC_Br,
- "b$cc $target",
+ "b$cc\t$target",
[/*(ARMbrcond bb:$target, imm:$cc)*/]>;
+// Compare and branch on zero / non-zero
+let isBranch = 1, isTerminator = 1 in {
+ def tCBZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br,
+ "cbz\t$cmp, $target", []>;
+
+ def tCBNZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br,
+ "cbnz\t$cmp, $target", []>;
+}
+
//===----------------------------------------------------------------------===//
// Load Store Instructions.
//
let canFoldAsLoad = 1 in
def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
- "ldr", " $dst, $addr",
+ "ldr", "\t$dst, $addr",
[(set tGPR:$dst, (load t_addrmode_s4:$addr))]>;
def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr,
- "ldrb", " $dst, $addr",
+ "ldrb", "\t$dst, $addr",
[(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>;
def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr,
- "ldrh", " $dst, $addr",
+ "ldrh", "\t$dst, $addr",
[(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>;
let AddedComplexity = 10 in
def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
- "ldrsb", " $dst, $addr",
+ "ldrsb", "\t$dst, $addr",
[(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
let AddedComplexity = 10 in
def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
- "ldrsh", " $dst, $addr",
+ "ldrsh", "\t$dst, $addr",
[(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
let canFoldAsLoad = 1 in
def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
- "ldr", " $dst, $addr",
+ "ldr", "\t$dst, $addr",
[(set tGPR:$dst, (load t_addrmode_sp:$addr))]>;
// Special instruction for restore. It cannot clobber the condition register
// when it's expanded by eliminateCallFramePseudoInstr().
let canFoldAsLoad = 1, mayLoad = 1 in
def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
- "ldr", " $dst, $addr", []>;
+ "ldr", "\t$dst, $addr", []>;
// Load tconstpool
+// FIXME: Use ldr.n to work around a Darwin assembler bug.
let canFoldAsLoad = 1 in
def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
- "ldr", " $dst, $addr",
+ "ldr", ".n\t$dst, $addr",
[(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>;
// Special LDR for loads from non-pc-relative constpools.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
- "ldr", " $dst, $addr", []>;
+ "ldr", "\t$dst, $addr", []>;
def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer,
- "str", " $src, $addr",
+ "str", "\t$src, $addr",
[(store tGPR:$src, t_addrmode_s4:$addr)]>;
def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer,
- "strb", " $src, $addr",
+ "strb", "\t$src, $addr",
[(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>;
def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer,
- "strh", " $src, $addr",
+ "strh", "\t$src, $addr",
[(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>;
def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
- "str", " $src, $addr",
+ "str", "\t$src, $addr",
[(store tGPR:$src, t_addrmode_sp:$addr)]>;
let mayStore = 1 in {
// Special instruction for spill. It cannot clobber the condition register
// when it's expanded by eliminateCallFramePseudoInstr().
def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
- "str", " $src, $addr", []>;
+ "str", "\t$src, $addr", []>;
}
//===----------------------------------------------------------------------===//
@@ -353,21 +369,21 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def tLDM : T1I<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
IIC_iLoadm,
- "ldm${addr:submode}${p} $addr, $wb", []>;
+ "ldm${addr:submode}${p}\t$addr, $wb", []>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def tSTM : T1I<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
IIC_iStorem,
- "stm${addr:submode}${p} $addr, $wb", []>;
+ "stm${addr:submode}${p}\t$addr, $wb", []>;
let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
def tPOP : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br,
- "pop${p} $wb", []>;
+ "pop${p}\t$wb", []>;
let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
def tPUSH : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br,
- "push${p} $wb", []>;
+ "push${p}\t$wb", []>;
//===----------------------------------------------------------------------===//
// Arithmetic Instructions.
@@ -376,66 +392,66 @@ def tPUSH : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br,
// Add with carry register
let isCommutable = 1, Uses = [CPSR] in
def tADC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "adc", " $dst, $rhs",
+ "adc", "\t$dst, $rhs",
[(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
// Add immediate
def tADDi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "add", " $dst, $lhs, $rhs",
+ "add", "\t$dst, $lhs, $rhs",
[(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>;
def tADDi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "add", " $dst, $rhs",
+ "add", "\t$dst, $rhs",
[(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>;
// Add register
let isCommutable = 1 in
def tADDrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "add", " $dst, $lhs, $rhs",
+ "add", "\t$dst, $lhs, $rhs",
[(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
let neverHasSideEffects = 1 in
def tADDhirr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- "add", " $dst, $rhs", []>;
+ "add", "\t$dst, $rhs", []>;
// And register
let isCommutable = 1 in
def tAND : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "and", " $dst, $rhs",
+ "and", "\t$dst, $rhs",
[(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>;
// ASR immediate
def tASRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
- "asr", " $dst, $lhs, $rhs",
+ "asr", "\t$dst, $lhs, $rhs",
[(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>;
// ASR register
def tASRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
- "asr", " $dst, $rhs",
+ "asr", "\t$dst, $rhs",
[(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>;
// BIC register
def tBIC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "bic", " $dst, $rhs",
+ "bic", "\t$dst, $rhs",
[(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>;
// CMN register
let Defs = [CPSR] in {
def tCMN : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
- "cmn", " $lhs, $rhs",
+ "cmn", "\t$lhs, $rhs",
[(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
def tCMNZ : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
- "cmn", " $lhs, $rhs",
+ "cmn", "\t$lhs, $rhs",
[(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>;
}
// CMP immediate
let Defs = [CPSR] in {
def tCMPi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi,
- "cmp", " $lhs, $rhs",
+ "cmp", "\t$lhs, $rhs",
[(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>;
def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi,
- "cmp", " $lhs, $rhs",
+ "cmp", "\t$lhs, $rhs",
[(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>;
}
@@ -443,48 +459,48 @@ def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi,
// CMP register
let Defs = [CPSR] in {
def tCMPr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
- "cmp", " $lhs, $rhs",
+ "cmp", "\t$lhs, $rhs",
[(ARMcmp tGPR:$lhs, tGPR:$rhs)]>;
def tCMPzr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
- "cmp", " $lhs, $rhs",
+ "cmp", "\t$lhs, $rhs",
[(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>;
def tCMPhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
- "cmp", " $lhs, $rhs", []>;
+ "cmp", "\t$lhs, $rhs", []>;
def tCMPzhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
- "cmp", " $lhs, $rhs", []>;
+ "cmp", "\t$lhs, $rhs", []>;
}
// XOR register
let isCommutable = 1 in
def tEOR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "eor", " $dst, $rhs",
+ "eor", "\t$dst, $rhs",
[(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>;
// LSL immediate
def tLSLri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
- "lsl", " $dst, $lhs, $rhs",
+ "lsl", "\t$dst, $lhs, $rhs",
[(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>;
// LSL register
def tLSLrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
- "lsl", " $dst, $rhs",
+ "lsl", "\t$dst, $rhs",
[(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>;
// LSR immediate
def tLSRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
- "lsr", " $dst, $lhs, $rhs",
+ "lsr", "\t$dst, $lhs, $rhs",
[(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>;
// LSR register
def tLSRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
- "lsr", " $dst, $rhs",
+ "lsr", "\t$dst, $rhs",
[(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>;
// move register
def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
- "mov", " $dst, $src",
+ "mov", "\t$dst, $src",
[(set tGPR:$dst, imm0_255:$src)]>;
// TODO: A7-73: MOV(2) - mov setting flag.
@@ -493,45 +509,45 @@ def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
let neverHasSideEffects = 1 in {
// FIXME: Make this predicable.
def tMOVr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
- "mov $dst, $src", []>;
+ "mov\t$dst, $src", []>;
let Defs = [CPSR] in
def tMOVSr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
- "movs $dst, $src", []>;
+ "movs\t$dst, $src", []>;
// FIXME: Make these predicable.
def tMOVgpr2tgpr : T1I<(outs tGPR:$dst), (ins GPR:$src), IIC_iMOVr,
- "mov $dst, $src", []>;
+ "mov\t$dst, $src", []>;
def tMOVtgpr2gpr : T1I<(outs GPR:$dst), (ins tGPR:$src), IIC_iMOVr,
- "mov $dst, $src", []>;
+ "mov\t$dst, $src", []>;
def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
- "mov $dst, $src", []>;
+ "mov\t$dst, $src", []>;
} // neverHasSideEffects
// multiply register
let isCommutable = 1 in
def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32,
- "mul", " $dst, $rhs",
+ "mul", "\t$dst, $rhs",
[(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
// move inverse register
def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
- "mvn", " $dst, $src",
+ "mvn", "\t$dst, $src",
[(set tGPR:$dst, (not tGPR:$src))]>;
// bitwise or register
let isCommutable = 1 in
def tORR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "orr", " $dst, $rhs",
+ "orr", "\t$dst, $rhs",
[(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>;
// swaps
def tREV : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "rev", " $dst, $src",
+ "rev", "\t$dst, $src",
[(set tGPR:$dst, (bswap tGPR:$src))]>,
Requires<[IsThumb1Only, HasV6]>;
def tREV16 : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "rev16", " $dst, $src",
+ "rev16", "\t$dst, $src",
[(set tGPR:$dst,
(or (and (srl tGPR:$src, (i32 8)), 0xFF),
(or (and (shl tGPR:$src, (i32 8)), 0xFF00),
@@ -540,7 +556,7 @@ def tREV16 : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
Requires<[IsThumb1Only, HasV6]>;
def tREVSH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "revsh", " $dst, $src",
+ "revsh", "\t$dst, $src",
[(set tGPR:$dst,
(sext_inreg
(or (srl (and tGPR:$src, 0xFF00), (i32 8)),
@@ -549,70 +565,70 @@ def tREVSH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
// rotate right register
def tROR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
- "ror", " $dst, $rhs",
+ "ror", "\t$dst, $rhs",
[(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>;
// negate register
def tRSB : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALUi,
- "rsb", " $dst, $src, #0",
+ "rsb", "\t$dst, $src, #0",
[(set tGPR:$dst, (ineg tGPR:$src))]>;
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "sbc", " $dst, $rhs",
+ "sbc", "\t$dst, $rhs",
[(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>;
// Subtract immediate
def tSUBi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "sub", " $dst, $lhs, $rhs",
+ "sub", "\t$dst, $lhs, $rhs",
[(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>;
def tSUBi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "sub", " $dst, $rhs",
+ "sub", "\t$dst, $rhs",
[(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>;
// subtract register
def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "sub", " $dst, $lhs, $rhs",
+ "sub", "\t$dst, $lhs, $rhs",
[(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>;
// TODO: A7-96: STMIA - store multiple.
// sign-extend byte
def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "sxtb", " $dst, $src",
+ "sxtb", "\t$dst, $src",
[(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>,
Requires<[IsThumb1Only, HasV6]>;
// sign-extend short
def tSXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "sxth", " $dst, $src",
+ "sxth", "\t$dst, $src",
[(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>,
Requires<[IsThumb1Only, HasV6]>;
// test
let isCommutable = 1, Defs = [CPSR] in
def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
- "tst", " $lhs, $rhs",
+ "tst", "\t$lhs, $rhs",
[(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
// zero-extend byte
def tUXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "uxtb", " $dst, $src",
+ "uxtb", "\t$dst, $src",
[(set tGPR:$dst, (and tGPR:$src, 0xFF))]>,
Requires<[IsThumb1Only, HasV6]>;
// zero-extend short
def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "uxth", " $dst, $src",
+ "uxth", "\t$dst, $src",
[(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>,
Requires<[IsThumb1Only, HasV6]>;
// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation.
-// Expanded by the scheduler into a branch sequence.
-let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler.
+// Expanded after instruction selection into a branch sequence.
+let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
NoItinerary, "@ tMOVCCr $cc",
@@ -621,19 +637,19 @@ let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler.
// 16-bit movcc in IT blocks for Thumb2.
def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
- "mov", " $dst, $rhs", []>;
+ "mov", "\t$dst, $rhs", []>;
def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iCMOVi,
- "mov", " $dst, $rhs", []>;
+ "mov", "\t$dst, $rhs", []>;
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
- "adr$p $dst, #$label", []>;
+ "adr$p\t$dst, #$label", []>;
def tLEApcrelJT : T1I<(outs tGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
- IIC_iALUi, "adr$p $dst, #${label}_${id}", []>;
+ IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>;
//===----------------------------------------------------------------------===//
// TLS Instructions
@@ -643,7 +659,7 @@ def tLEApcrelJT : T1I<(outs tGPR:$dst),
let isCall = 1,
Defs = [R0, LR] in {
def tTPsoft : TIx2<(outs), (ins), IIC_Br,
- "bl __aeabi_read_tp",
+ "bl\t__aeabi_read_tp",
[(set R0, ARMthread_pointer)]>;
}
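
Beyond the tab fixes, the Thumb1 hunks rename usesCustomDAGSchedInserter to usesCustomInserter: these pseudo-instructions are now expanded by a target hook after instruction selection, not "by the scheduler". For tMOVCCr_pseudo that expansion has to synthesize a branch sequence, since Thumb1 has no predicated register move. The sketch below is conceptual only -- the real inserter splits machine basic blocks and joins them with a PHI -- and every name in it is hypothetical:

# Conceptual sketch (hypothetical helper, not an LLVM API): the branch
# sequence a custom inserter conceptually produces for tMOVCCr_pseudo,
# i.e. dst = cc ? true_reg : false_reg, without predicated moves.

def expand_tmovccr(cc, dst, false_reg, true_reg):
    inv = {"eq": "ne", "ne": "eq", "lt": "ge", "ge": "lt"}[cc]
    return [
        f"mov\t{dst}, {false_reg}",   # start with the 'false' value
        f"b{inv}\t.Ldone",            # skip the overwrite unless cc holds
        f"mov\t{dst}, {true_reg}",    # taken path: overwrite with 'true'
        ".Ldone:",
    ]

print("\n".join(expand_tmovccr("eq", "r0", "r1", "r2")))
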
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 2b6fa98..5bfda37 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -153,18 +153,18 @@ def t2addrmode_so_reg : Operand<i32>,
multiclass T2I_un_irs<string opc, PatFrag opnode, bit Cheap = 0, bit ReMat = 0>{
// shifted imm
def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
- opc, " $dst, $src",
+ opc, "\t$dst, $src",
[(set GPR:$dst, (opnode t2_so_imm:$src))]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
}
// register
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
- opc, ".w $dst, $src",
+ opc, ".w\t$dst, $src",
[(set GPR:$dst, (opnode GPR:$src))]>;
// shifted register
def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
- opc, ".w $dst, $src",
+ opc, ".w\t$dst, $src",
[(set GPR:$dst, (opnode t2_so_reg:$src))]>;
}
@@ -175,17 +175,17 @@ multiclass T2I_bin_irs<string opc, PatFrag opnode,
bit Commutable = 0, string wide =""> {
// shifted imm
def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- opc, " $dst, $lhs, $rhs",
+ opc, "\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
// register
def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- opc, !strconcat(wide, " $dst, $lhs, $rhs"),
+ opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
let isCommutable = Commutable;
}
// shifted register
def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- opc, !strconcat(wide, " $dst, $lhs, $rhs"),
+ opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
}
@@ -200,11 +200,11 @@ multiclass T2I_bin_w_irs<string opc, PatFrag opnode, bit Commutable = 0> :
multiclass T2I_rbin_is<string opc, PatFrag opnode> {
// shifted imm
def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
- opc, ".w $dst, $rhs, $lhs",
+ opc, ".w\t$dst, $rhs, $lhs",
[(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
// shifted register
def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
- opc, " $dst, $rhs, $lhs",
+ opc, "\t$dst, $rhs, $lhs",
[(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
}
@@ -214,17 +214,17 @@ let Defs = [CPSR] in {
multiclass T2I_bin_s_irs<string opc, PatFrag opnode, bit Commutable = 0> {
// shifted imm
def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- !strconcat(opc, "s"), ".w $dst, $lhs, $rhs",
+ !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
// register
def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- !strconcat(opc, "s"), ".w $dst, $lhs, $rhs",
+ !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
let isCommutable = Commutable;
}
// shifted register
def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- !strconcat(opc, "s"), ".w $dst, $lhs, $rhs",
+ !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
}
}
@@ -234,21 +234,21 @@ multiclass T2I_bin_s_irs<string opc, PatFrag opnode, bit Commutable = 0> {
multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> {
// shifted imm
def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- opc, ".w $dst, $lhs, $rhs",
+ opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
// 12-bit imm
def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
- !strconcat(opc, "w"), " $dst, $lhs, $rhs",
+ !strconcat(opc, "w"), "\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>;
// register
def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- opc, ".w $dst, $lhs, $rhs",
+ opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
let isCommutable = Commutable;
}
// shifted register
def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- opc, ".w $dst, $lhs, $rhs",
+ opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
}
@@ -259,32 +259,32 @@ let Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> {
// shifted imm
def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- opc, " $dst, $lhs, $rhs",
+ opc, "\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUnused]>;
// register
def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- opc, ".w $dst, $lhs, $rhs",
+ opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUnused]> {
let isCommutable = Commutable;
}
// shifted register
def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- opc, ".w $dst, $lhs, $rhs",
+ opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUnused]>;
// Carry setting variants
// shifted imm
def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- !strconcat(opc, "s $dst, $lhs, $rhs"),
+ !strconcat(opc, "s\t$dst, $lhs, $rhs"),
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUsed]> {
let Defs = [CPSR];
}
// register
def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- !strconcat(opc, "s.w $dst, $lhs, $rhs"),
+ !strconcat(opc, "s.w\t$dst, $lhs, $rhs"),
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUsed]> {
let Defs = [CPSR];
@@ -292,7 +292,7 @@ multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> {
}
// shifted register
def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- !strconcat(opc, "s.w $dst, $lhs, $rhs"),
+ !strconcat(opc, "s.w\t$dst, $lhs, $rhs"),
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2, CarryDefIsUsed]> {
let Defs = [CPSR];
@@ -306,12 +306,12 @@ multiclass T2I_rbin_s_is<string opc, PatFrag opnode> {
// shifted imm
def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s),
IIC_iALUi,
- !strconcat(opc, "${s}.w $dst, $rhs, $lhs"),
+ !strconcat(opc, "${s}.w\t$dst, $rhs, $lhs"),
[(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
// shifted register
def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s),
IIC_iALUsi,
- !strconcat(opc, "${s} $dst, $rhs, $lhs"),
+ !strconcat(opc, "${s}\t$dst, $rhs, $lhs"),
[(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
}
}
@@ -321,11 +321,11 @@ multiclass T2I_rbin_s_is<string opc, PatFrag opnode> {
multiclass T2I_sh_ir<string opc, PatFrag opnode> {
// 5-bit imm
def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
- opc, ".w $dst, $lhs, $rhs",
+ opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]>;
// register
def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr,
- opc, ".w $dst, $lhs, $rhs",
+ opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>;
}
@@ -336,15 +336,15 @@ let Defs = [CPSR] in {
multiclass T2I_cmp_is<string opc, PatFrag opnode> {
// shifted imm
def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi,
- opc, ".w $lhs, $rhs",
+ opc, ".w\t$lhs, $rhs",
[(opnode GPR:$lhs, t2_so_imm:$rhs)]>;
// register
def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
- opc, ".w $lhs, $rhs",
+ opc, ".w\t$lhs, $rhs",
[(opnode GPR:$lhs, GPR:$rhs)]>;
// shifted register
def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iCMPsi,
- opc, ".w $lhs, $rhs",
+ opc, ".w\t$lhs, $rhs",
[(opnode GPR:$lhs, t2_so_reg:$rhs)]>;
}
}
@@ -352,42 +352,44 @@ multiclass T2I_cmp_is<string opc, PatFrag opnode> {
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
multiclass T2I_ld<string opc, PatFrag opnode> {
def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), IIC_iLoadi,
- opc, ".w $dst, $addr",
+ opc, ".w\t$dst, $addr",
[(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]>;
def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi,
- opc, " $dst, $addr",
+ opc, "\t$dst, $addr",
[(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]>;
def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), IIC_iLoadr,
- opc, ".w $dst, $addr",
+ opc, ".w\t$dst, $addr",
[(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]>;
def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
- opc, ".w $dst, $addr",
- [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]>;
+ opc, ".w\t$dst, $addr",
+ [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ let isReMaterializable = 1;
+ }
}
/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
multiclass T2I_st<string opc, PatFrag opnode> {
def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), IIC_iStorei,
- opc, ".w $src, $addr",
+ opc, ".w\t$src, $addr",
[(opnode GPR:$src, t2addrmode_imm12:$addr)]>;
def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), IIC_iStorei,
- opc, " $src, $addr",
+ opc, "\t$src, $addr",
[(opnode GPR:$src, t2addrmode_imm8:$addr)]>;
def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), IIC_iStorer,
- opc, ".w $src, $addr",
+ opc, ".w\t$src, $addr",
[(opnode GPR:$src, t2addrmode_so_reg:$addr)]>;
}
/// T2I_picld - Defines the PIC load pattern.
class T2I_picld<string opc, PatFrag opnode> :
T2I<(outs GPR:$dst), (ins addrmodepc:$addr), IIC_iLoadi,
- !strconcat("\n${addr:label}:\n\t", opc), " $dst, $addr",
+ !strconcat("\n${addr:label}:\n\t", opc), "\t$dst, $addr",
[(set GPR:$dst, (opnode addrmodepc:$addr))]>;
/// T2I_picst - Defines the PIC store pattern.
class T2I_picst<string opc, PatFrag opnode> :
T2I<(outs), (ins GPR:$src, addrmodepc:$addr), IIC_iStorer,
- !strconcat("\n${addr:label}:\n\t", opc), " $src, $addr",
+ !strconcat("\n${addr:label}:\n\t", opc), "\t$src, $addr",
[(opnode GPR:$src, addrmodepc:$addr)]>;
@@ -395,10 +397,10 @@ class T2I_picst<string opc, PatFrag opnode> :
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_unary_rrot<string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- opc, ".w $dst, $src",
+ opc, ".w\t$dst, $src",
[(set GPR:$dst, (opnode GPR:$src))]>;
def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
- opc, ".w $dst, $src, ror $rot",
+ opc, ".w\t$dst, $src, ror $rot",
[(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>;
}
@@ -406,10 +408,10 @@ multiclass T2I_unary_rrot<string opc, PatFrag opnode> {
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_bin_rrot<string opc, PatFrag opnode> {
def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
- opc, " $dst, $LHS, $RHS",
+ opc, "\t$dst, $LHS, $RHS",
[(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>;
def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
- IIC_iALUsr, opc, " $dst, $LHS, $RHS, ror $rot",
+ IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
[(set GPR:$dst, (opnode GPR:$LHS,
(rotr GPR:$RHS, rot_imm:$rot)))]>;
}
@@ -425,43 +427,43 @@ multiclass T2I_bin_rrot<string opc, PatFrag opnode> {
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
- "adr$p.w $dst, #$label", []>;
+ "adr$p.w\t$dst, #$label", []>;
def t2LEApcrelJT : T2XI<(outs GPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
- "adr$p.w $dst, #${label}_${id}", []>;
+ "adr$p.w\t$dst, #${label}_${id}", []>;
// ADD r, sp, {so_imm|i12}
def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- IIC_iALUi, "add", ".w $dst, $sp, $imm", []>;
+ IIC_iALUi, "add", ".w\t$dst, $sp, $imm", []>;
def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- IIC_iALUi, "addw", " $dst, $sp, $imm", []>;
+ IIC_iALUi, "addw", "\t$dst, $sp, $imm", []>;
// ADD r, sp, so_reg
def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- IIC_iALUsi, "add", ".w $dst, $sp, $rhs", []>;
+ IIC_iALUsi, "add", ".w\t$dst, $sp, $rhs", []>;
// SUB r, sp, {so_imm|i12}
def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- IIC_iALUi, "sub", ".w $dst, $sp, $imm", []>;
+ IIC_iALUi, "sub", ".w\t$dst, $sp, $imm", []>;
def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- IIC_iALUi, "subw", " $dst, $sp, $imm", []>;
+ IIC_iALUi, "subw", "\t$dst, $sp, $imm", []>;
// SUB r, sp, so_reg
def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
IIC_iALUsi,
- "sub", " $dst, $sp, $rhs", []>;
+ "sub", "\t$dst, $sp, $rhs", []>;
// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
-let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- NoItinerary, "@ sub.w $dst, $sp, $imm", []>;
+ NoItinerary, "@ sub.w\t$dst, $sp, $imm", []>;
def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- NoItinerary, "@ subw $dst, $sp, $imm", []>;
+ NoItinerary, "@ subw\t$dst, $sp, $imm", []>;
def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- NoItinerary, "@ sub $dst, $sp, $rhs", []>;
-} // usesCustomDAGSchedInserter
+ NoItinerary, "@ sub\t$dst, $sp, $rhs", []>;
+} // usesCustomInserter
//===----------------------------------------------------------------------===//
@@ -484,10 +486,10 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoadi, "ldrd", " $dst1, $addr", []>;
+ IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>;
def t2LDRDpci : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2),
(ins i32imm:$addr), IIC_iLoadi,
- "ldrd", " $dst1, $addr", []>;
+ "ldrd", "\t$dst1, $addr", []>;
}
// zextload i1 -> zextload i8
@@ -535,57 +537,57 @@ let mayLoad = 1 in {
def t2LDR_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
- "ldr", " $dst, $addr!", "$addr.base = $base_wb",
+ "ldr", "\t$dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDR_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
- "ldr", " $dst, [$base], $offset", "$base = $base_wb",
+ "ldr", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
- "ldrb", " $dst, $addr!", "$addr.base = $base_wb",
+ "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
- "ldrb", " $dst, [$base], $offset", "$base = $base_wb",
+ "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
- "ldrh", " $dst, $addr!", "$addr.base = $base_wb",
+ "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
- "ldrh", " $dst, [$base], $offset", "$base = $base_wb",
+ "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRSB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
- "ldrsb", " $dst, $addr!", "$addr.base = $base_wb",
+ "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRSB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
- "ldrsb", " $dst, [$base], $offset", "$base = $base_wb",
+ "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRSH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
- "ldrsh", " $dst, $addr!", "$addr.base = $base_wb",
+ "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRSH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
- "ldrsh", " $dst, [$base], $offset", "$base = $base_wb",
+ "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
}
@@ -598,48 +600,48 @@ defm t2STRH : T2I_st<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<(outs),
(ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr),
- IIC_iStorer, "strd", " $src1, $addr", []>;
+ IIC_iStorer, "strd", "\t$src1, $addr", []>;
// Indexed stores
def t2STR_PRE : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
- "str", " $src, [$base, $offset]!", "$base = $base_wb",
+ "str", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
- "str", " $src, [$base], $offset", "$base = $base_wb",
+ "str", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
- "strh", " $src, [$base, $offset]!", "$base = $base_wb",
+ "strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRH_POST : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
- "strh", " $src, [$base], $offset", "$base = $base_wb",
+ "strh", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRB_PRE : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
- "strb", " $src, [$base, $offset]!", "$base = $base_wb",
+ "strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRB_POST : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
- "strb", " $src, [$base], $offset", "$base = $base_wb",
+ "strb", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
@@ -653,12 +655,12 @@ def t2STRB_POST : T2Iidxldst<(outs GPR:$base_wb),
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def t2LDM : T2XI<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide} $addr, $wb", []>;
+ IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def t2STM : T2XI<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- IIC_iStorem, "stm${addr:submode}${p}${addr:wide} $addr, $wb", []>;
+ IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []>;
//===----------------------------------------------------------------------===//
// Move Instructions.
@@ -666,22 +668,22 @@ def t2STM : T2XI<(outs),
let neverHasSideEffects = 1 in
def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
- "mov", ".w $dst, $src", []>;
+ "mov", ".w\t$dst, $src", []>;
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
- "mov", ".w $dst, $src",
+ "mov", ".w\t$dst, $src",
[(set GPR:$dst, t2_so_imm:$src)]>;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
- "movw", " $dst, $src",
+ "movw", "\t$dst, $src",
[(set GPR:$dst, imm0_65535:$src)]>;
let Constraints = "$src = $dst" in
def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
- "movt", " $dst, $imm",
+ "movt", "\t$dst, $imm",
[(set GPR:$dst,
(or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>;
@@ -760,16 +762,16 @@ defm t2ROR : T2I_sh_ir<"ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
let Uses = [CPSR] in {
def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- "rrx", " $dst, $src",
+ "rrx", "\t$dst, $src",
[(set GPR:$dst, (ARMrrx GPR:$src))]>;
}
let Defs = [CPSR] in {
def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- "lsrs.w $dst, $src, #1",
+ "lsrs.w\t$dst, $src, #1",
[(set GPR:$dst, (ARMsrl_flag GPR:$src))]>;
def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- "asrs.w $dst, $src, #1",
+ "asrs.w\t$dst, $src, #1",
[(set GPR:$dst, (ARMsra_flag GPR:$src))]>;
}
@@ -785,14 +787,14 @@ defm t2BIC : T2I_bin_w_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
let Constraints = "$src = $dst" in
def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
- IIC_iALUi, "bfc", " $dst, $imm",
+ IIC_iUNAsi, "bfc", "\t$dst, $imm",
[(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>;
def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "sbfx", " $dst, $src, $lsb, $width", []>;
+ IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []>;
def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "ubfx", " $dst, $src, $lsb, $width", []>;
+ IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []>;
// FIXME: A8.6.18 BFI - Bitfield insert (Encoding T1)
@@ -819,80 +821,80 @@ def : T2Pat<(t2_so_imm_not:$src),
//
let isCommutable = 1 in
def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
- "mul", " $dst, $a, $b",
+ "mul", "\t$dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
- "mla", " $dst, $a, $b, $c",
+ "mla", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
- "mls", " $dst, $a, $b, $c",
+ "mls", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
- "smull", " $ldst, $hdst, $a, $b", []>;
+ "smull", "\t$ldst, $hdst, $a, $b", []>;
def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
- "umull", " $ldst, $hdst, $a, $b", []>;
+ "umull", "\t$ldst, $hdst, $a, $b", []>;
}
// Multiply + accumulate
def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
- "smlal", " $ldst, $hdst, $a, $b", []>;
+ "smlal", "\t$ldst, $hdst, $a, $b", []>;
def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
- "umlal", " $ldst, $hdst, $a, $b", []>;
+ "umlal", "\t$ldst, $hdst, $a, $b", []>;
def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
- "umaal", " $ldst, $hdst, $a, $b", []>;
+ "umaal", "\t$ldst, $hdst, $a, $b", []>;
} // neverHasSideEffects
// Most significant word multiply
def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
- "smmul", " $dst, $a, $b",
+ "smmul", "\t$dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>;
def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
- "smmla", " $dst, $a, $b, $c",
+ "smmla", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>;
def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
- "smmls", " $dst, $a, $b, $c",
+ "smmls", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>;
multiclass T2I_smul<string opc, PatFrag opnode> {
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
- !strconcat(opc, "bb"), " $dst, $a, $b",
+ !strconcat(opc, "bb"), "\t$dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>;
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
- !strconcat(opc, "bt"), " $dst, $a, $b",
+ !strconcat(opc, "bt"), "\t$dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>;
def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
- !strconcat(opc, "tb"), " $dst, $a, $b",
+ !strconcat(opc, "tb"), "\t$dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>;
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
- !strconcat(opc, "tt"), " $dst, $a, $b",
+ !strconcat(opc, "tt"), "\t$dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>;
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
- !strconcat(opc, "wb"), " $dst, $a, $b",
+ !strconcat(opc, "wb"), "\t$dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>;
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
- !strconcat(opc, "wt"), " $dst, $a, $b",
+ !strconcat(opc, "wt"), "\t$dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>;
}
@@ -900,33 +902,33 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
multiclass T2I_smla<string opc, PatFrag opnode> {
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
- !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
+ !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>;
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
- !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
+ !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>;
def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
- !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
+ !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>;
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
- !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
+ !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>;
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
- !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
+ !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>;
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
- !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
+ !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>;
}
@@ -943,15 +945,15 @@ defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
//
def t2CLZ : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "clz", " $dst, $src",
+ "clz", "\t$dst, $src",
[(set GPR:$dst, (ctlz GPR:$src))]>;
def t2REV : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev", ".w $dst, $src",
+ "rev", ".w\t$dst, $src",
[(set GPR:$dst, (bswap GPR:$src))]>;
def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev16", ".w $dst, $src",
+ "rev16", ".w\t$dst, $src",
[(set GPR:$dst,
(or (and (srl GPR:$src, (i32 8)), 0xFF),
(or (and (shl GPR:$src, (i32 8)), 0xFF00),
@@ -959,14 +961,14 @@ def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
(and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>;
def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "revsh", ".w $dst, $src",
+ "revsh", ".w\t$dst, $src",
[(set GPR:$dst,
(sext_inreg
(or (srl (and GPR:$src, 0xFF00), (i32 8)),
(shl GPR:$src, (i32 8))), i16))]>;
def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", " $dst, $src1, $src2, LSL $shamt",
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
(and (shl GPR:$src2, (i32 imm:$shamt)),
0xFFFF0000)))]>;
@@ -978,7 +980,7 @@ def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
(t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", " $dst, $src1, $src2, ASR $shamt",
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
(and (sra GPR:$src2, imm16_31:$shamt),
0xFFFF)))]>;
@@ -1025,26 +1027,26 @@ defm t2TEQ : T2I_cmp_is<"teq",
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
- "mov", ".w $dst, $true",
+ "mov", ".w\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">;
def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true),
- IIC_iCMOVi, "mov", ".w $dst, $true",
+ IIC_iCMOVi, "mov", ".w\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">;
def t2MOVCClsl : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
- IIC_iCMOVsi, "lsl", ".w $dst, $true, $rhs", []>,
+ IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
def t2MOVCClsr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
- IIC_iCMOVsi, "lsr", ".w $dst, $true, $rhs", []>,
+ IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
def t2MOVCCasr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
- IIC_iCMOVsi, "asr", ".w $dst, $true, $rhs", []>,
+ IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
- IIC_iCMOVsi, "ror", ".w $dst, $true, $rhs", []>,
+ IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
//===----------------------------------------------------------------------===//
@@ -1055,7 +1057,7 @@ def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
let isCall = 1,
Defs = [R0, R12, LR, CPSR] in {
def t2TPsoft : T2XI<(outs), (ins), IIC_Br,
- "bl __aeabi_read_tp",
+ "bl\t__aeabi_read_tp",
[(set R0, ARMthread_pointer)]>;
}
@@ -1078,13 +1080,13 @@ let Defs =
D31 ] in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src),
AddrModeNone, SizeSpecial, NoItinerary,
- "str.w sp, [$src, #+8] @ eh_setjmp begin\n"
- "\tadr r12, 0f\n"
- "\torr r12, #1\n"
- "\tstr.w r12, [$src, #+4]\n"
- "\tmovs r0, #0\n"
- "\tb 1f\n"
- "0:\tmovs r0, #1 @ eh_setjmp end\n"
+ "str.w\tsp, [$src, #+8] @ eh_setjmp begin\n"
+ "\tadr\tr12, 0f\n"
+ "\torr.w\tr12, r12, #1\n"
+ "\tstr.w\tr12, [$src, #+4]\n"
+ "\tmovs\tr0, #0\n"
+ "\tb\t1f\n"
+ "0:\tmovs\tr0, #1 @ eh_setjmp end\n"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>;
}
@@ -1103,32 +1105,32 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def t2LDM_RET : T2XI<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- IIC_Br, "ldm${addr:submode}${p}${addr:wide} $addr, $wb",
+ IIC_Br, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb",
[]>;
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
def t2B : T2XI<(outs), (ins brtarget:$target), IIC_Br,
- "b.w $target",
+ "b.w\t$target",
[(br bb:$target)]>;
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT :
T2JTI<(outs),
(ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "mov pc, $target\n$jt",
+ IIC_Br, "mov\tpc, $target\n$jt",
[(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
// FIXME: Add a non-pc based case that can be predicated.
def t2TBB :
T2JTI<(outs),
(ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbb $index\n$jt", []>;
+ IIC_Br, "tbb\t$index\n$jt", []>;
def t2TBH :
T2JTI<(outs),
(ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbh $index\n$jt", []>;
+ IIC_Br, "tbh\t$index\n$jt", []>;
} // isNotDuplicable, isIndirectBranch
} // isBranch, isTerminator, isBarrier
@@ -1137,14 +1139,14 @@ def t2TBH :
// a two-value operand where a dag node expects two operands. :(
let isBranch = 1, isTerminator = 1 in
def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
- "b", ".w $target",
+ "b", ".w\t$target",
[/*(ARMbrcond bb:$target, imm:$cc)*/]>;
// IT block
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
AddrModeNone, Size2Bytes, IIC_iALUx,
- "it$mask $cc", "", []>;
+ "it$mask\t$cc", "", []>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1175,5 +1177,5 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// when we can do generalized remat.
let isReMaterializable = 1 in
def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
- "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
+ "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>;
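
Background for the t2MOVi32imm pseudo above: it materializes an arbitrary 32-bit constant as a movw/movt pair, with movw zero-extending the low half and movt overwriting the high half. A minimal standalone sketch of how the ${src:lo16}/${src:hi16} operands fall out of a constant (the constant and register below are illustrative, not from the patch):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Imm = 0xDEADBEEF;             // arbitrary example constant
  uint16_t Lo16 = Imm & 0xFFFF;          // movw writes this, zero-extending
  uint16_t Hi16 = Imm >> 16;             // movt patches this into bits 31:16
  std::printf("movw r0, #0x%04X\n", (unsigned)Lo16);
  std::printf("movt r0, #0x%04X\n", (unsigned)Hi16);
  return 0;
}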
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 56336d1..455c33b 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -31,25 +31,45 @@ def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+
+def vfp_f32imm : Operand<f32>,
+ PatLeaf<(f32 fpimm), [{
+ return ARM::getVFPf32Imm(N->getValueAPF()) != -1;
+ }]> {
+ let PrintMethod = "printVFPf32ImmOperand";
+}
+
+def vfp_f64imm : Operand<f64>,
+ PatLeaf<(f64 fpimm), [{
+ return ARM::getVFPf64Imm(N->getValueAPF()) != -1;
+ }]> {
+ let PrintMethod = "printVFPf64ImmOperand";
+}
+
+
+//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
let canFoldAsLoad = 1 in {
def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad64, "fldd", " $dst, $addr",
+ IIC_fpLoad64, "fldd", "\t$dst, $addr",
[(set DPR:$dst, (load addrmode5:$addr))]>;
def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad32, "flds", " $dst, $addr",
+ IIC_fpLoad32, "flds", "\t$dst, $addr",
[(set SPR:$dst, (load addrmode5:$addr))]>;
} // canFoldAsLoad
def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
- IIC_fpStore64, "fstd", " $src, $addr",
+ IIC_fpStore64, "fstd", "\t$src, $addr",
[(store DPR:$src, addrmode5:$addr)]>;
def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
- IIC_fpStore32, "fsts", " $src, $addr",
+ IIC_fpStore32, "fsts", "\t$src, $addr",
[(store SPR:$src, addrmode5:$addr)]>;
//===----------------------------------------------------------------------===//
@@ -59,14 +79,14 @@ def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpLoadm,
- "fldm${addr:submode}d${p} ${addr:base}, $wb",
+ "fldm${addr:submode}d${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 1;
}
def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpLoadm,
- "fldm${addr:submode}s${p} ${addr:base}, $wb",
+ "fldm${addr:submode}s${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 1;
}
@@ -75,14 +95,14 @@ def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpStorem,
- "fstm${addr:submode}d${p} ${addr:base}, $wb",
+ "fstm${addr:submode}d${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 0;
}
def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpStorem,
- "fstm${addr:submode}s${p} ${addr:base}, $wb",
+ "fstm${addr:submode}s${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 0;
}
@@ -95,48 +115,48 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
//
def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "faddd", " $dst, $a, $b",
+ IIC_fpALU64, "faddd", "\t$dst, $a, $b",
[(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "fadds", " $dst, $a, $b",
+ IIC_fpALU32, "fadds", "\t$dst, $a, $b",
[(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
// These are encoded as unary instructions.
let Defs = [FPSCR] in {
def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b),
- IIC_fpCMP64, "fcmped", " $a, $b",
+ IIC_fpCMP64, "fcmped", "\t$a, $b",
[(arm_cmpfp DPR:$a, DPR:$b)]>;
def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b),
- IIC_fpCMP32, "fcmpes", " $a, $b",
+ IIC_fpCMP32, "fcmpes", "\t$a, $b",
[(arm_cmpfp SPR:$a, SPR:$b)]>;
}
def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpDIV64, "fdivd", " $dst, $a, $b",
+ IIC_fpDIV64, "fdivd", "\t$dst, $a, $b",
[(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpDIV32, "fdivs", " $dst, $a, $b",
+ IIC_fpDIV32, "fdivs", "\t$dst, $a, $b",
[(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "fmuld", " $dst, $a, $b",
+ IIC_fpMUL64, "fmuld", "\t$dst, $a, $b",
[(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "fmuls", " $dst, $a, $b",
+ IIC_fpMUL32, "fmuls", "\t$dst, $a, $b",
[(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "fnmuld", " $dst, $a, $b",
+ IIC_fpMUL64, "fnmuld", "\t$dst, $a, $b",
[(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> {
let Inst{6} = 1;
}
def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "fnmuls", " $dst, $a, $b",
+ IIC_fpMUL32, "fnmuls", "\t$dst, $a, $b",
[(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> {
let Inst{6} = 1;
}
@@ -149,13 +169,13 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b),
def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "fsubd", " $dst, $a, $b",
+ IIC_fpALU64, "fsubd", "\t$dst, $a, $b",
[(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> {
let Inst{6} = 1;
}
def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "fsubs", " $dst, $a, $b",
+ IIC_fpALU32, "fsubs", "\t$dst, $a, $b",
[(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
let Inst{6} = 1;
}
@@ -165,30 +185,30 @@ def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
//
def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fabsd", " $dst, $a",
+ IIC_fpUNA64, "fabsd", "\t$dst, $a",
[(set DPR:$dst, (fabs DPR:$a))]>;
def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fabss", " $dst, $a",
+ IIC_fpUNA32, "fabss", "\t$dst, $a",
[(set SPR:$dst, (fabs SPR:$a))]>;
let Defs = [FPSCR] in {
def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
- IIC_fpCMP64, "fcmpezd", " $a",
+ IIC_fpCMP64, "fcmpezd", "\t$a",
[(arm_cmpfp0 DPR:$a)]>;
def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a),
- IIC_fpCMP32, "fcmpezs", " $a",
+ IIC_fpCMP32, "fcmpezs", "\t$a",
[(arm_cmpfp0 SPR:$a)]>;
}
def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTDS, "fcvtds", " $dst, $a",
+ IIC_fpCVTDS, "fcvtds", "\t$dst, $a",
[(set DPR:$dst, (fextend SPR:$a))]>;
// Special case encoding: bits 11-8 is 0b1011.
def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
- IIC_fpCVTSD, "fcvtsd", " $dst, $a",
+ IIC_fpCVTSD, "fcvtsd", "\t$dst, $a",
[(set SPR:$dst, (fround DPR:$a))]> {
let Inst{27-23} = 0b11101;
let Inst{21-16} = 0b110111;
@@ -198,26 +218,26 @@ def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
let neverHasSideEffects = 1 in {
def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fcpyd", " $dst, $a", []>;
+ IIC_fpUNA64, "fcpyd", "\t$dst, $a", []>;
def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fcpys", " $dst, $a", []>;
+ IIC_fpUNA32, "fcpys", "\t$dst, $a", []>;
} // neverHasSideEffects
def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fnegd", " $dst, $a",
+ IIC_fpUNA64, "fnegd", "\t$dst, $a",
[(set DPR:$dst, (fneg DPR:$a))]>;
def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fnegs", " $dst, $a",
+ IIC_fpUNA32, "fnegs", "\t$dst, $a",
[(set SPR:$dst, (fneg SPR:$a))]>;
def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpSQRT64, "fsqrtd", " $dst, $a",
+ IIC_fpSQRT64, "fsqrtd", "\t$dst, $a",
[(set DPR:$dst, (fsqrt DPR:$a))]>;
def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpSQRT32, "fsqrts", " $dst, $a",
+ IIC_fpSQRT32, "fsqrts", "\t$dst, $a",
[(set SPR:$dst, (fsqrt SPR:$a))]>;
//===----------------------------------------------------------------------===//
@@ -225,16 +245,16 @@ def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
//
def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
- IIC_VMOVSI, "fmrs", " $dst, $src",
+ IIC_VMOVSI, "fmrs", "\t$dst, $src",
[(set GPR:$dst, (bitconvert SPR:$src))]>;
def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "fmsr", " $dst, $src",
+ IIC_VMOVIS, "fmsr", "\t$dst, $src",
[(set SPR:$dst, (bitconvert GPR:$src))]>;
def FMRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
- IIC_VMOVDI, "fmrrd", " $wb, $dst2, $src",
+ IIC_VMOVDI, "fmrrd", "\t$wb, $dst2, $src",
[/* FIXME: Can't write pattern for multiple result instr*/]>;
// FMDHR: GPR -> SPR
@@ -242,7 +262,7 @@ def FMRRD : AVConv3I<0b11000101, 0b1011,
def FMDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
- IIC_VMOVID, "fmdrr", " $dst, $src1, $src2",
+ IIC_VMOVID, "fmdrr", "\t$dst, $src1, $src2",
[(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
// FMRDH: SPR -> GPR
@@ -258,23 +278,23 @@ def FMDRR : AVConv5I<0b11000100, 0b1011,
// Int to FP:
def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "fsitod", " $dst, $a",
+ IIC_fpCVTID, "fsitod", "\t$dst, $a",
[(set DPR:$dst, (arm_sitof SPR:$a))]> {
let Inst{7} = 1;
}
def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
- IIC_fpCVTIS, "fsitos", " $dst, $a",
+ IIC_fpCVTIS, "fsitos", "\t$dst, $a",
[(set SPR:$dst, (arm_sitof SPR:$a))]> {
let Inst{7} = 1;
}
def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "fuitod", " $dst, $a",
+ IIC_fpCVTID, "fuitod", "\t$dst, $a",
[(set DPR:$dst, (arm_uitof SPR:$a))]>;
def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
- IIC_fpCVTIS, "fuitos", " $dst, $a",
+ IIC_fpCVTIS, "fuitos", "\t$dst, $a",
[(set SPR:$dst, (arm_uitof SPR:$a))]>;
// FP to Int:
@@ -282,28 +302,28 @@ def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "ftosizd", " $dst, $a",
+ IIC_fpCVTDI, "ftosizd", "\t$dst, $a",
[(set SPR:$dst, (arm_ftosi DPR:$a))]> {
let Inst{7} = 1; // Z bit
}
def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "ftosizs", " $dst, $a",
+ IIC_fpCVTSI, "ftosizs", "\t$dst, $a",
[(set SPR:$dst, (arm_ftosi SPR:$a))]> {
let Inst{7} = 1; // Z bit
}
def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "ftouizd", " $dst, $a",
+ IIC_fpCVTDI, "ftouizd", "\t$dst, $a",
[(set SPR:$dst, (arm_ftoui DPR:$a))]> {
let Inst{7} = 1; // Z bit
}
def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "ftouizs", " $dst, $a",
+ IIC_fpCVTSI, "ftouizs", "\t$dst, $a",
[(set SPR:$dst, (arm_ftoui SPR:$a))]> {
let Inst{7} = 1; // Z bit
}
@@ -313,34 +333,34 @@ def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
//
def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fmacd", " $dst, $a, $b",
+ IIC_fpMAC64, "fmacd", "\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fmacs", " $dst, $a, $b",
+ IIC_fpMAC32, "fmacs", "\t$dst, $a, $b",
[(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fmscd", " $dst, $a, $b",
+ IIC_fpMAC64, "fmscd", "\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fmscs", " $dst, $a, $b",
+ IIC_fpMAC32, "fmscs", "\t$dst, $a, $b",
[(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fnmacd", " $dst, $a, $b",
+ IIC_fpMAC64, "fnmacd", "\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fnmacs", " $dst, $a, $b",
+ IIC_fpMAC32, "fnmacs", "\t$dst, $a, $b",
[(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
@@ -352,14 +372,14 @@ def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
(FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fnmscd", " $dst, $a, $b",
+ IIC_fpMAC64, "fnmscd", "\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fnmscs", " $dst, $a, $b",
+ IIC_fpMAC32, "fnmscs", "\t$dst, $a, $b",
[(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
@@ -371,25 +391,25 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "fcpyd", " $dst, $true",
+ IIC_fpUNA64, "fcpyd", "\t$dst, $true",
[/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100,
(outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "fcpys", " $dst, $true",
+ IIC_fpUNA32, "fcpys", "\t$dst, $true",
[/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "fnegd", " $dst, $true",
+ IIC_fpUNA64, "fnegd", "\t$dst, $true",
[/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
(outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "fnegs", " $dst, $true",
+ IIC_fpUNA32, "fnegs", "\t$dst, $true",
[/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
@@ -399,7 +419,8 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
//
let Defs = [CPSR], Uses = [FPSCR] in
-def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", [(arm_fmstat)]> {
+def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "",
+ [(arm_fmstat)]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
let Inst{15-12} = 0b1111;
@@ -407,3 +428,29 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", [(arm_fm
let Inst{7} = 0;
let Inst{4} = 1;
}
+
+
+// Materialize FP immediates. VFP3 only.
+let isReMaterializable = 1 in
+def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
+ VFPMiscFrm, IIC_VMOVImm,
+ "fconsts", "\t$dst, $imm",
+ [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ let Inst{27-23} = 0b11101;
+ let Inst{21-20} = 0b11;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0;
+ let Inst{7-4} = 0b0000;
+}
+
+let isReMaterializable = 1 in
+def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
+ VFPMiscFrm, IIC_VMOVImm,
+ "fconstd", "\t$dst, $imm",
+ [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ let Inst{27-23} = 0b11101;
+ let Inst{21-20} = 0b11;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1;
+ let Inst{7-4} = 0b0000;
+}
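
The vfp_f32imm/vfp_f64imm predicates above admit only constants that ARM::getVFPf32Imm/getVFPf64Imm can encode, i.e. VFPv3 immediates of the form ±(16+m)/16 · 2^e with m in 0..15 and e in -3..4. A rough standalone sketch of the single-precision check (the function name is made up and plain IEEE-754 bit layout is assumed; LLVM's real helper works on APFloat):

#include <cstdint>
#include <cstring>

// Returns the 8-bit VFPv3 immediate encoding of f, or -1 if not encodable.
int encodeVFPf32Imm(float f) {
  uint32_t Bits;
  std::memcpy(&Bits, &f, sizeof Bits);
  uint32_t Sign = Bits >> 31;
  int Exp = int((Bits >> 23) & 0xFF) - 127;  // unbiased exponent
  uint32_t Frac = Bits & 0x7FFFFF;           // 23-bit fraction
  if (Frac & 0x7FFFF) return -1;             // only top 4 fraction bits may be set
  Frac >>= 19;
  if (Exp < -3 || Exp > 4) return -1;        // 3-bit exponent window
  uint32_t E = uint32_t((Exp + 3) & 7) ^ 4;  // fold into the bcd exponent field
  return int((Sign << 7) | (E << 4) | Frac);
}

int main() { return encodeVFPf32Imm(1.0f) == 0x70 ? 0 : 1; } // 1.0f encodes as 0x70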
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c9b9e84..7e1783b 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -30,7 +30,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -56,7 +55,7 @@ STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// load / store instructions to form ldm / stm instructions.
namespace {
- struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
+ struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
@@ -1106,7 +1105,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
/// likely they will be combined later.
namespace {
- struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
+ struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index e0be784..d393e8d 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -129,9 +129,6 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
- // FIXME: We are reserving r12 in case the PEI needs to use it to
- // generate large stack offset. Make it available once we have register
- // scavenging. Similarly r3 is reserved in Thumb mode for now.
let MethodBodies = [{
// FP is R11, R9 is available.
static const unsigned ARM_GPR_AO_1[] = {
@@ -169,10 +166,20 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
ARM::R4, ARM::R5, ARM::R6,
ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 };
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_GPR_AO[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
+
GPRClass::iterator
GPRClass::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO;
if (Subtarget.isTargetDarwin()) {
if (Subtarget.isR9Reserved())
return ARM_GPR_AO_4;
@@ -195,6 +202,12 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
GPRClass::iterator I;
+ if (Subtarget.isThumb1Only()) {
+ I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
+      // Mac OS X requires FP not to be clobbered for backtracing purposes.
+ return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ }
+
if (Subtarget.isTargetDarwin()) {
if (Subtarget.isR9Reserved())
I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
@@ -316,7 +329,7 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
// Subset of DPR that are accessible with VFP2 (and so that also have
// 32-bit SPR subregs).
-def DPR_VFP2 : RegisterClass<"ARM", [f64, v2i32, v2f32], 64,
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
[D0, D1, D2, D3, D4, D5, D6, D7,
D8, D9, D10, D11, D12, D13, D14, D15]> {
let SubRegClassList = [SPR, SPR];
@@ -324,7 +337,7 @@ def DPR_VFP2 : RegisterClass<"ARM", [f64, v2i32, v2f32], 64,
// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
-def DPR_8 : RegisterClass<"ARM", [f64, v4i16, v2f32], 64,
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
[D0, D1, D2, D3, D4, D5, D6, D7]> {
let SubRegClassList = [SPR_8, SPR_8];
}
@@ -344,6 +357,13 @@ def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2];
}
+// Subset of QPR that have DPR_8 and SPR_8 subregs.
+def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128,
+ [Q0, Q1, Q2, Q3]> {
+ let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, DPR_8, DPR_8];
+}
+
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
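
The Thumb1 branch added above relies on the usual static-array slice idiom: allocation_order_end points one past the last allocatable register and backs up by one entry when R7 must stay reserved as the frame pointer. A reduced sketch under that reading (plain integers stand in for ARM::R0..ARM::R7):

#include <cstddef>

static const unsigned THUMB_GPR_AO[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; // R0..R7

const unsigned *allocOrderEnd(bool reserveR7) {
  const unsigned *End =
      THUMB_GPR_AO + sizeof(THUMB_GPR_AO) / sizeof(THUMB_GPR_AO[0]);
  // Darwin backtraces and frame-pointer functions must not clobber R7, so the
  // usable range ends one entry earlier in that case.
  return reserveR7 ? End - 1 : End;
}

int main() { return allocOrderEnd(true) == THUMB_GPR_AO + 7 ? 0 : 1; }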
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 7478159..e721a7f 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -130,7 +130,7 @@ protected:
/// for Thumb1.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtarget::AntiDepBreakMode& mode) const {
- mode = TargetSubtarget::ANTIDEP_NONE;
+ mode = TargetSubtarget::ANTIDEP_CRITICAL;
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index c1da6ce..b4ce1d7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -112,8 +112,12 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
- PM.add(createIfConverterPass());
+ if (OptLevel != CodeGenOpt::None) {
+ if (!Subtarget.isThumb1Only())
+ PM.add(createIfConverterPass());
+ if (Subtarget.hasNEON())
+ PM.add(createNEONMoveFixPass());
+ }
if (Subtarget.isThumb2()) {
PM.add(createThumb2ITBlockPass());
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 71a5348..dd9542e 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -103,7 +103,7 @@ public:
ThumbTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
- /// returns either Thumb1RegisterInfo of Thumb2RegisterInfo
+ /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
return &InstrInfo->getRegisterInfo();
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 403f96c..894f913a 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -44,13 +44,21 @@ private:
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
- bool ParseRegister(ARMOperand &Op);
+ bool MaybeParseRegister(ARMOperand &Op, bool ParseWriteBack);
bool ParseRegisterList(ARMOperand &Op);
bool ParseMemory(ARMOperand &Op);
- bool ParseShift(enum ShiftType *St, const MCExpr *&ShiftAmount);
+ bool ParseMemoryOffsetReg(bool &Negative,
+ bool &OffsetRegShifted,
+ enum ShiftType &ShiftType,
+ const MCExpr *&ShiftAmount,
+ const MCExpr *&Offset,
+ bool &OffsetIsReg,
+ int &OffsetRegNum);
+
+ bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount);
bool ParseOperand(ARMOperand &Op);
@@ -123,16 +131,17 @@ struct ARMOperand {
// This is for all forms of ARM address expressions
struct {
unsigned BaseRegNum;
- bool OffsetIsReg;
- const MCExpr *Offset; // used when OffsetIsReg is false
unsigned OffsetRegNum; // used when OffsetIsReg is true
- bool OffsetRegShifted; // only used when OffsetIsReg is true
- enum ShiftType ShiftType; // used when OffsetRegShifted is true
+ const MCExpr *Offset; // used when OffsetIsReg is false
const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
- bool Preindexed;
- bool Postindexed;
- bool Negative; // only used when OffsetIsReg is true
- bool Writeback;
+ enum ShiftType ShiftType; // used when OffsetRegShifted is true
+ unsigned
+ OffsetRegShifted : 1, // only used when OffsetIsReg is true
+ Preindexed : 1,
+ Postindexed : 1,
+ OffsetIsReg : 1,
+ Negative : 1, // only used when OffsetIsReg is true
+ Writeback : 1;
} Mem;
};
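
The Mem struct rewrite above packs six flags into one-bit bitfields sharing a single unsigned instead of six separate bools, shrinking every ARMOperand. A self-contained sketch of the same idiom (the struct name is invented for illustration):

#include <cstdio>

struct MemFlags {
  unsigned OffsetRegShifted : 1,
           Preindexed       : 1,
           Postindexed      : 1,
           OffsetIsReg      : 1,
           Negative         : 1,
           Writeback        : 1;
};

int main() {
  // Six bools would typically occupy six bytes; the bitfields share one word.
  std::printf("sizeof(MemFlags) = %zu\n", sizeof(MemFlags));
  return 0;
}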
@@ -208,12 +217,12 @@ struct ARMOperand {
} // end anonymous namespace.
-// Try to parse a register name. The token must be an Identifier when called,
-// and if it is a register name a Reg operand is created, the token is eaten
-// and false is returned. Else true is returned and no token is eaten.
-// TODO this is likely to change to allow different register types and or to
-// parse for a specific register type.
-bool ARMAsmParser::ParseRegister(ARMOperand &Op) {
+/// Try to parse a register name. The token must be an Identifier when called,
+/// and if it is a register name a Reg operand is created, the token is eaten
+/// and false is returned. Else true is returned and no token is eaten.
+/// TODO this is likely to change to allow different register types and/or to
+/// parse for a specific register type.
+bool ARMAsmParser::MaybeParseRegister(ARMOperand &Op, bool ParseWriteBack) {
const AsmToken &Tok = getLexer().getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
@@ -227,10 +236,12 @@ bool ARMAsmParser::ParseRegister(ARMOperand &Op) {
getLexer().Lex(); // Eat identifier token.
bool Writeback = false;
- const AsmToken &ExclaimTok = getLexer().getTok();
- if (ExclaimTok.is(AsmToken::Exclaim)) {
- Writeback = true;
- getLexer().Lex(); // Eat exclaim token
+ if (ParseWriteBack) {
+ const AsmToken &ExclaimTok = getLexer().getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ Writeback = true;
+ getLexer().Lex(); // Eat exclaim token
+ }
}
Op = ARMOperand::CreateReg(RegNum, Writeback);
@@ -238,8 +249,8 @@ bool ARMAsmParser::ParseRegister(ARMOperand &Op) {
return false;
}
-// Parse a register list, return false if successful else return true or an
-// error. The first token must be a '{' when called.
+/// Parse a register list, return false if successful else return true or an
+/// error. The first token must be a '{' when called.
bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) {
assert(getLexer().getTok().is(AsmToken::LCurly) &&
"Token is not an Left Curly Brace");
@@ -285,10 +296,10 @@ bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) {
return false;
}
-// Parse an arm memory expression, return false if successful else return true
-// or an error. The first token must be a '[' when called.
-// TODO Only preindexing and postindexing addressing are started, unindexed
-// with option, etc are still to do.
+/// Parse an ARM memory expression, return false if successful else return true
+/// or an error. The first token must be a '[' when called.
+/// TODO Only preindexing and postindexing addressing are started, unindexed
+/// with option, etc. are still to do.
bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
assert(getLexer().getTok().is(AsmToken::LBrac) &&
"Token is not an Left Bracket");
@@ -297,10 +308,9 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
const AsmToken &BaseRegTok = getLexer().getTok();
if (BaseRegTok.isNot(AsmToken::Identifier))
return Error(BaseRegTok.getLoc(), "register expected");
- int BaseRegNum = MatchRegisterName(BaseRegTok.getString());
- if (BaseRegNum == -1)
+ if (MaybeParseRegister(Op, false))
return Error(BaseRegTok.getLoc(), "register expected");
- getLexer().Lex(); // Eat identifier token.
+ int BaseRegNum = Op.getReg();
bool Preindexed = false;
bool Postindexed = false;
@@ -308,55 +318,20 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
bool Negative = false;
bool Writeback = false;
- // First look for preindexed address forms:
- // [Rn, +/-Rm]
- // [Rn, #offset]
- // [Rn, +/-Rm, shift]
- // that is after the "[Rn" we now have see if the next token is a comma.
+    // First look for preindexed address forms; that is, after the "[Rn" we now
+ // have to see if the next token is a comma.
const AsmToken &Tok = getLexer().getTok();
if (Tok.is(AsmToken::Comma)) {
Preindexed = true;
getLexer().Lex(); // Eat comma token.
-
- const AsmToken &NextTok = getLexer().getTok();
- if (NextTok.is(AsmToken::Plus))
- getLexer().Lex(); // Eat plus token.
- else if (NextTok.is(AsmToken::Minus)) {
- Negative = true;
- getLexer().Lex(); // Eat minus token
- }
-
- // See if there is a register following the "[Rn," we have so far.
- const AsmToken &OffsetRegTok = getLexer().getTok();
- int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
- bool OffsetRegShifted = false;
+ int OffsetRegNum;
+ bool OffsetRegShifted;
enum ShiftType ShiftType;
const MCExpr *ShiftAmount;
const MCExpr *Offset;
- if (OffsetRegNum != -1) {
- OffsetIsReg = true;
- getLexer().Lex(); // Eat identifier token for the offset register.
- // Look for a comma then a shift
- const AsmToken &Tok = getLexer().getTok();
- if (Tok.is(AsmToken::Comma)) {
- getLexer().Lex(); // Eat comma token.
-
- const AsmToken &Tok = getLexer().getTok();
- if (ParseShift(&ShiftType, ShiftAmount))
- return Error(Tok.getLoc(), "shift expected");
- OffsetRegShifted = true;
- }
- }
- else { // "[Rn," we have so far was not followed by "Rm"
- // Look for #offset following the "[Rn,"
- const AsmToken &HashTok = getLexer().getTok();
- if (HashTok.isNot(AsmToken::Hash))
- return Error(HashTok.getLoc(), "'#' expected");
- getLexer().Lex(); // Eat hash token.
-
- if (getParser().ParseExpression(Offset))
- return true;
- }
+    if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
+ Offset, OffsetIsReg, OffsetRegNum))
+ return true;
const AsmToken &RBracTok = getLexer().getTok();
if (RBracTok.isNot(AsmToken::RBrac))
return Error(RBracTok.getLoc(), "']' expected");
@@ -374,11 +349,8 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
}
// The "[Rn" we have so far was not followed by a comma.
else if (Tok.is(AsmToken::RBrac)) {
- // This is a post indexing addressing forms:
- // [Rn], #offset
- // [Rn], +/-Rm
- // [Rn], +/-Rm, shift
- // that is a ']' follows after the "[Rn".
+    // This is a post-indexed addressing form; that is, a ']' follows after
+ // the "[Rn".
Postindexed = true;
Writeback = true;
getLexer().Lex(); // Eat right bracket token.
@@ -394,42 +366,9 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
if (NextTok.isNot(AsmToken::Comma))
return Error(NextTok.getLoc(), "',' expected");
getLexer().Lex(); // Eat comma token.
-
- const AsmToken &PlusMinusTok = getLexer().getTok();
- if (PlusMinusTok.is(AsmToken::Plus))
- getLexer().Lex(); // Eat plus token.
- else if (PlusMinusTok.is(AsmToken::Minus)) {
- Negative = true;
- getLexer().Lex(); // Eat minus token
- }
-
- // See if there is a register following the "[Rn]," we have so far.
- const AsmToken &OffsetRegTok = getLexer().getTok();
- OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
- if (OffsetRegNum != -1) {
- OffsetIsReg = true;
- getLexer().Lex(); // Eat identifier token for the offset register.
- // Look for a comma then a shift
- const AsmToken &Tok = getLexer().getTok();
- if (Tok.is(AsmToken::Comma)) {
- getLexer().Lex(); // Eat comma token.
-
- const AsmToken &Tok = getLexer().getTok();
- if (ParseShift(&ShiftType, ShiftAmount))
- return Error(Tok.getLoc(), "shift expected");
- OffsetRegShifted = true;
- }
- }
- else { // "[Rn]," we have so far was not followed by "Rm"
- // Look for #offset following the "[Rn],"
- const AsmToken &HashTok = getLexer().getTok();
- if (HashTok.isNot(AsmToken::Hash))
- return Error(HashTok.getLoc(), "'#' expected");
- getLexer().Lex(); // Eat hash token.
-
- if (getParser().ParseExpression(Offset))
- return true;
- }
+      if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
+ ShiftAmount, Offset, OffsetIsReg, OffsetRegNum))
+ return true;
}
Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
@@ -441,45 +380,105 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
return true;
}
+/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
+/// we will parse the following (where +/- means that a plus or minus is
+/// optional):
+/// +/-Rm
+/// +/-Rm, shift
+/// #offset
+/// we return false on success and true (or an error) otherwise.
+bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
+ bool &OffsetRegShifted,
+ enum ShiftType &ShiftType,
+ const MCExpr *&ShiftAmount,
+ const MCExpr *&Offset,
+ bool &OffsetIsReg,
+ int &OffsetRegNum) {
+ ARMOperand Op;
+ Negative = false;
+ OffsetRegShifted = false;
+ OffsetIsReg = false;
+ OffsetRegNum = -1;
+ const AsmToken &NextTok = getLexer().getTok();
+ if (NextTok.is(AsmToken::Plus))
+ getLexer().Lex(); // Eat plus token.
+ else if (NextTok.is(AsmToken::Minus)) {
+ Negative = true;
+ getLexer().Lex(); // Eat minus token
+ }
+ // See if there is a register following the "[Rn," or "[Rn]," we have so far.
+ const AsmToken &OffsetRegTok = getLexer().getTok();
+ if (OffsetRegTok.is(AsmToken::Identifier)) {
+ OffsetIsReg = !MaybeParseRegister(Op, false);
+ if (OffsetIsReg)
+ OffsetRegNum = Op.getReg();
+ }
+  // If we parsed a register as the offset then there can be a shift after that.
+ if (OffsetRegNum != -1) {
+ // Look for a comma then a shift
+ const AsmToken &Tok = getLexer().getTok();
+ if (Tok.is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat comma token.
+
+ const AsmToken &Tok = getLexer().getTok();
+ if (ParseShift(ShiftType, ShiftAmount))
+ return Error(Tok.getLoc(), "shift expected");
+ OffsetRegShifted = true;
+ }
+ }
+  else { // the "[Rn," or "[Rn]," we have so far was not followed by "Rm"
+ // Look for #offset following the "[Rn," or "[Rn],"
+ const AsmToken &HashTok = getLexer().getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ getLexer().Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(Offset))
+ return true;
+ }
+ return false;
+}
+
/// ParseShift as one of these two:
/// ( lsl | lsr | asr | ror ) , # shift_amount
/// rrx
/// and returns true if it parses a shift otherwise it returns false.
-bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *&ShiftAmount) {
+bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount) {
const AsmToken &Tok = getLexer().getTok();
if (Tok.isNot(AsmToken::Identifier))
return true;
const StringRef &ShiftName = Tok.getString();
if (ShiftName == "lsl" || ShiftName == "LSL")
- *St = Lsl;
+ St = Lsl;
else if (ShiftName == "lsr" || ShiftName == "LSR")
- *St = Lsr;
+ St = Lsr;
else if (ShiftName == "asr" || ShiftName == "ASR")
- *St = Asr;
+ St = Asr;
else if (ShiftName == "ror" || ShiftName == "ROR")
- *St = Ror;
+ St = Ror;
else if (ShiftName == "rrx" || ShiftName == "RRX")
- *St = Rrx;
+ St = Rrx;
else
return true;
getLexer().Lex(); // Eat shift type token.
- // For all but a Rotate right there must be a '#' and a shift amount
- if (*St != Rrx) {
- // Look for # following the shift type
- const AsmToken &HashTok = getLexer().getTok();
- if (HashTok.isNot(AsmToken::Hash))
- return Error(HashTok.getLoc(), "'#' expected");
- getLexer().Lex(); // Eat hash token.
+ // Rrx stands alone.
+ if (St == Rrx)
+ return false;
- if (getParser().ParseExpression(ShiftAmount))
- return true;
- }
+ // Otherwise, there must be a '#' and a shift amount.
+ const AsmToken &HashTok = getLexer().getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ getLexer().Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(ShiftAmount))
+ return true;
return false;
}
-// A hack to allow some testing, to be replaced by a real table gen version.
+/// A hack to allow some testing, to be replaced by a real TableGen version.
int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
if (Name == "r0" || Name == "R0")
return 0;
@@ -518,7 +517,7 @@ int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
return -1;
}
-// A hack to allow some testing, to be replaced by a real table gen version.
+/// A hack to allow some testing, to be replaced by a real TableGen version.
bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
MCInst &Inst) {
struct ARMOperand Op0 = Operands[0];
@@ -549,12 +548,12 @@ bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
return true;
}
-// Parse a arm instruction operand. For now this parses the operand regardless
-// of the mnemonic.
+/// Parse an ARM instruction operand. For now this parses the operand regardless
+/// of the mnemonic.
bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
switch (getLexer().getKind()) {
case AsmToken::Identifier:
- if (!ParseRegister(Op))
+ if (!MaybeParseRegister(Op, true))
return false;
// This was not a register so parse other operands that start with an
// identifier (like labels) as expressions and create them as immediates.
@@ -581,7 +580,7 @@ bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
}
}
-// Parse an arm instruction mnemonic followed by its operands.
+/// Parse an ARM instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
SmallVector<ARMOperand, 7> Operands;
@@ -739,7 +738,7 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
return false;
}
-// Force static initialization.
+/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
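
For orientation, the three offset shapes the new ParseMemoryOffsetReg helper accepts after "[Rn," or "[Rn]," reduce to a small data model. The struct and examples below are illustrative only; they are not LLVM types:

#include <cstdint>

struct MemOffset {
  bool    isReg;      // true for the "+/-Rm [, shift]" forms, false for "#imm"
  bool    negative;   // a leading '-' before Rm
  int     regNum;     // offset register, meaningful when isReg
  bool    shifted;    // true for the "Rm, lsl #n" style
  int32_t imm;        // immediate offset, meaningful when !isReg
};

// "ldr r0, [r1, r2, lsl #2]" -> { isReg=true,  negative=false, regNum=2, shifted=true }
// "ldr r0, [r1], #-4"        -> { isReg=false, imm=-4 }
int main() { return 0; } // declarations-only sketch; main just lets it link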
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 8719e4c..19db411 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -135,6 +135,8 @@ namespace {
void printJT2BlockOperand(const MachineInstr *MI, int OpNum);
void printTBAddrMode(const MachineInstr *MI, int OpNum);
void printNoHashImmediate(const MachineInstr *MI, int OpNum);
+ void printVFPf32ImmOperand(const MachineInstr *MI, int OpNum);
+ void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum);
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode);
@@ -157,7 +159,6 @@ namespace {
printDataDirective(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
- GlobalValue *GV = ACPV->getGV();
std::string Name;
if (ACPV->isLSDA()) {
@@ -165,7 +166,10 @@ namespace {
raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() <<
"_LSDA_" << getFunctionNumber();
Name = LSDAName.str();
- } else if (GV) {
+ } else if (ACPV->isBlockAddress()) {
+ Name = GetBlockAddressSymbol(ACPV->getBlockAddress())->getName();
+ } else if (ACPV->isGlobalValue()) {
+ GlobalValue *GV = ACPV->getGV();
bool isIndirect = Subtarget->isTargetDarwin() &&
Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
if (!isIndirect)
@@ -186,8 +190,10 @@ namespace {
StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
}
}
- } else
+ } else {
+ assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
Name = Mang->makeNameProper(ACPV->getSymbol());
+ }
O << Name;
if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
@@ -393,9 +399,11 @@ static void printSOImm(formatted_raw_ostream &O, int64_t V, bool VerboseAsm,
if (Rot) {
O << "#" << Imm << ", " << Rot;
// Pretty printed version.
- if (VerboseAsm)
- O << ' ' << MAI->getCommentString()
- << ' ' << (int)ARM_AM::rotr32(Imm, Rot);
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ O << (int)ARM_AM::rotr32(Imm, Rot);
+ }
} else {
O << "#" << Imm;
}
@@ -419,7 +427,7 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) {
printSOImm(O, V1, VerboseAsm, MAI);
O << "\n\torr";
printPredicateOperand(MI, 2);
- O << " ";
+ O << "\t";
printOperand(MI, 0);
O << ", ";
printOperand(MI, 0);
@@ -970,6 +978,26 @@ void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum) {
O << MI->getOperand(OpNum).getImm();
}
+void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum) {
+ const ConstantFP *FP = MI->getOperand(OpNum).getFPImm();
+ O << '#' << ARM::getVFPf32Imm(FP->getValueAPF());
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, FP, /*PrintType=*/false);
+ }
+}
+
+void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum) {
+ const ConstantFP *FP = MI->getOperand(OpNum).getFPImm();
+ O << '#' << ARM::getVFPf64Imm(FP->getValueAPF());
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, FP, /*PrintType=*/false);
+ }
+}
+
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode){
// Does this asm operand have a single letter operand modifier?
@@ -1182,7 +1210,8 @@ void ARMAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) {
EmitAlignment(Align, GVar);
O << name << ":";
if (VerboseAsm) {
- O << "\t\t\t\t" << MAI->getCommentString() << ' ';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
}
O << '\n';
@@ -1205,7 +1234,8 @@ void ARMAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) {
O << "," << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
}
if (VerboseAsm) {
- O << "\t\t" << MAI->getCommentString() << " ";
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
}
O << "\n";
@@ -1243,7 +1273,8 @@ void ARMAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) {
EmitAlignment(Align, GVar);
O << name << ":";
if (VerboseAsm) {
- O << "\t\t\t\t" << MAI->getCommentString() << " ";
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
}
O << "\n";
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index 4925137..5bf966b 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -78,6 +78,8 @@ public:
void printJT2BlockOperand(const MCInst *MI, unsigned OpNum) {}
void printTBAddrMode(const MCInst *MI, unsigned OpNum) {}
void printNoHashImmediate(const MCInst *MI, unsigned OpNum);
+ void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {}
+ void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {}
void printPCLabel(const MCInst *MI, unsigned OpNum);
// FIXME: Implement.
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
index 757164e..8686961 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
@@ -158,6 +158,10 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_ConstantPoolIndex:
MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, Printer.GetBlockAddressSymbol(
+ MO.getBlockAddress()));
+ break;
}
OutMI.addOperand(MCOp);
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 6e09eb2..e071b61 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -17,15 +17,16 @@ add_llvm_target(ARMCodeGen
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
- ARMInstrInfo.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
+ ARMInstrInfo.cpp
ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
ARMMCAsmInfo.cpp
ARMRegisterInfo.cpp
ARMSubtarget.cpp
ARMTargetMachine.cpp
+ NEONMoveFix.cpp
NEONPreAllocPass.cpp
Thumb1InstrInfo.cpp
Thumb1RegisterInfo.cpp
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
new file mode 100644
index 0000000..f307e3b
--- /dev/null
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -0,0 +1,141 @@
+//===-- NEONMoveFix.cpp - Convert vfp reg-reg moves into neon ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "neon-mov-fix"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumVMovs, "Number of reg-reg moves converted");
+
+namespace {
+ struct NEONMoveFixPass : public MachineFunctionPass {
+ static char ID;
+ NEONMoveFixPass() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "NEON reg-reg move conversion";
+ }
+
+ private:
+ const TargetRegisterInfo *TRI;
+ const ARMBaseInstrInfo *TII;
+
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+ bool InsertMoves(MachineBasicBlock &MBB);
+ };
+ char NEONMoveFixPass::ID = 0;
+}
+
+bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
+ RegMap Defs;
+ bool Modified = false;
+
+ // Walk over MBB tracking the def points of the registers.
+ MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = next(MII);
+ MachineInstr *MI = &*MII;
+
+ if (MI->getOpcode() == ARM::FCPYD &&
+ !TII->isPredicated(MI)) {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ // If we did not find an instruction defining the reg, the register
+ // must be live-in to this BB. In that case it's always better to use
+ // NEON reg-reg moves.
+ unsigned Domain = ARMII::DomainNEON;
+ RegMap::iterator DefMI = Defs.find(SrcReg);
+ if (DefMI != Defs.end()) {
+ Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask;
+ // Instructions in general domain are subreg accesses.
+ // Map them to NEON reg-reg moves.
+ if (Domain == ARMII::DomainGeneral)
+ Domain = ARMII::DomainNEON;
+ }
+
+ if (Domain & ARMII::DomainNEON) {
+ // Convert FCPYD to VMOVD.
+ unsigned DestReg = MI->getOperand(0).getReg();
+
+ DEBUG({errs() << "vmov convert: "; MI->dump();});
+
+ // It's safe to ignore imp-defs / imp-uses here, since:
+ // - We're running late, so no intelligent codegen passes should run
+ // afterwards.
+ // - The imp-defs / imp-uses are superregs only, we don't care about
+ // them.
+ BuildMI(MBB, *MI, MI->getDebugLoc(),
+ TII->get(ARM::VMOVD), DestReg).addReg(SrcReg);
+ MBB.erase(MI);
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+
+ DEBUG({errs() << " into: "; MI->dump();});
+
+ Modified = true;
+ ++NumVMovs;
+ } else {
+ assert((Domain & ARMII::DomainVFP) && "Invalid domain!");
+ // Do nothing.
+ }
+ }
+
+ // Update def information.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+
+ Defs[MOReg] = MI;
+ // Catch subregs as well.
+ for (const unsigned *R = TRI->getSubRegisters(MOReg); *R; ++R)
+ Defs[*R] = MI;
+ }
+ }
+
+ return Modified;
+}
+
+bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
+ ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
+ const TargetMachine &TM = Fn.getTarget();
+
+ if (AFI->isThumbFunction())
+ return false;
+
+ TRI = TM.getRegisterInfo();
+ TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= InsertMoves(MBB);
+ }
+
+ return Modified;
+}
+
+/// createNEONMoveFixPass - Returns an instance of the NEON reg-reg moves fix
+/// pass.
+FunctionPass *llvm::createNEONMoveFixPass() {
+ return new NEONMoveFixPass();
+}
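A hedged sketch of where this pass plugs in: we assume the ARM target schedules it from one of ARMBaseTargetMachine's pass hooks when NEON is available (the exact hook is our guess; the registration pattern is the point).

bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                          CodeGenOpt::Level OptLevel) {
  if (Subtarget.hasNEON())
    PM.add(createNEONMoveFixPass()); // rewrite FCPYD into VMOVD where safe
  return true;
}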
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 821b872..8b2bcd0 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN NEONPreAllocPass : public MachineFunctionPass {
+ class NEONPreAllocPass : public MachineFunctionPass {
const TargetInstrInfo *TII;
public:
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 8fb1da3..fb64d9f 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -8,12 +8,8 @@ Reimplement 'select' in terms of 'SEL'.
add doesn't need to overflow between the two 16-bit chunks.
* Implement pre/post increment support. (e.g. PR935)
-* Coalesce stack slots!
* Implement smarter constant generation for binops with large immediates.
-* Consider materializing FP constants like 0.0f and 1.0f using integer
- immediate instructions then copy to FPU. Slower than load into FPU?
-
//===---------------------------------------------------------------------===//
Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
@@ -422,14 +418,6 @@ are not remembered when the same two values are compared twice.
//===---------------------------------------------------------------------===//
-More register scavenging work:
-
-1. Use the register scavenger to track frame index materialized into registers
- (those that do not fit in addressing modes) to allow reuse in the same BB.
-2. Finish scavenging for Thumb.
-
-//===---------------------------------------------------------------------===//
-
More LSR enhancements possible:
1. Teach LSR about pre- and post- indexed ops to allow iv increment be merged
@@ -540,10 +528,6 @@ while ARMConstantIslandPass only need to worry about LDR (literal).
//===---------------------------------------------------------------------===//
-We need to fix constant isel for ARMv6t2 to use MOVT.
-
-//===---------------------------------------------------------------------===//
-
Constant island pass should make use of full range SoImm values for LEApcrel.
Be careful though as the last attempt caused infinite looping on lencod.
@@ -593,10 +577,17 @@ it saves an instruction and a register.
//===---------------------------------------------------------------------===//
-add/sub/and/or + i32 imm can be simplified by folding part of the immediate
-into the operation.
+It might be profitable to cse MOVi16 if there are lots of 32-bit immediates
+with the same bottom half.
//===---------------------------------------------------------------------===//
-It might be profitable to cse MOVi16 if there are lots of 32-bit immediates
-with the same bottom half.
+Robert Muth started working on an alternate jump table implementation that
+does not put the tables in-line in the text. This is more like the llvm
+default jump table implementation. This might be useful sometime. Several
+revisions of patches are on the mailing list, beginning at:
+http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html
+
+//===---------------------------------------------------------------------===//
+
+Make use of the "rbit" instruction.
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 7eed30e..b6dd56c 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -17,12 +17,15 @@
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/SmallVector.h"
#include "Thumb1InstrInfo.h"
using namespace llvm;
-Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : RI(*this, STI) {
+Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
}
unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
@@ -38,6 +41,7 @@ Thumb1InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
case ARM::tBX_RET_vararg:
case ARM::tPOP_RET:
case ARM::tB:
+ case ARM::tBRIND:
case ARM::tBR_JTr:
return true;
default:
@@ -121,9 +125,16 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
isARMLowRegister(SrcReg))) && "Unknown regclass!");
if (RC == ARM::tGPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ MachineMemOperand::MOStore, 0,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill))
.addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0));
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
}
@@ -139,8 +150,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
isARMLowRegister(DestReg))) && "Unknown regclass!");
if (RC == ARM::tGPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ MachineMemOperand::MOLoad, 0,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
- .addFrameIndex(FI).addImm(0));
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
}
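The MachineMemOperand boilerplate added above (and repeated in Thumb2InstrInfo.cpp below) follows one pattern; here is a hedged sketch factoring it into a helper — the helper is ours, but every call in it appears verbatim in the patch. Attaching the MMO lets later passes see the size and alignment of the spill slot.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
using namespace llvm;

static MachineMemOperand *getFixedStackMMO(MachineFunction &MF, int FI,
                                           unsigned Flags) {
  // Flags is MachineMemOperand::MOLoad or MachineMemOperand::MOStore.
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  return MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
                                 Flags, /*Offset=*/0,
                                 MFI.getObjectSize(FI),
                                 MFI.getObjectAlignment(FI));
}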
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 6207177..5aaaf9c 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -76,18 +76,6 @@ Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const {
return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
}
-bool
-Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
- return true;
-}
-
-bool
-Thumb1RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF)
- const {
- return true;
-}
-
-
bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 570a5bc..241f1cc 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -40,9 +40,6 @@ public:
const TargetRegisterClass *
getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const;
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
- bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
-
bool hasReservedCallFrame(MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 427c0bb..462844b 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -14,14 +14,13 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumITs, "Number of IT blocks inserted");
namespace {
- struct VISIBILITY_HIDDEN Thumb2ITBlockPass : public MachineFunctionPass {
+ struct Thumb2ITBlockPass : public MachineFunctionPass {
static char ID;
Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 264601b..21fff51 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -18,12 +18,15 @@
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/SmallVector.h"
#include "Thumb2InstrInfo.h"
using namespace llvm;
-Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : RI(*this, STI) {
+Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
}
unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
@@ -46,6 +49,7 @@ Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
case ARM::tBX_RET_vararg:
case ARM::tPOP_RET:
case ARM::tB:
+ case ARM::tBRIND:
return true;
default:
break;
@@ -89,9 +93,16 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ MachineMemOperand::MOStore, 0,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
.addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0));
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
@@ -106,8 +117,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ MachineMemOperand::MOLoad, 0,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
- .addFrameIndex(FI).addImm(0));
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index f217e0e..f24d3e2 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------*- C++ -*-===//
+//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo class.
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
//
//===----------------------------------------------------------------------===//
@@ -59,8 +60,3 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.addReg(DestReg, getDefRegState(true), SubIdx)
.addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0);
}
-
-bool Thumb2RegisterInfo::
-requiresRegisterScavenging(const MachineFunction &MF) const {
- return true;
-}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index a63c60b..a295630 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -35,8 +35,6 @@ public:
unsigned DestReg, unsigned SubIdx, int Val,
ARMCC::CondCodes Pred = ARMCC::AL,
unsigned PredReg = 0) const;
-
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
};
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index b8879d2..9ce30aa 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
@@ -126,7 +125,7 @@ namespace {
{ ARM::t2STM, ARM::tSTM, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 },
};
- class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass {
+ class Thumb2SizeReduce : public MachineFunctionPass {
public:
static char ID;
Thumb2SizeReduce();
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
index 719ffae..001656e 100644
--- a/lib/Target/Alpha/AlphaBranchSelector.cpp
+++ b/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -15,13 +15,12 @@
#include "Alpha.h"
#include "AlphaInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/MC/MCAsmInfo.h"
using namespace llvm;
namespace {
- struct VISIBILITY_HIDDEN AlphaBSel : public MachineFunctionPass {
+ struct AlphaBSel : public MachineFunctionPass {
static char ID;
AlphaBSel() : MachineFunctionPass(&ID) {}
diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp
index 8023add..b090f0d 100644
--- a/lib/Target/Alpha/AlphaCodeEmitter.cpp
+++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -50,8 +49,7 @@ namespace {
};
template <class CodeEmitter>
- class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
- public AlphaCodeEmitter
+ class Emitter : public MachineFunctionPass, public AlphaCodeEmitter
{
const AlphaInstrInfo *II;
TargetMachine &TM;
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index b3f865c..cb03a6f 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -157,11 +157,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
setStackPointerRegisterToSaveRestore(Alpha::R30);
- addLegalFPImmediate(APFloat(+0.0)); //F31
- addLegalFPImmediate(APFloat(+0.0f)); //F31
- addLegalFPImmediate(APFloat(-0.0)); //-F31
- addLegalFPImmediate(APFloat(-0.0f)); //-F31
-
setJumpBufSize(272);
setJumpBufAlignment(16);
@@ -919,3 +914,13 @@ AlphaTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Alpha target isn't yet aware of offsets.
return false;
}
+
+bool AlphaTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+ // +0.0 F31
+ // +0.0f F31
+ // -0.0 -F31
+ // -0.0f -F31
+ return Imm.isZero() || Imm.isNegZero();
+}
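A hedged sketch of the consumer side of the new isFPImmLegal hook (names here are illustrative, not actual legalizer code): lowering asks the target before keeping a ConstantFP as a native immediate, and otherwise expands it into a constant-pool load.

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

static bool keepAsImmediate(const TargetLowering &TLI,
                            const ConstantFPSDNode *CFP) {
  EVT VT = CFP->getValueType(0);
  // False here means the legalizer will materialize the constant as a
  // load from the constant pool instead.
  return TLI.isFPImmLegal(CFP->getValueAPF(), VT);
}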
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index b580c9d..b204faf 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -102,6 +102,11 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
private:
// Helpers for custom lowering.
void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 3b98206..81e1fb7 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -164,7 +164,7 @@ def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64
"LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;
-let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
[(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
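The usesCustomDAGSchedInserter to usesCustomInserter rename recurs throughout this patch: such pseudos are now expanded after instruction selection by the target's EmitInstrWithCustomInserter hook rather than by the scheduler. A hedged sketch of what that hook looks like for CAS pseudos like the ones above; "Foo", EmitAtomicCmpSwap, and the exact signature for this era of the API are our approximations.

MachineBasicBlock *
FooTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  switch (MI->getOpcode()) {
  case Foo::CAS32:
    // Expand the pseudo into an explicit load-locked/store-conditional
    // loop built out of new basic blocks spliced around BB.
    return EmitAtomicCmpSwap(MI, BB, /*Size=*/4);
  default:
    llvm_unreachable("unexpected custom-inserter pseudo");
  }
}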
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index d8e8b79..209a5bf 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -27,7 +27,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/FormattedStream.h"
@@ -37,7 +36,7 @@ using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
- struct VISIBILITY_HIDDEN AlphaAsmPrinter : public AsmPrinter {
+ struct AlphaAsmPrinter : public AsmPrinter {
/// Unique incrementer for label values for referencing Global values.
///
diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
index 91fd5dd..1900d00 100644
--- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
+++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
- class VISIBILITY_HIDDEN BlackfinAsmPrinter : public AsmPrinter {
+ class BlackfinAsmPrinter : public AsmPrinter {
public:
BlackfinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
const MCAsmInfo *MAI, bool V)
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index 934b188..c952af1 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -465,16 +465,16 @@ def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off),
}
def : Pat<(truncstorei16 D:$val, PI:$ptr),
- (STORE16pi (EXTRACT_SUBREG (COPY_TO_REGCLASS D:$val, D),
+ (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
bfin_subreg_lo16), PI:$ptr)>;
def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr),
- (STORE16pi (EXTRACT_SUBREG (COPY_TO_REGCLASS D:$val, D),
+ (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
bfin_subreg_hi16), PI:$ptr)>;
def : Pat<(truncstorei8 D16L:$val, P:$ptr),
(STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS D16L:$val, D16L),
+ (i16 (COPY_TO_REGCLASS D16L:$val, D16L)),
bfin_subreg_lo16),
P:$ptr)>;
@@ -525,7 +525,7 @@ def : Pat<(and D:$src, 0xffff),
def : Pat<(i32 (anyext D16L:$src)),
(INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS D16L:$src, D16L),
+ (i16 (COPY_TO_REGCLASS D16L:$src, D16L)),
bfin_subreg_lo16)>;
// TODO Dreg = Dreg_byte (X/Z)
@@ -870,4 +870,4 @@ def : Pat<(i16 (anyext JustCC:$cc)),
(EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>;
def : Pat<(i16 (trunc D:$src)),
- (EXTRACT_SUBREG (COPY_TO_REGCLASS D:$src, D), bfin_subreg_lo16)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), bfin_subreg_lo16)>;
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index cbf769b..9e4fe27 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -282,6 +282,7 @@ namespace {
void visitReturnInst(ReturnInst &I);
void visitBranchInst(BranchInst &I);
void visitSwitchInst(SwitchInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
void visitInvokeInst(InvokeInst &I) {
llvm_unreachable("Lowerinvoke pass didn't work!");
}
@@ -303,7 +304,6 @@ namespace {
bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee);
void visitAllocaInst(AllocaInst &I);
- void visitFreeInst (FreeInst &I);
void visitLoadInst (LoadInst &I);
void visitStoreInst (StoreInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
@@ -1627,7 +1627,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
}
// Should this be a signed comparison? If so, convert to signed.
- bool castIsSigned = Cmp.isSignedPredicate();
+ bool castIsSigned = Cmp.isSigned();
// If the operand was a pointer, convert to a large integer type.
const Type* OpTy = Operand->getType();
@@ -2579,6 +2579,12 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
Out << " }\n";
}
+void CWriter::visitIndirectBrInst(IndirectBrInst &IBI) {
+ Out << " goto *(void*)(";
+ writeOperand(IBI.getOperand(0));
+ Out << ");\n";
+}
+
void CWriter::visitUnreachableInst(UnreachableInst &I) {
Out << " /*UNREACHABLE*/;\n";
}
@@ -3417,10 +3423,6 @@ void CWriter::visitAllocaInst(AllocaInst &I) {
Out << ')';
}
-void CWriter::visitFreeInst(FreeInst &I) {
- llvm_unreachable("lowerallocations pass didn't work!");
-}
-
void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
gep_type_iterator E, bool Static) {
@@ -3685,7 +3687,6 @@ bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
if (FileType != TargetMachine::AssemblyFile) return true;
PM.add(createGCLoweringPass());
- PM.add(createLowerAllocationsPass());
PM.add(createLowerInvokePass());
PM.add(createCFGSimplificationPass()); // clean up after lower invoke.
PM.add(new CBackendNameAllUsedStructsAndMergeFunctions());
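A hedged illustration of what visitIndirectBrInst above emits: a GNU "labels as values" computed goto, an extension GCC accepts in both C and C++. Hand-written to mirror the shape, not actual CBackend output.

int dispatch(int which) {
  static void *targets[2] = { &&l_even, &&l_odd }; // blockaddress analogues
  goto *(void*)(targets[which & 1]);               // form emitted above
l_even:
  return 0;
l_odd:
  return 1;
}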
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 0f8d539..007fe8f 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -36,7 +36,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -50,7 +49,7 @@ namespace {
const std::string bss_section(".bss");
- class VISIBILITY_HIDDEN SPUAsmPrinter : public AsmPrinter {
+ class SPUAsmPrinter : public AsmPrinter {
std::set<std::string> FnStubs, GVStubs;
public:
explicit SPUAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
@@ -285,7 +284,7 @@ namespace {
};
/// LinuxAsmPrinter - SPU assembly printer, customized for Linux
- class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter {
+ class LinuxAsmPrinter : public SPUAsmPrinter {
public:
explicit LinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
const MCAsmInfo *T, bool V)
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index 02713b5..c960974 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -15,7 +15,7 @@
#ifndef LLVM_TARGET_IBMCELLSPU_H
#define LLVM_TARGET_IBMCELLSPU_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 45c2a7b..4bae6c7 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1118,13 +1118,13 @@ namespace {
break;
}
case Instruction::Switch: {
- const SwitchInst* sw = cast<SwitchInst>(I);
+ const SwitchInst *SI = cast<SwitchInst>(I);
Out << "SwitchInst* " << iName << " = SwitchInst::Create("
<< opNames[0] << ", "
<< opNames[1] << ", "
- << sw->getNumCases() << ", " << bbname << ");";
+ << SI->getNumCases() << ", " << bbname << ");";
nl(Out);
- for (unsigned i = 2; i < sw->getNumOperands(); i += 2 ) {
+ for (unsigned i = 2; i != SI->getNumOperands(); i += 2) {
Out << iName << "->addCase("
<< opNames[i] << ", "
<< opNames[i+1] << ");";
@@ -1132,6 +1132,17 @@ namespace {
}
break;
}
+ case Instruction::IndirectBr: {
+ const IndirectBrInst *IBI = cast<IndirectBrInst>(I);
+ Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create("
+ << opNames[0] << ", " << IBI->getNumDestinations() << ");";
+ nl(Out);
+ for (unsigned i = 1; i != IBI->getNumOperands(); ++i) {
+ Out << iName << "->addDestination(" << opNames[i] << ");";
+ nl(Out);
+ }
+ break;
+ }
case Instruction::Invoke: {
const InvokeInst* inv = cast<InvokeInst>(I);
Out << "std::vector<Value*> " << iName << "_params;";
@@ -1258,11 +1269,6 @@ namespace {
Out << "\");";
break;
}
- case Instruction::Free: {
- Out << "FreeInst* " << iName << " = new FreeInst("
- << getCppName(I->getOperand(0)) << ", " << bbname << ");";
- break;
- }
case Instruction::Alloca: {
const AllocaInst* allocaI = cast<AllocaInst>(I);
Out << "AllocaInst* " << iName << " = new AllocaInst("
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index cf08a97..949b910 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -30,7 +30,7 @@ using namespace llvm;
namespace llvm {
// TargetMachine for the MSIL
- struct VISIBILITY_HIDDEN MSILTarget : public TargetMachine {
+ struct MSILTarget : public TargetMachine {
MSILTarget(const Target &T, const std::string &TT, const std::string &FS)
: TargetMachine(T) {}
@@ -1191,9 +1191,6 @@ void MSILWriter::printInstruction(const Instruction* Inst) {
case Instruction::Alloca:
printAllocaInstruction(cast<AllocaInst>(Inst));
break;
- case Instruction::Free:
- llvm_unreachable("LowerAllocationsPass used");
- break;
case Instruction::Unreachable:
printSimpleInstruction("ldstr", "\"Unreachable instruction\"");
printSimpleInstruction("newobj",
@@ -1532,7 +1529,7 @@ void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) {
case Type::StructTyID:
for (unsigned I = 0, E = C->getNumOperands(); I<E; I++) {
if (I!=0) Out << ",\n";
- printStaticConstant(C->getOperand(I),Offset);
+ printStaticConstant(cast<Constant>(C->getOperand(I)), Offset);
}
break;
case Type::PointerTyID:
@@ -1699,7 +1696,6 @@ bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM,
if (FileType != TargetMachine::AssemblyFile) return true;
MSILWriter* Writer = new MSILWriter(o);
PM.add(createGCLoweringPass());
- PM.add(createLowerAllocationsPass());
// FIXME: Handle switch through native IL instruction "switch"
PM.add(createLowerSwitchPass());
PM.add(createCFGSimplificationPass());
diff --git a/lib/Target/MSP430/AsmPrinter/CMakeLists.txt b/lib/Target/MSP430/AsmPrinter/CMakeLists.txt
index f1eb885..4b1f4e6 100644
--- a/lib/Target/MSP430/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/MSP430/AsmPrinter/CMakeLists.txt
@@ -1,8 +1,8 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMMSP430AsmPrinter
- MSP430InstPrinter.cpp
MSP430AsmPrinter.cpp
+ MSP430InstPrinter.cpp
MSP430MCInstLower.cpp
)
add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen)
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
index ace358e..237c313 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
@@ -36,7 +36,6 @@
#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -50,7 +49,7 @@ EnableMCInst("enable-msp430-mcinst-printer", cl::Hidden,
cl::desc("enable experimental mcinst gunk in the msp430 backend"));
namespace {
- class VISIBILITY_HIDDEN MSP430AsmPrinter : public AsmPrinter {
+ class MSP430AsmPrinter : public AsmPrinter {
public:
MSP430AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
const MCAsmInfo *MAI, bool V)
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index e202175..2b50669 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -108,7 +108,7 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
[(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
}
-let usesCustomDAGSchedInserter = 1 in {
+let usesCustomInserter = 1 in {
def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cc),
"# Select8 PSEUDO",
[(set GR8:$dst,
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index ccf9ee5..66ade89 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -51,7 +51,7 @@ using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
- class VISIBILITY_HIDDEN MipsAsmPrinter : public AsmPrinter {
+ class MipsAsmPrinter : public AsmPrinter {
const MipsSubtarget *Subtarget;
public:
explicit MipsAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index cc20dd7..810dce1 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -30,7 +30,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -46,7 +45,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
namespace {
-class VISIBILITY_HIDDEN MipsDAGToDAGISel : public SelectionDAGISel {
+class MipsDAGToDAGISel : public SelectionDAGISel {
/// TM - Keep a reference to MipsTargetMachine.
MipsTargetMachine &TM;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index ab8790a..61da8f8 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -72,9 +72,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
if (!Subtarget->isFP64bit())
addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
- // Legal fp constants
- addLegalFPImmediate(APFloat(+0.0f));
-
// Load extended operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
@@ -1224,3 +1221,9 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Mips target isn't yet aware of offsets.
return false;
}
+
+bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+ return Imm.isZero();
+}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index dddba42..cacf4b5 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -146,6 +146,11 @@ namespace llvm {
EVT VT) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
};
}
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index b6a6d2f..bd61738 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -253,7 +253,7 @@ let hasDelaySlot = 1, Defs=[FCR31] in {
// For some explanation, see Select_CC at MipsInstrInfo.td. We also embed a
// condition code to enable easy handling by the Custom Inserter.
-let usesCustomDAGSchedInserter = 1, Uses=[FCR31] in {
+let usesCustomInserter = 1, Uses=[FCR31] in {
class PseudoFPSelCC<RegisterClass RC, string asmstr> :
MipsPseudo<(outs RC:$dst),
(ins CPURegs:$CmpRes, RC:$T, RC:$F, condcode:$cc), asmstr,
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index b9276fe..46cf43e 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -417,7 +417,7 @@ def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>;
// operation. The solution is to create a Mips pseudo SELECT_CC instruction
// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally
// replace it with real supported nodes in EmitInstrWithCustomInserter
-let usesCustomDAGSchedInserter = 1 in {
+let usesCustomInserter = 1 in {
class PseudoSelCC<RegisterClass RC, string asmstr>:
MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr,
[(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>;
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
index ea0f494..b2a4c11 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
@@ -298,6 +298,7 @@ bool PIC16AsmPrinter::doInitialization(Module &M) {
EmitIData(M);
EmitUData(M);
EmitRomData(M);
+ EmitSharedUdata(M);
EmitUserSections(M);
return Result;
}
@@ -370,6 +371,11 @@ void PIC16AsmPrinter::EmitRomData(Module &M) {
EmitSingleSection(PTOF->ROMDATASection());
}
+// Emit Shared section udata.
+void PIC16AsmPrinter::EmitSharedUdata(Module &M) {
+ EmitSingleSection(PTOF->SHAREDUDATASection());
+}
+
bool PIC16AsmPrinter::doFinalization(Module &M) {
EmitAllAutos(M);
printLibcallDecls();
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
index b13d9ce..838c970 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
@@ -55,6 +55,7 @@ namespace llvm {
void EmitUData (Module &M);
void EmitAllAutos (Module &M);
void EmitRomData (Module &M);
+ void EmitSharedUdata(Module &M);
void EmitUserSections (Module &M);
void EmitFunctionFrame(MachineFunction &MF);
void printLibcallDecls();
diff --git a/lib/Target/PIC16/MCSectionPIC16.h b/lib/Target/PIC16/MCSectionPIC16.h
deleted file mode 100644
index 352be99..0000000
--- a/lib/Target/PIC16/MCSectionPIC16.h
+++ /dev/null
@@ -1,88 +0,0 @@
-//===- MCSectionPIC16.h - PIC16-specific section representation -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the MCSectionPIC16 class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_PIC16SECTION_H
-#define LLVM_PIC16SECTION_H
-
-#include "llvm/MC/MCSection.h"
-
-namespace llvm {
-
- /// MCSectionPIC16 - Represents a physical section in PIC16 COFF.
- /// Contains data objects.
- ///
- class MCSectionPIC16 : public MCSection {
- /// Name of the section to uniquely identify it.
- std::string Name;
-
- /// User can specify an address at which a section should be placed.
- /// Negative value here means user hasn't specified any.
- int Address;
-
- /// Overlay information - Sections with same color can be overlaid on
- /// one another.
- int Color;
-
- /// Conatined data objects.
- std::vector<const GlobalVariable *>Items;
-
- /// Total size of all data objects contained here.
- unsigned Size;
-
- MCSectionPIC16(const StringRef &name, SectionKind K, int addr, int color)
- : MCSection(K), Name(name), Address(addr), Color(color) {
- }
-
- public:
- /// Return the name of the section.
- const std::string &getName() const { return Name; }
-
- /// Return the Address of the section.
- int getAddress() const { return Address; }
-
- /// Return the Color of the section.
- int getColor() const { return Color; }
-
- /// PIC16 Terminology for section kinds is as below.
- /// UDATA - BSS
- /// IDATA - initialized data (equiv to Metadata)
- /// ROMDATA - ReadOnly.
- /// UDATA_OVR - Sections that can be overlaid. Section of such type is
- /// used to contain function autos an frame. We can think of
- /// it as equiv to llvm ThreadBSS)
- /// So, let's have some convenience functions to Map PIC16 Section types
- /// to SectionKind just for the sake of better readability.
- static SectionKind UDATA_Kind() { return SectionKind::getBSS(); }
- static SectionKind IDATA_Kind() { return SectionKind::getMetadata(); }
- static SectionKind ROMDATA_Kind() { return SectionKind::getReadOnly(); }
- static SectionKind UDATA_OVR_Kind() { return SectionKind::getThreadBSS(); }
-
- // If we could just do getKind() == UDATA_Kind() ?
- bool isUDATA_Kind() { return getKind().isBSS(); }
- bool isIDATA_Kind() { return getKind().isMetadata(); }
- bool isROMDATA_Kind() { return getKind().isMetadata(); }
- bool isUDATA_OVR_Kind() { return getKind().isThreadBSS(); }
-
- /// This would be the only way to create a section.
- static MCSectionPIC16 *Create(const StringRef &Name, SectionKind K,
- int Address, int Color, MCContext &Ctx);
-
- /// Override this as PIC16 has its own way of printing switching
- /// to a section.
- virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS) const;
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h
index 7f4c2f1..e18ddf1 100644
--- a/lib/Target/PIC16/PIC16ABINames.h
+++ b/lib/Target/PIC16/PIC16ABINames.h
@@ -234,6 +234,12 @@ namespace llvm {
return "romdata.#";
}
+ static std::string getSharedUDataSectionName() {
+ std::ostringstream o;
+ o << getTagName(PREFIX_SYMBOL) << "udata_shr" << ".#";
+ return o.str();
+ }
+
static std::string getRomdataSectionName(unsigned num,
std::string prefix = "") {
std::ostringstream o;
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index 87bd3d9..2fb405e 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -214,3 +214,25 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// returning NULL.
return 0;
}
+
+bool PIC16InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return true;
+
+ // Get the terminator instruction.
+ --I;
+ // Handle unconditional branches. If the unconditional branch's target is
+ // the layout successor basic block, remove the unconditional branch.
+ if (I->getOpcode() == PIC16::br_uncond && AllowModify) {
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ }
+ }
+ return true;
+}
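A hedged sketch of the caller's contract for this hook: returning true means "could not analyze"; returning false means TBB/FBB/Cond describe the block's terminators. PIC16's version above always answers true, yet still prunes a branch to the layout successor when AllowModify permits. Illustrative caller, not actual BranchFolding code.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

static void tryAnalyze(const TargetInstrInfo *TII, MachineBasicBlock &MBB) {
  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 4> Cond;
  if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/true)) {
    // Branch structure understood; a caller could now rewrite it with
    // RemoveBranch/InsertBranch.
  }
}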
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
index 85c0984..56f51f0 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -68,7 +68,10 @@ public:
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond) const;
-
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
};
} // namespace llvm
diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td
index 250ca0a..5eec6c4 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.td
+++ b/lib/Target/PIC16/PIC16InstrInfo.td
@@ -467,9 +467,9 @@ def br_uncond: ControlFormat<0x0, (outs), (ins brtarget:$dst),
"goto $dst",
[(br bb:$dst)]>;
-// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
-// scheduler into a branch sequence.
-let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
def SELECT_CC_Int_ICC
: Pseudo<(outs GPR:$dst), (ins GPR:$T, GPR:$F, i8imm:$Cond),
"; SELECT_CC_Int_ICC PSEUDO!",
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
index a97dc35..cc71b04 100644
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -32,12 +32,11 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/GlobalValue.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/Support/Compiler.h"
using namespace llvm;
namespace {
- struct VISIBILITY_HIDDEN MemSelOpt : public MachineFunctionPass {
+ struct MemSelOpt : public MachineFunctionPass {
static char ID;
MemSelOpt() : MachineFunctionPass(&ID) {}
@@ -144,7 +143,7 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) {
}
// Get the section name(NewBank) for MemOp.
- // This assumes that the section names for globals are laready set by
+ // This assumes that the section names for globals are already set by
// AsmPrinter->doInitialization.
std::string NewBank = CurBank;
if (Op.getType() == MachineOperand::MO_GlobalAddress &&
@@ -156,7 +155,11 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) {
std::string Sym = Op.getSymbolName();
NewBank = PAN::getSectionNameForSym(Sym);
}
-
+
+ // If the section is shared section, do not emit banksel.
+ if (NewBank == PAN::getSharedUDataSectionName())
+ return Changed;
+
// If the previous and new section names are same, we don't need to
// emit banksel.
if (NewBank.compare(CurBank) != 0 ) {
diff --git a/lib/Target/PIC16/PIC16Passes/Makefile b/lib/Target/PIC16/PIC16Passes/Makefile
index cbb34b3..9684b8d 100644
--- a/lib/Target/PIC16/PIC16Passes/Makefile
+++ b/lib/Target/PIC16/PIC16Passes/Makefile
@@ -11,7 +11,5 @@ TARGET = PIC16
LIBRARYNAME = LLVMpic16passes
BUILD_ARCHIVE = 1
-
-
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp
index 7eedf7f..d7cfe02 100644
--- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp
+++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp
@@ -72,6 +72,7 @@ getPIC16DataSection(const std::string &Name, PIC16SectionType Ty,
case UDATA: UDATASections_.push_back(Entry); break;
case IDATA: IDATASections_.push_back(Entry); break;
case ROMDATA: ROMDATASection_ = Entry; break;
+ case UDATA_SHR: SHAREDUDATASection_ = Entry; break;
}
return Entry;
@@ -125,6 +126,7 @@ void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){
TM = &tm;
ROMDATASection_ = NULL;
+ SHAREDUDATASection_ = NULL;
}
/// allocateUDATA - Allocate an uninitialized global to an existing or new UDATA
@@ -279,7 +281,10 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
std::string AddrStr = "Address=";
if (SectName.compare(0, AddrStr.length(), AddrStr) == 0) {
std::string SectAddr = SectName.substr(AddrStr.length());
- return allocateAtGivenAddress(GVar, SectAddr);
+ if (SectAddr.compare("NEAR") == 0)
+ return allocateSHARED(GVar, Mang);
+ else
+ return allocateAtGivenAddress(GVar, SectAddr);
}
// Create the section specified with section attribute.
@@ -289,6 +294,25 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
return getPIC16DataSection(GV->getSection().c_str(), UDATA);
}
+const MCSection *
+PIC16TargetObjectFile::allocateSHARED(const GlobalVariable *GV,
+ Mangler *Mang) const {
+ // Make sure that this is an uninitialized global.
+ assert(GV->hasInitializer() && "This global doesn't need space");
+ if (!GV->getInitializer()->isNullValue()) {
+ // FIXME: Generate a warning in this case that the near qualifier will
+ // be ignored.
+ return SelectSectionForGlobal(GV, SectionKind::getDataRel(), Mang, *TM);
+ }
+ std::string Name = PAN::getSharedUDataSectionName();
+
+ PIC16Section *SharedUDataSect = getPIC16DataSection(Name.c_str(), UDATA_SHR);
+ // Insert the GV into shared section.
+ SharedUDataSect->Items.push_back(GV);
+ return SharedUDataSect;
+}
+
+
// Interface used by AsmPrinter to get a code section for a function.
const PIC16Section *
PIC16TargetObjectFile::SectionForCode(const std::string &FnName) const {
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h
index ca07bed..0b0ad43 100644
--- a/lib/Target/PIC16/PIC16TargetObjectFile.h
+++ b/lib/Target/PIC16/PIC16TargetObjectFile.h
@@ -56,6 +56,7 @@ namespace llvm {
mutable std::vector<PIC16Section *> UDATASections_;
mutable std::vector<PIC16Section *> IDATASections_;
mutable PIC16Section * ROMDATASection_;
+ mutable PIC16Section * SHAREDUDATASection_;
/// Standard Auto Sections.
mutable std::vector<PIC16Section *> AUTOSections_;
@@ -110,6 +111,10 @@ namespace llvm {
/// Allocate DATA at user specified address.
const MCSection *allocateAtGivenAddress(const GlobalVariable *GV,
const std::string &Addr) const;
+
+ /// Allocate a shared variable to SHARED section.
+ const MCSection *allocateSHARED(const GlobalVariable *GV,
+ Mangler *Mang) const;
public:
PIC16TargetObjectFile();
@@ -147,6 +152,9 @@ namespace llvm {
const PIC16Section *ROMDATASection() const {
return ROMDATASection_;
}
+ const PIC16Section *SHAREDUDATASection() const {
+ return SHAREDUDATASection_;
+ }
const std::vector<PIC16Section *> &AUTOSections() const {
return AUTOSections_;
}
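A hedged illustration of the user-facing side of the shared-udata path: getExplicitSectionGlobal above compares the "Address=" payload against "NEAR", so a global carrying this section string lands in the shared section and needs no banksel. Whether the PIC16 front end spells a near qualifier exactly this way is our assumption, inferred from the string the code parses.

// Hypothetical user global routed to udata_shr.# by the code above.
unsigned char Flags __attribute__((section("Address=NEAR")));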
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index dc6b852..4bc58d2 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -45,7 +45,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -55,7 +54,7 @@ using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
- class VISIBILITY_HIDDEN PPCAsmPrinter : public AsmPrinter {
+ class PPCAsmPrinter : public AsmPrinter {
protected:
struct FnStubInfo {
std::string Stub, LazyPtr, AnonSymbol;
@@ -344,7 +343,7 @@ namespace {
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
- class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter {
+ class PPCLinuxAsmPrinter : public PPCAsmPrinter {
public:
explicit PPCLinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
const MCAsmInfo *T, bool V)
@@ -369,7 +368,7 @@ namespace {
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
/// OS X
- class VISIBILITY_HIDDEN PPCDarwinAsmPrinter : public PPCAsmPrinter {
+ class PPCDarwinAsmPrinter : public PPCAsmPrinter {
formatted_raw_ostream &OS;
public:
explicit PPCDarwinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index b95a502..a752421 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -23,14 +23,13 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace {
- struct VISIBILITY_HIDDEN PPCBSel : public MachineFunctionPass {
+ struct PPCBSel : public MachineFunctionPass {
static char ID;
PPCBSel() : MachineFunctionPass(&ID) {}
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 0675293..da9ea36 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
@@ -57,8 +56,7 @@ namespace {
};
template <class CodeEmitter>
- class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
- public PPCCodeEmitter {
+ class Emitter : public MachineFunctionPass, public PPCCodeEmitter {
TargetMachine &TM;
CodeEmitter &MCE;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 8fa6a66..b866240 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -31,7 +31,6 @@
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -41,7 +40,7 @@ namespace {
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
///
- class VISIBILITY_HIDDEN PPCDAGToDAGISel : public SelectionDAGISel {
+ class PPCDAGToDAGISel : public SelectionDAGISel {
PPCTargetMachine &TM;
PPCTargetLowering &PPCLowering;
const PPCSubtarget &PPCSubTarget;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 0f68fb9..d1e1bd5 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -127,7 +127,7 @@ def : Pat<(PPCnop),
(NOP)>;
// Atomic operations
-let usesCustomDAGSchedInserter = 1 in {
+let usesCustomInserter = 1 in {
let Uses = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index dc5db6f..1c7c05e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -363,9 +363,9 @@ def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi),
[(set GPRC:$result,
(PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
-// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
-// scheduler into a branch sequence.
-let usesCustomDAGSchedInserter = 1, // Expanded by the scheduler.
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
PPC970_Single = 1 in {
def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
@@ -539,7 +539,7 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
PPC970_DGroup_Single;
// Atomic operations
-let usesCustomDAGSchedInserter = 1 in {
+let usesCustomInserter = 1 in {
let Uses = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 2740387..a345d3d 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1594,44 +1594,77 @@ int int_char(char m) {if(m>7) return 0; return m;}
//===---------------------------------------------------------------------===//
-libanalysis is not aggressively folding vector bitcasts. For example, the
-constant expressions generated when compiling this code:
+IPSCCP is propagating elements of first class aggregates, but is not propagating
+the entire aggregate itself. This leads it to miss opportunities, for example
+in test/Transforms/SCCP/ipsccp-basic.ll:test5b.
-union vec2d {
- double e[2];
- double v __attribute__((vector_size(16)));
-};
-typedef union vec2d vec2d;
+//===---------------------------------------------------------------------===//
-static vec2d a={{1,2}}, b={{3,4}};
-
-vec2d foo () {
- return (vec2d){ .v = a.v + b.v * (vec2d){{5,5}}.v };
-}
+int func(int a, int b) { if (a & 0x80) b |= 0x80; else b &= ~0x80; return b; }
-in X86-32 end up being:
+Generates this:
-define void @foo(%union.vec2d* noalias nocapture sret %agg.result) nounwind ssp {
+define i32 @func(i32 %a, i32 %b) nounwind readnone ssp {
entry:
- %agg.result.0 = getelementptr %union.vec2d* %agg.result, i32 0, i32 0 ; <<2 x double>*> [#uses=1]
- store <2 x double> fadd (<2 x double> bitcast (<1 x i128> <i128 85070591730234615870450834276742070272> to <2 x double>), <2 x double> fmul (<2 x double> bitcast (<1 x i128> <i128 85153668479971173112514077617450647552> to <2 x double>), <2 x double> <double 5.000000e+00, double 5.000000e+00>)), <2 x double>* %agg.result.0, align 16
- ret void
+ %0 = and i32 %a, 128 ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ %2 = or i32 %b, 128 ; <i32> [#uses=1]
+ %3 = and i32 %b, -129 ; <i32> [#uses=1]
+ %b_addr.0 = select i1 %1, i32 %3, i32 %2 ; <i32> [#uses=1]
+ ret i32 %b_addr.0
}
-and in X86-64 mode:
+However, it's functionally equivalent to:
+
+ b = (b & ~0x80) | (a & 0x80);
-define %0 @foo() nounwind readnone ssp {
+Which generates this:
+
+define i32 @func(i32 %a, i32 %b) nounwind readnone ssp {
entry:
- %mrv5 = insertvalue %0 undef, double extractelement (<2 x double> fadd (<2 x double> bitcast (<1 x i128> <i128 85070591730234615870450834276742070272> to <2 x double>), <2 x double> fmul (<2 x double> bitcast (<1 x i128> <i128 85153668479971173112514077617450647552> to <2 x double>), <2 x double> bitcast (<1 x i128> <i128 85174437667405312423031577302488055808> to <2 x double>))), i32 0), 0 ; <%0> [#uses=1]
- %mrv6 = insertvalue %0 %mrv5, double extractelement (<2 x double> fadd (<2 x double> bitcast (<1 x i128> <i128 85070591730234615870450834276742070272> to <2 x double>), <2 x double> fmul (<2 x double> bitcast (<1 x i128> <i128 85153668479971173112514077617450647552> to <2 x double>), <2 x double> bitcast (<1 x i128> <i128 85174437667405312423031577302488055808> to <2 x double>))), i32 1), 1 ; <%0> [#uses=1]
- ret %0 %mrv6
+ %0 = and i32 %b, -129 ; <i32> [#uses=1]
+ %1 = and i32 %a, 128 ; <i32> [#uses=1]
+ %2 = or i32 %0, %1 ; <i32> [#uses=1]
+ ret i32 %2
}
-//===---------------------------------------------------------------------===//
+This can be generalized for other forms:
-IPSCCP is propagating elements of first class aggregates, but is not propagating
-the entire aggregate itself. This leads it to miss opportunities, for example
-in test/Transforms/SCCP/ipsccp-basic.ll:test5b.
+ b = (b & ~0x80) | (a & 0x40) << 1;
//===---------------------------------------------------------------------===//
+These two functions produce different code. They shouldn't:
+
+#include <stdint.h>
+
+uint8_t p1(uint8_t b, uint8_t a) {
+ b = (b & ~0xc0) | (a & 0xc0);
+ return (b);
+}
+
+uint8_t p2(uint8_t b, uint8_t a) {
+ b = (b & ~0x40) | (a & 0x40);
+ b = (b & ~0x80) | (a & 0x80);
+ return (b);
+}
+
+define zeroext i8 @p1(i8 zeroext %b, i8 zeroext %a) nounwind readnone ssp {
+entry:
+ %0 = and i8 %b, 63 ; <i8> [#uses=1]
+ %1 = and i8 %a, -64 ; <i8> [#uses=1]
+ %2 = or i8 %1, %0 ; <i8> [#uses=1]
+ ret i8 %2
+}
+
+define zeroext i8 @p2(i8 zeroext %b, i8 zeroext %a) nounwind readnone ssp {
+entry:
+ %0 = and i8 %b, 63 ; <i8> [#uses=1]
+ %.masked = and i8 %a, 64 ; <i8> [#uses=1]
+ %1 = and i8 %a, -128 ; <i8> [#uses=1]
+ %2 = or i8 %1, %0 ; <i8> [#uses=1]
+ %3 = or i8 %2, %.masked ; <i8> [#uses=1]
+ ret i8 %3
+}
+
+//===---------------------------------------------------------------------===//
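
The new README entry's claim that the two forms are functionally equivalent is easy to check exhaustively over byte-sized inputs:

// exhaustive check of the identity behind the README note above:
// (a & 0x80 ? b | 0x80 : b & ~0x80) == (b & ~0x80) | (a & 0x80)
#include <cassert>

int main() {
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b) {
      int branchy = (a & 0x80) ? (b | 0x80) : (b & ~0x80);
      int masked  = (b & ~0x80) | (a & 0x80);
      assert(branchy == masked);
    }
  return 0;
}
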
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index a3e5fba..452b46f 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -44,7 +44,7 @@ using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
- class VISIBILITY_HIDDEN SparcAsmPrinter : public AsmPrinter {
+ class SparcAsmPrinter : public AsmPrinter {
/// We name each basic block in a Function with a unique number, so
/// that we can consistently refer to them later. This is cleared
/// at the beginning of each call to runOnMachineFunction().
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 44821b8..f2f1b96 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -238,10 +238,10 @@ let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
[(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
}
-// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
-// scheduler into a branch sequence. This has to handle all permutations of
-// selection between i32/f32/f64 on ICC and FCC.
-let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence. This has to handle all
+// permutations of selection between i32/f32/f64 on ICC and FCC.
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
def SELECT_CC_Int_ICC
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
"; SELECT_CC_Int_ICC PSEUDO!",
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
index a128992..a4a8d6a 100644
--- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
@@ -33,7 +33,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
@@ -42,7 +41,7 @@ using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
- class VISIBILITY_HIDDEN SystemZAsmPrinter : public AsmPrinter {
+ class SystemZAsmPrinter : public AsmPrinter {
public:
SystemZAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
const MCAsmInfo *MAI, bool V)
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 07e0d83..5c8cae0 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -53,11 +53,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
if (!UseSoftFloat) {
addRegisterClass(MVT::f32, SystemZ::FP32RegisterClass);
addRegisterClass(MVT::f64, SystemZ::FP64RegisterClass);
-
- addLegalFPImmediate(APFloat(+0.0)); // lzer
- addLegalFPImmediate(APFloat(+0.0f)); // lzdr
- addLegalFPImmediate(APFloat(-0.0)); // lzer + lner
- addLegalFPImmediate(APFloat(-0.0f)); // lzdr + lndr
}
// Compute derived properties from the register classes
@@ -80,7 +75,13 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
setStackPointerRegisterToSaveRestore(SystemZ::R15D);
- setSchedulingPreference(SchedulingForLatency);
+
+ // TODO: It may be better to default to latency-oriented scheduling, however
+ // LLVM's current latency-oriented scheduler can't handle physreg definitions
+ // such as SystemZ has with PSW, so set this to the register-pressure
+ // scheduler, because it can.
+ setSchedulingPreference(SchedulingForRegPressure);
+
setBooleanContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -169,6 +170,17 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
}
}
+bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (UseSoftFloat || (VT != MVT::f32 && VT != MVT::f64))
+ return false;
+
+ // +0.0 lzer
+ // +0.0f lzdr
+ // -0.0 lzer + lner
+ // -0.0f lzdr + lndr
+ return Imm.isZero() || Imm.isNegZero();
+}
+
//===----------------------------------------------------------------------===//
// SystemZ Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -657,7 +669,7 @@ SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS,
DebugLoc dl = LHS.getDebugLoc();
return DAG.getNode((isUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
- dl, MVT::Flag, LHS, RHS);
+ dl, MVT::i64, LHS, RHS);
}
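
The removed addLegalFPImmediate calls and the new isFPImmLegal override are two sides of one interface change: rather than registering legal FP immediates up front, the target now answers a query, and the legalizer falls back to a constant-pool load when the answer is false. SystemZ accepts exactly the signed zeros, which it can synthesize with lzer/lzdr (plus lner/lndr for the negative forms). A standalone model of the mechanism, not LLVM code:

// standalone model: the target publishes a predicate and the legalizer
// picks between an in-register immediate and a constant-pool load.
#include <cstdio>

static bool isFPImmLegal(double Imm) {
  return Imm == 0.0;                 // true for both +0.0 and -0.0
}

int main() {
  const double Imms[] = {0.0, -0.0, 1.0, 2.5};
  for (unsigned i = 0; i != 4; ++i)
    std::printf("%g -> %s\n", Imms[i],
                isFPImmLegal(Imms[i]) ? "materialize in register"
                                      : "load from constant pool");
  return 0;
}
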
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index c2c24bc..5bf1ed6 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -89,6 +89,11 @@ namespace llvm {
MachineBasicBlock *BB,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
private:
SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 8a202d4..336e20e 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -25,15 +25,15 @@ def fpimmneg0 : PatLeaf<(fpimm), [{
return N->isExactlyValue(-0.0);
}]>;
-let usesCustomDAGSchedInserter = 1 in {
+let Uses = [PSW], usesCustomInserter = 1 in {
def SelectF32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, i8imm:$cc),
"# SelectF32 PSEUDO",
[(set FP32:$dst,
- (SystemZselect FP32:$src1, FP32:$src2, imm:$cc))]>;
+ (SystemZselect FP32:$src1, FP32:$src2, imm:$cc, PSW))]>;
def SelectF64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, i8imm:$cc),
"# SelectF64 PSEUDO",
[(set FP64:$dst,
- (SystemZselect FP64:$src1, FP64:$src2, imm:$cc))]>;
+ (SystemZselect FP64:$src1, FP64:$src2, imm:$cc, PSW))]>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 56d75dd..1891bba 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -32,12 +32,12 @@ def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>;
def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>;
def SDT_CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
-def SDT_BrCond : SDTypeProfile<0, 2,
+def SDT_BrCond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>,
- SDTCisI8<1>]>;
-def SDT_SelectCC : SDTypeProfile<1, 3,
+ SDTCisI8<1>, SDTCisVT<2, i64>]>;
+def SDT_SelectCC : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
- SDTCisI8<3>]>;
+ SDTCisI8<3>, SDTCisVT<4, i64>]>;
def SDT_Address : SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
@@ -54,11 +54,11 @@ def SystemZcallseq_start :
def SystemZcallseq_end :
SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest, [SDNPOutFlag]>;
-def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest, [SDNPOutFlag]>;
+def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest>;
+def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest>;
def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond,
- [SDNPHasChain, SDNPInFlag]>;
-def SystemZselect : SDNode<"SystemZISD::SELECT", SDT_SelectCC, [SDNPInFlag]>;
+ [SDNPHasChain]>;
+def SystemZselect : SDNode<"SystemZISD::SELECT", SDT_SelectCC>;
def SystemZpcrelwrapper : SDNode<"SystemZISD::PCRelativeWrapper", SDT_Address, []>;
@@ -74,15 +74,15 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
"#ADJCALLSTACKUP",
[(SystemZcallseq_end timm:$amt1, timm:$amt2)]>;
-let usesCustomDAGSchedInserter = 1 in {
+let Uses = [PSW], usesCustomInserter = 1 in {
def Select32 : Pseudo<(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cc),
"# Select32 PSEUDO",
[(set GR32:$dst,
- (SystemZselect GR32:$src1, GR32:$src2, imm:$cc))]>;
+ (SystemZselect GR32:$src1, GR32:$src2, imm:$cc, PSW))]>;
def Select64 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$cc),
"# Select64 PSEUDO",
[(set GR64:$dst,
- (SystemZselect GR64:$src1, GR64:$src2, imm:$cc))]>;
+ (SystemZselect GR64:$src1, GR64:$src2, imm:$cc, PSW))]>;
}
@@ -106,46 +106,46 @@ let isBranch = 1, isTerminator = 1 in {
let Uses = [PSW] in {
def JO : Pseudo<(outs), (ins brtarget:$dst),
"jo\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_O)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_O, PSW)]>;
def JH : Pseudo<(outs), (ins brtarget:$dst),
"jh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_H)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_H, PSW)]>;
def JNLE: Pseudo<(outs), (ins brtarget:$dst),
"jnle\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLE)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLE, PSW)]>;
def JL : Pseudo<(outs), (ins brtarget:$dst),
"jl\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_L)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_L, PSW)]>;
def JNHE: Pseudo<(outs), (ins brtarget:$dst),
"jnhe\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NHE)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NHE, PSW)]>;
def JLH : Pseudo<(outs), (ins brtarget:$dst),
"jlh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LH)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LH, PSW)]>;
def JNE : Pseudo<(outs), (ins brtarget:$dst),
"jne\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NE)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NE, PSW)]>;
def JE : Pseudo<(outs), (ins brtarget:$dst),
"je\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_E)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_E, PSW)]>;
def JNLH: Pseudo<(outs), (ins brtarget:$dst),
"jnlh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLH)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLH, PSW)]>;
def JHE : Pseudo<(outs), (ins brtarget:$dst),
"jhe\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_HE)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_HE, PSW)]>;
def JNL : Pseudo<(outs), (ins brtarget:$dst),
"jnl\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NL)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NL, PSW)]>;
def JLE : Pseudo<(outs), (ins brtarget:$dst),
"jle\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LE)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LE, PSW)]>;
def JNH : Pseudo<(outs), (ins brtarget:$dst),
"jnh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NH)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NH, PSW)]>;
def JNO : Pseudo<(outs), (ins brtarget:$dst),
"jno\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NO)]>;
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NO, PSW)]>;
} // Uses = [PSW]
} // isBranch = 1
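
Together with the SystemZISelLowering.cpp change above (EmitCmp now returns an MVT::i64 value instead of MVT::Flag), these hunks stop threading the condition code through DAG flag glue: CMP/UCMP lose SDNPOutFlag, BRCOND/SELECT lose SDNPInFlag and instead take the comparison result as an ordinary extra operand, with PSW named in Uses. A sketch of the resulting node construction, with operand order inferred from the SDT_BrCond profile (target block, cond-code immediate, i64 PSW value); not verbatim patch code:

SDValue PSW = DAG.getNode(SystemZISD::CMP, dl, MVT::i64, LHS, RHS);
SDValue Br  = DAG.getNode(SystemZISD::BRCOND, dl, MVT::Other,
                          Chain, Dest, CC, PSW);   // PSW passed explicitly
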
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index d498c57..24787a8 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -329,6 +329,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_ConstantPoolIndex:
MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(
+ MO.getBlockAddress()));
+ break;
}
OutMI.addOperand(MCOp);
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 046d35c..9b7aab8 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1952,3 +1952,5 @@ fact these instructions are identical to the non-lock versions. We need a way to
add target specific information to target nodes and have this information
carried over to machine instructions. Asm printer (or JIT) can use this
information to add the "lock" prefix.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index f942f3f..a0bded3 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -19,6 +19,7 @@
#include "X86TargetMachine.h"
#include "X86Relocations.h"
#include "X86.h"
+#include "llvm/LLVMContext.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
@@ -32,7 +33,6 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -43,7 +43,7 @@ STATISTIC(NumEmitted, "Number of machine instructions emitted");
namespace {
template<class CodeEmitter>
- class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
+ class Emitter : public MachineFunctionPass {
const X86InstrInfo *II;
const TargetData *TD;
X86TargetMachine &TM;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index d9a05a8..a2fe9b0 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -40,7 +40,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -53,7 +52,7 @@ STATISTIC(NumFXCH, "Number of fxch instructions inserted");
STATISTIC(NumFP , "Number of floating point instructions");
namespace {
- struct VISIBILITY_HIDDEN FPS : public MachineFunctionPass {
+ struct FPS : public MachineFunctionPass {
static char ID;
FPS() : MachineFunctionPass(&ID) {}
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 3e0385c..34a0045 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -30,7 +29,7 @@ using namespace llvm;
STATISTIC(NumFPKill, "Number of FP_REG_KILL instructions added");
namespace {
- struct VISIBILITY_HIDDEN FPRegKiller : public MachineFunctionPass {
+ struct FPRegKiller : public MachineFunctionPass {
static char ID;
FPRegKiller() : MachineFunctionPass(&ID) {}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5b678fb..122f515 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -33,7 +33,6 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -72,6 +71,7 @@ namespace {
SDValue Segment;
GlobalValue *GV;
Constant *CP;
+ BlockAddress *BlockAddr;
const char *ES;
int JT;
unsigned Align; // CP alignment.
@@ -79,12 +79,12 @@ namespace {
X86ISelAddressMode()
: BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
- Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0),
+ Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
SymbolFlags(X86II::MO_NO_FLAG) {
}
bool hasSymbolicDisplacement() const {
- return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
}
bool hasBaseOrIndexReg() const {
@@ -147,7 +147,7 @@ namespace {
/// ISel - X86 specific code to select X86 machine instructions for
/// SelectionDAG operations.
///
- class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
+ class X86DAGToDAGISel : public SelectionDAGISel {
/// X86Lowering - This object fully describes how to lower LLVM code to an
/// X86-specific SelectionDAG.
X86TargetLowering &X86Lowering;
@@ -242,6 +242,9 @@ namespace {
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
else if (AM.JT != -1)
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
+ else if (AM.BlockAddr)
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/,
+ true /*AM.SymbolFlags*/);
else
Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
@@ -761,10 +764,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.SymbolFlags = S->getTargetFlags();
- } else {
- JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
}
if (N.getOpcode() == X86ISD::WrapperRIP)
@@ -790,10 +795,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.SymbolFlags = S->getTargetFlags();
- } else {
- JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
}
return false;
}
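
The final else in MatchWrapper used to cast<JumpTableSDNode> unconditionally; now that blockaddress nodes also reach this code, the jump-table case becomes a dyn_cast and the unconditional cast moves to the one kind left, BlockAddressSDNode. The distinction is the usual LLVM one: cast<T> asserts the operand really is a T, while dyn_cast<T> returns null and lets control fall through. A standalone model of that dispatch shape (toy types, not LLVM's casting machinery):

#include <cstdio>

struct Node { int Kind; };                    // 0 = jump table, 1 = block addr
struct JumpTableNode : Node {};

template <typename T> T *dyn_cast_toy(Node *N, int K) {
  return N->Kind == K ? static_cast<T *>(N) : 0;
}

static const char *match(Node *N) {
  if (dyn_cast_toy<JumpTableNode>(N, 0))
    return "jump table";              // dyn_cast may fail, returning null
  return "block address";             // last remaining kind: safe to assume
}

int main() {
  Node JT = {0}, BA = {1};
  std::printf("%s / %s\n", match(&JT), match(&BA));
  return 0;
}
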
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e5e7bc8..86ec9f2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -328,11 +328,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit())
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
+ setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
+ setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
}
// 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
@@ -2310,6 +2312,17 @@ static bool hasFPCMov(unsigned X86CC) {
}
}
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
+ if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
+ return true;
+ }
+ return false;
+}
+
/// isUndefOrInRange - Return true if Val is undef or if its value falls within
/// the specified range (L, H].
static bool isUndefOrInRange(int Val, int Low, int Hi) {
@@ -4682,6 +4695,24 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
}
SDValue
+X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+
+ DebugLoc DL = Op.getDebugLoc();
+
+ BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ return Result;
+}
+
+SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
int64_t Offset,
SelectionDAG &DAG) const {
@@ -7008,6 +7039,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShift(Op, DAG);
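
ISD::BlockAddress is produced by the IR-level blockaddress constant, which C front ends emit for GNU's labels-as-values extension; LowerBlockAddress wraps the target form in X86ISD::Wrapper, or WrapperRIP for RIP-relative small/kernel code models, exactly like the other symbolic operands. A source-level example that exercises this path (requires GNU extensions, e.g. g++ or clang++):

// &&label yields a blockaddress constant; goto *p branches through it.
#include <cstdio>

static int dispatch(int i) {
  static void *Targets[] = { &&add_one, &&times_two };  // blockaddress uses
  goto *Targets[i & 1];
add_one:
  return i + 1;
times_two:
  return i * 2;
}

int main() {
  std::printf("%d %d\n", dispatch(2), dispatch(3));     // prints: 3 6
  return 0;
}
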
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 66a9107..7b59b81 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -499,6 +499,11 @@ namespace llvm {
/// from i32 to i8 but not from i32 to i16.
virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const;
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
@@ -584,6 +589,15 @@ namespace llvm {
bool X86ScalarSSEf32;
bool X86ScalarSSEf64;
+ /// LegalFPImmediates - A list of legal fp immediates.
+ std::vector<APFloat> LegalFPImmediates;
+
+ /// addLegalFPImmediate - Indicate that this x86 target can instruction
+ /// select the specified FP immediate natively.
+ void addLegalFPImmediate(const APFloat& Imm) {
+ LegalFPImmediates.push_back(Imm);
+ }
+
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -621,6 +635,7 @@ namespace llvm {
SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG);
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG);
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
int64_t Offset, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
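
X86 takes the table-driven variant of the new hook: the constructor records materializable immediates via addLegalFPImmediate, and isFPImmLegal scans the list with APFloat::bitwiseIsEqual (see the X86ISelLowering.cpp hunk above). Bitwise equality matters because +0.0 == -0.0 numerically, yet only a bit-identical immediate on the list can be synthesized. A standalone model, with memcmp standing in for bitwiseIsEqual:

#include <cstring>
#include <vector>

static std::vector<double> LegalFPImmediates;

static void addLegalFPImmediate(double Imm) {
  LegalFPImmediates.push_back(Imm);
}

static bool isFPImmLegal(double Imm) {
  for (size_t i = 0, e = LegalFPImmediates.size(); i != e; ++i)
    if (std::memcmp(&LegalFPImmediates[i], &Imm, sizeof(double)) == 0)
      return true;                      // bit-identical immediate found
  return false;
}

int main() {
  addLegalFPImmediate(+0.0);            // e.g. materializable via xorps/fldz
  addLegalFPImmediate(+1.0);            // e.g. fld1
  return !(isFPImmLegal(0.0) && isFPImmLegal(1.0) && !isFPImmLegal(-0.0));
}
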
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index c1b7b8f..3edced7 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1541,7 +1541,7 @@ def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
}
// Atomic exchange, and, or, xor
let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomDAGSchedInserter = 1 in {
+ usesCustomInserter = 1 in {
def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
"#ATOMAND64 PSEUDO!",
[(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
@@ -1595,6 +1595,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
// In static codegen with small code model, we can get the address of a label
// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of
@@ -1607,6 +1609,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
// In kernel code model, we can get the address of a label
// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
@@ -1619,6 +1623,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
// If we have small model and -static mode, it is safe to store global addresses
// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
@@ -1635,6 +1641,9 @@ def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV64mi32 addr:$dst, texternalsym:$src)>,
Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tblockaddress:$src)>,
+ Requires<[NearData, IsStatic]>;
// Calls
// Direct PC relative function call for small code model. 32-bit displacement
@@ -1799,43 +1808,43 @@ def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_ABCD),
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
x86_subreg_8bit_hi)),
x86_subreg_32bit)>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(srl_su GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi)),
x86_subreg_16bit)>,
Requires<[In64BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi)),
x86_subreg_32bit)>;
def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi)),
x86_subreg_32bit)>;
@@ -1843,18 +1852,18 @@ def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_ABCD),
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
x86_subreg_8bit_hi))>;
def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 7e37373..b0b0409 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -69,7 +69,7 @@ def fpimmneg1 : PatLeaf<(fpimm), [{
}]>;
// Some 'special' instructions
-let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
(outs), (ins i16mem:$dst, RFP32:$src),
"##FP32_TO_INT16_IN_MEM PSEUDO!",
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e8a39d1..87bc10d 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2621,7 +2621,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
}
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore) const {
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
MemOp2RegOpTable.find((unsigned*)Opc);
if (I == MemOp2RegOpTable.end())
@@ -2632,6 +2633,8 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
return 0;
if (UnfoldStore && !FoldedStore)
return 0;
+ if (LoadRegIndex)
+ *LoadRegIndex = I->second.second & 0xf;
return I->second.first;
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 2237c8b..6eb07d5 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -560,9 +560,12 @@ public:
/// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new
/// instruction after load / store are unfolded from an instruction of the
/// specified opcode. It returns zero if the specified unfolding is not
- /// possible.
+ /// possible. If LoadRegIndex is non-null, it is filled in with the operand
+ /// index of the operand which will hold the register holding the loaded
+ /// value.
virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore) const;
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
virtual
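
The widened getOpcodeAfterMemoryUnfold lets a caller learn, before actually unfolding, which operand of the register form will define the loaded value; the index is packed into the low four bits of the folding-table entry (the I->second.second & 0xf in the X86InstrInfo.cpp hunk above), and passing the default null pointer skips the query. A hypothetical call-site sketch; only getOpcodeAfterMemoryUnfold itself comes from the patch:

unsigned LoadRegIndex = 0;
unsigned NewOpc = TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                                  /*UnfoldLoad=*/true,
                                                  /*UnfoldStore=*/false,
                                                  &LoadRegIndex);
if (NewOpc != 0) {
  // Operand LoadRegIndex of the unfolded instruction holds the loaded value.
}
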
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 16b2af7..9b82e1e 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -524,7 +524,7 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
}
// x86-64 va_start lowering magic.
-let usesCustomDAGSchedInserter = 1 in
+let usesCustomInserter = 1 in
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
(outs),
(ins GR8:$al,
@@ -1129,13 +1129,13 @@ let isTwoAddress = 1 in {
// Conditional moves
let Uses = [EFLAGS] in {
-// X86 doesn't have 8-bit conditional moves. Use a customDAGSchedInserter to
+// X86 doesn't have 8-bit conditional moves. Use a customInserter to
// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
// however that requires promoting the operands, and can induce additional
// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
// clobber EFLAGS, because if one of the operands is zero, the expansion
// could involve an xor.
-let usesCustomDAGSchedInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in
+let usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in
def CMOV_GR8 : I<0, Pseudo,
(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
"#CMOV_GR8 PSEUDO!",
@@ -3667,7 +3667,7 @@ def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
// Atomic exchange, and, or, xor
let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomDAGSchedInserter = 1 in {
+ usesCustomInserter = 1 in {
def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
"#ATOMAND32 PSEUDO!",
[(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
@@ -3736,7 +3736,7 @@ let Constraints = "$val1 = $dst1, $val2 = $dst2",
Defs = [EFLAGS, EAX, EBX, ECX, EDX],
Uses = [EAX, EBX, ECX, EDX],
mayLoad = 1, mayStore = 1,
- usesCustomDAGSchedInserter = 1 in {
+ usesCustomInserter = 1 in {
def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
"#ATOMAND6432 PSEUDO!", []>;
@@ -3789,6 +3789,7 @@ def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
(ADD32ri GR32:$src1, tconstpool:$src2)>;
@@ -3798,11 +3799,15 @@ def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
(ADD32ri GR32:$src1, tglobaladdr:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
(ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
+ (ADD32ri GR32:$src1, tblockaddress:$src2)>;
def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
(MOV32mi addr:$dst, tglobaladdr:$src)>;
def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tblockaddress:$src)>;
// Calls
// tailcall stuff
@@ -3964,12 +3969,14 @@ def : Pat<(and GR32:$src1, 0xffff),
(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src1, GR32_ABCD),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
+ GR32_ABCD)),
x86_subreg_8bit))>,
Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD),
+ (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
+ GR16_ABCD)),
x86_subreg_8bit))>,
Requires<[In32BitMode]>;
@@ -3977,11 +3984,13 @@ def : Pat<(and GR16:$src1, 0xff),
def : Pat<(sext_inreg GR32:$src, i16),
(MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
x86_subreg_8bit))>,
Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
x86_subreg_8bit))>,
Requires<[In32BitMode]>;
@@ -3989,40 +3998,40 @@ def : Pat<(sext_inreg GR16:$src, i8),
def : Pat<(i16 (trunc GR32:$src)),
(EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>;
def : Pat<(i8 (trunc GR32:$src)),
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit)>,
Requires<[In32BitMode]>;
// h-register tricks
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(srl_su GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32rr8
- (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi)),
x86_subreg_16bit)>,
Requires<[In32BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
- (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In32BitMode]>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index ce76b4e..500785b 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -706,10 +706,9 @@ def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)),
def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
(MMX_PCMPGTDrm VR64:$src1, addr:$src2)>;
-// CMOV* - Used to implement the SELECT DAG operation. Expanded by the
-// scheduler into a branch sequence.
-// These are expanded by the scheduler.
-let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
+// CMOV* - Used to implement the SELECT DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let Uses = [EFLAGS], usesCustomInserter = 1 in {
def CMOV_V1I64 : I<0, Pseudo,
(outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond),
"#CMOV_V1I64 PSEUDO!",
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f4e97c9..be242a0 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -174,7 +174,8 @@ def fp32imm0 : PatLeaf<(f32 fpimm), [{
return N->isExactlyValue(+0.0);
}]>;
-def PSxLDQ_imm : SDNodeXForm<imm, [{
+// BYTE_imm - Transform bit immediates into byte immediates.
+def BYTE_imm : SDNodeXForm<imm, [{
// Transformation function: imm >> 3
return getI32Imm(N->getZExtValue() >> 3);
}]>;
@@ -298,10 +299,9 @@ def palign : PatFrag<(ops node:$lhs, node:$rhs),
// SSE scalar FP Instructions
//===----------------------------------------------------------------------===//
-// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
-// scheduler into a branch sequence.
-// These are expanded by the scheduler.
-let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let Uses = [EFLAGS], usesCustomInserter = 1 in {
def CMOV_FR32 : I<0, Pseudo,
(outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
"#CMOV_FR32 PSEUDO!",
@@ -1996,21 +1996,21 @@ let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+ (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+ (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
(v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
(v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+ (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
// Shift up / down and insert zero's.
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
- (v2i64 (PSLLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
+ (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
- (v2i64 (PSRLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
+ (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
}
// Logical
@@ -2822,37 +2822,41 @@ let Constraints = "$src1 = $dst" in {
def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i16imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR64:$dst,
- (int_x86_ssse3_palign_r
- VR64:$src1, VR64:$src2,
- imm:$src3))]>;
+ []>;
def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i16imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR64:$dst,
- (int_x86_ssse3_palign_r
- VR64:$src1,
- (bitconvert (memopv2i32 addr:$src2)),
- imm:$src3))]>;
+ []>;
def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (int_x86_ssse3_palign_r_128
- VR128:$src1, VR128:$src2,
- imm:$src3))]>, OpSize;
+ []>, OpSize;
def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i32imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (int_x86_ssse3_palign_r_128
- VR128:$src1,
- (bitconvert (memopv4i32 addr:$src2)),
- imm:$src3))]>, OpSize;
+ []>, OpSize;
}
// palignr patterns.
+def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i16 imm:$src3)),
+ (PALIGNR64rr VR64:$src1, VR64:$src2, (BYTE_imm imm:$src3))>,
+ Requires<[HasSSSE3]>;
+def : Pat<(int_x86_ssse3_palign_r VR64:$src1,
+ (memop64 addr:$src2),
+ (i16 imm:$src3)),
+ (PALIGNR64rm VR64:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
+ Requires<[HasSSSE3]>;
+
+def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i32 imm:$src3)),
+ (PALIGNR128rr VR128:$src1, VR128:$src2, (BYTE_imm imm:$src3))>,
+ Requires<[HasSSSE3]>;
+def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1,
+ (memopv2i64 addr:$src2),
+ (i32 imm:$src3)),
+ (PALIGNR128rm VR128:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
+ Requires<[HasSSSE3]>;
+
let AddedComplexity = 5 in {
def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
@@ -3802,7 +3806,7 @@ let Constraints = "$src1 = $dst" in {
}
// String/text processing instructions.
-let Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"#PCMPISTRM128rr PSEUDO!",
@@ -3830,7 +3834,7 @@ def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
}
let Defs = [EFLAGS], Uses = [EAX, EDX],
- usesCustomDAGSchedInserter = 1 in {
+ usesCustomInserter = 1 in {
def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"#PCMPESTRM128rr PSEUDO!",
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index f03723a..c5ff525 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -38,7 +38,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -1473,7 +1472,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
#include "X86GenRegisterInfo.inc"
namespace {
- struct VISIBILITY_HIDDEN MSAC : public MachineFunctionPass {
+ struct MSAC : public MachineFunctionPass {
static char ID;
MSAC() : MachineFunctionPass(&ID) {}
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index e58edda..bc1bbc3 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -52,7 +52,7 @@ static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
cl::init(8));
namespace {
- class VISIBILITY_HIDDEN XCoreAsmPrinter : public AsmPrinter {
+ class XCoreAsmPrinter : public AsmPrinter {
const XCoreSubtarget &Subtarget;
public:
explicit XCoreAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 4b9ea7a..68e69a2 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -357,9 +357,9 @@ def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
"${:comment} STWFI $src, $addr",
[(store GRRegs:$src, ADDRspii:$addr)]>;
-// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
-// scheduler into a branch sequence.
-let usesCustomDAGSchedInserter = 1 in {
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1 in {
def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst),
(ins GRRegs:$cond, GRRegs:$T, GRRegs:$F),
"${:comment} SELECT_CC PSEUDO!",
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 5b91f3d..dd5a6d8 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -41,7 +41,6 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -59,7 +58,7 @@ STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
namespace {
/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
///
- struct VISIBILITY_HIDDEN ArgPromotion : public CallGraphSCCPass {
+ struct ArgPromotion : public CallGraphSCCPass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
CallGraphSCCPass::getAnalysisUsage(AU);
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 5c28801..92bef3b 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -19,7 +19,6 @@ add_llvm_library(LLVMipo
PartialInlining.cpp
PartialSpecialization.cpp
PruneEH.cpp
- RaiseAllocations.cpp
StripDeadPrototypes.cpp
StripSymbols.cpp
StructRetPromotion.cpp
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index c1a1045..4972687 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -22,14 +22,13 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include <map>
using namespace llvm;
STATISTIC(NumMerged, "Number of global constants merged");
namespace {
- struct VISIBILITY_HIDDEN ConstantMerge : public ModulePass {
+ struct ConstantMerge : public ModulePass {
static char ID; // Pass identification, replacement for typeid
ConstantMerge() : ModulePass(&ID) {}
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 79a32f0..a3db836 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -33,7 +33,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Compiler.h"
#include <map>
#include <set>
using namespace llvm;
@@ -44,7 +43,7 @@ STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
namespace {
/// DAE - The dead argument elimination pass.
///
- class VISIBILITY_HIDDEN DAE : public ModulePass {
+ class DAE : public ModulePass {
public:
/// Struct that represents (part of) either a return value or a function
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
index 85aed2b..025d77e 100644
--- a/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ b/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -19,13 +19,12 @@
#include "llvm/TypeSymbolTable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
using namespace llvm;
STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
namespace {
- struct VISIBILITY_HIDDEN DTE : public ModulePass {
+ struct DTE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
DTE() : ModulePass(&ID) {}
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 191100c..7f67e48 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -17,13 +17,12 @@
#include "llvm/Pass.h"
#include "llvm/Constants.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Support/Compiler.h"
#include <algorithm>
using namespace llvm;
namespace {
/// @brief A pass to extract specific functions and their dependencies.
- class VISIBILITY_HIDDEN GVExtractorPass : public ModulePass {
+ class GVExtractorPass : public ModulePass {
std::vector<GlobalValue*> Named;
bool deleteStuff;
bool reLink;
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 0701b94..b3a832f 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -26,11 +26,10 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/InstIterator.h"
using namespace llvm;
@@ -40,7 +39,7 @@ STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
namespace {
- struct VISIBILITY_HIDDEN FunctionAttrs : public CallGraphSCCPass {
+ struct FunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
FunctionAttrs() : CallGraphSCCPass(&ID) {}
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 8f4e8b3..44216a6 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -20,9 +20,8 @@
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
-#include <set>
using namespace llvm;
STATISTIC(NumAliases , "Number of global aliases removed");
@@ -30,7 +29,7 @@ STATISTIC(NumFunctions, "Number of functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
namespace {
- struct VISIBILITY_HIDDEN GlobalDCE : public ModulePass {
+ struct GlobalDCE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
GlobalDCE() : ModulePass(&ID) {}
@@ -40,7 +39,7 @@ namespace {
bool runOnModule(Module &M);
private:
- std::set<GlobalValue*> AliveGlobals;
+ SmallPtrSet<GlobalValue*, 32> AliveGlobals;
/// GlobalIsNeeded - mark the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
@@ -92,7 +91,8 @@ bool GlobalDCE::runOnModule(Module &M) {
// The first pass is to drop initializers of global variables which are dead.
std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I)
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
if (!AliveGlobals.count(I)) {
DeadGlobalVars.push_back(I); // Keep track of dead globals
I->setInitializer(0);
@@ -155,14 +155,10 @@ bool GlobalDCE::runOnModule(Module &M) {
/// GlobalIsNeeded - the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
- std::set<GlobalValue*>::iterator I = AliveGlobals.find(G);
-
// If the global is already in the set, no need to reprocess it.
- if (I != AliveGlobals.end()) return;
-
- // Otherwise insert it now, so we do not infinitely recurse
- AliveGlobals.insert(I, G);
-
+ if (!AliveGlobals.insert(G))
+ return;
+
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
// If this is a global variable, we must make sure to add any global values
// referenced by the initializer to the alive set.
@@ -177,11 +173,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
// operands. Any operands of these types must be processed to ensure that
// any globals used will be marked as needed.
Function *F = cast<Function>(G);
- // For all basic blocks...
+
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- // For all instructions...
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- // For all operands...
for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
GlobalIsNeeded(GV);
@@ -192,13 +186,13 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
- GlobalIsNeeded(GV);
- else {
- // Loop over all of the operands of the constant, adding any globals they
- // use to the list of needed globals.
- for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
- MarkUsedGlobalsAsNeeded(cast<Constant>(*I));
- }
+ return GlobalIsNeeded(GV);
+
+ // Loop over all of the operands of the constant, adding any globals they
+ // use to the list of needed globals.
+ for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
+ if (Constant *OpC = dyn_cast<Constant>(*I))
+ MarkUsedGlobalsAsNeeded(OpC);
}
// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
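The rewritten GlobalIsNeeded also shows the idiom that motivates the container
change: SmallPtrSet::insert reports whether the element was actually inserted,
so the separate find/insert pair disappears. A minimal sketch of the idiom
(hypothetical names, not part of the patch):

  #include "llvm/ADT/SmallPtrSet.h"
  using namespace llvm;

  static SmallPtrSet<GlobalValue*, 32> Alive;

  static void markNeeded(GlobalValue *GV) {
    if (!Alive.insert(GV))   // false: GV was already in the set,
      return;                // so we never reprocess or recurse forever
    // ... mark everything GV references as needed ...
  }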
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 9ced2e8..5dab9ef 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -24,10 +24,9 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
@@ -57,7 +56,7 @@ STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
namespace {
- struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass {
+ struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
}
static char ID; // Pass identification, replacement for typeid
@@ -85,7 +84,7 @@ namespace {
/// GlobalStatus - As we analyze each global, keep track of some information
/// about it. If we find out that the address of the global is taken, none of
/// this info will be accurate.
-struct VISIBILITY_HIDDEN GlobalStatus {
+struct GlobalStatus {
/// isLoaded - True if the global is ever loaded. If the global isn't ever
/// loaded it can be deleted.
bool isLoaded;
@@ -824,6 +823,7 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
BitCastInst *BCI,
+ Value* NElems,
LLVMContext &Context,
TargetData* TD) {
DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV
@@ -831,9 +831,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
const Type *IntPtrTy = TD->getIntPtrType(Context);
- Value* ArraySize = getMallocArraySize(CI, Context, TD);
- assert(ArraySize && "not a malloc whose array size can be determined");
- ConstantInt *NElements = cast<ConstantInt>(ArraySize);
+ ConstantInt *NElements = cast<ConstantInt>(NElems);
if (NElements->getZExtValue() != 1) {
// If we have an array allocation, transform it to a single element
// allocation to make the code below simpler.
@@ -1276,15 +1274,14 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
- CallInst *CI, BitCastInst* BCI,
+ CallInst *CI, BitCastInst* BCI,
+ Value* NElems,
LLVMContext &Context,
- TargetData *TD){
+ TargetData *TD) {
DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI
<< " BITCAST = " << *BCI << '\n');
const Type* MAT = getMallocAllocatedType(CI);
const StructType *STy = cast<StructType>(MAT);
- Value* ArraySize = getMallocArraySize(CI, Context, TD);
- assert(ArraySize && "not a malloc whose array size can be determined");
// There is guaranteed to be at least one use of the malloc (storing
// it into GV). If there are other uses, change them to be uses of
@@ -1310,7 +1307,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
FieldGlobals.push_back(NGV);
Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
- FieldTy, ArraySize,
+ FieldTy, NElems,
BCI->getName() + ".f" + Twine(FieldNo));
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, BCI);
@@ -1364,10 +1361,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
OrigBB->getParent());
BasicBlock *NextBlock = BasicBlock::Create(Context, "next",
OrigBB->getParent());
- BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);
+ Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
+ Cmp, NullPtrBlock);
// Fill in FreeBlock.
- new FreeInst(GVVal, FreeBlock);
+ CallInst::CreateFree(GVVal, BI);
new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
FreeBlock);
BranchInst::Create(NextBlock, FreeBlock);
@@ -1510,7 +1508,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// something.
if (TD &&
NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD);
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, Context, TD);
return true;
}
@@ -1520,7 +1518,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is an allocation of a fixed size array of structs, analyze as a
// variable size array. malloc [100 x struct],1 -> malloc struct, 100
- if (!isArrayMalloc(CI, Context, TD))
+ if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1))
if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
AllocTy = AT->getElementType();
@@ -1547,7 +1545,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CI = extractMallocCallFromBitCast(NewMI);
}
- GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD);
+ GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, Context, TD);
return true;
}
}
@@ -1878,9 +1876,8 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
if (!F->hasName() && !F->isDeclaration())
F->setLinkage(GlobalValue::InternalLinkage);
F->removeDeadConstantUsers();
- if (F->use_empty() && (F->hasLocalLinkage() ||
- F->hasLinkOnceLinkage())) {
- M.getFunctionList().erase(F);
+ if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) {
+ F->eraseFromParent();
Changed = true;
++NumFnDeleted;
} else if (F->hasLocalLinkage()) {
@@ -2343,6 +2340,12 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
dyn_cast<ConstantInt>(getVal(Values, SI->getCondition()));
if (!Val) return false; // Cannot determine.
NewBB = SI->getSuccessor(SI->findCaseValue(Val));
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
+ Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts();
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
+ NewBB = BA->getBasicBlock();
+ else
+ return false; // Cannot determine.
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
if (RI->getNumOperands())
RetVal = getVal(Values, RI->getOperand(0));
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 7b0e9c7..023e642 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -24,7 +24,6 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
@@ -35,7 +34,7 @@ STATISTIC(NumReturnValProped, "Number of return values turned into constants");
namespace {
/// IPCP - The interprocedural constant propagation pass
///
- struct VISIBILITY_HIDDEN IPCP : public ModulePass {
+ struct IPCP : public ModulePass {
static char ID; // Pass identification, replacement for typeid
IPCP() : ModulePass(&ID) {}
@@ -87,6 +86,9 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
unsigned NumNonconstant = 0;
for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ // Ignore blockaddress uses.
+ if (isa<BlockAddress>(*UI)) continue;
+
// Used by a non-instruction, or not the callee of a function, do not
// transform.
if (!isa<CallInst>(*UI) && !isa<InvokeInst>(*UI))
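The new BlockAddress guard is the pattern any use scan of a function needs now
that blockaddress constants can reference a function without calling it. A
minimal sketch of the same check (hypothetical helper, built from the APIs in
the hunk above):

  #include "llvm/Constants.h"
  #include "llvm/Function.h"
  #include "llvm/Instructions.h"
  using namespace llvm;

  // Count only real call/invoke uses of F, skipping blockaddress users.
  static unsigned countCallUses(Function &F) {
    unsigned N = 0;
    for (Value::use_iterator UI = F.use_begin(), E = F.use_end();
         UI != E; ++UI) {
      if (isa<BlockAddress>(*UI))
        continue; // not a call site
      if (isa<CallInst>(*UI) || isa<InvokeInst>(*UI))
        ++N;
    }
    return N;
  }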
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 43066076..83e8624 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -63,7 +63,7 @@ void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
}
void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createRaiseAllocationsPass());
+ // FIXME: Remove in LLVM 3.0.
}
void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 2344403..f11ecae 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -21,7 +21,6 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -31,7 +30,7 @@ using namespace llvm;
namespace {
// AlwaysInliner only inlines functions that are marked as "always inline".
- class VISIBILITY_HIDDEN AlwaysInliner : public Inliner {
+ class AlwaysInliner : public Inliner {
// Functions that are never inlined
SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index b1c643b..598043d 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -20,7 +20,6 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -29,7 +28,7 @@ using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN SimpleInliner : public Inliner {
+ class SimpleInliner : public Inliner {
// Functions that are never inlined
SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index e3c3c67..20ae0d5 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -19,7 +19,6 @@
#include "llvm/Pass.h"
#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
@@ -44,7 +43,7 @@ APIList("internalize-public-api-list", cl::value_desc("list"),
cl::CommaSeparated);
namespace {
- class VISIBILITY_HIDDEN InternalizePass : public ModulePass {
+ class InternalizePass : public ModulePass {
std::set<std::string> ExternalNames;
/// If no api symbols were specified and a main function is defined,
/// assume the main function is the only API
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 02ac3bb..fd69aeb 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/FunctionUtils.h"
#include "llvm/ADT/Statistic.h"
@@ -33,7 +32,7 @@ using namespace llvm;
STATISTIC(NumExtracted, "Number of loops extracted");
namespace {
- struct VISIBILITY_HIDDEN LoopExtractor : public LoopPass {
+ struct LoopExtractor : public LoopPass {
static char ID; // Pass identification, replacement for typeid
unsigned NumLoops;
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index 55194b3..4d61e83 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -43,14 +43,10 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/VectorExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include <map>
using namespace llvm;
@@ -62,8 +58,7 @@ STATISTIC(InvokesTransformed , "Number of invokes modified");
namespace {
//===--------------------------------------------------------------------===//
// LowerSetJmp pass implementation.
- class VISIBILITY_HIDDEN LowerSetJmp : public ModulePass,
- public InstVisitor<LowerSetJmp> {
+ class LowerSetJmp : public ModulePass, public InstVisitor<LowerSetJmp> {
// LLVM library functions...
Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap
Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap
@@ -110,7 +105,7 @@ namespace {
void TransformLongJmpCall(CallInst* Inst);
void TransformSetJmpCall(CallInst* Inst);
- bool IsTransformableFunction(const std::string& Name);
+ bool IsTransformableFunction(StringRef Name);
public:
static char ID; // Pass identification, replacement for typeid
LowerSetJmp() : ModulePass(&ID) {}
@@ -251,13 +246,8 @@ bool LowerSetJmp::doInitialization(Module& M)
// "llvm.{setjmp,longjmp}" functions and none of the setjmp/longjmp error
// handling functions (beginning with __llvm_sjljeh_...they don't throw
// exceptions).
-bool LowerSetJmp::IsTransformableFunction(const std::string& Name) {
- std::string SJLJEh("__llvm_sjljeh");
-
- if (Name.size() > SJLJEh.size())
- return std::string(Name.begin(), Name.begin() + SJLJEh.size()) != SJLJEh;
-
- return true;
+bool LowerSetJmp::IsTransformableFunction(StringRef Name) {
+ return !Name.startswith("__llvm_sjljeh_");
}
// TransformLongJmpCall - Transform a longjmp call into a call to the
@@ -265,8 +255,7 @@ bool LowerSetJmp::IsTransformableFunction(const std::string& Name) {
// throwing the exception for us.
void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
{
- const Type* SBPTy =
- Type::getInt8PtrTy(Inst->getContext());
+ const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext());
// Create the call to "__llvm_sjljeh_throw_longjmp". This takes the
// same parameters as "longjmp", except that the buffer is cast to a
@@ -274,10 +263,8 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
// Inst's uses and doesn't get a name.
CastInst* CI =
new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst);
- SmallVector<Value *, 2> Args;
- Args.push_back(CI);
- Args.push_back(Inst->getOperand(2));
- CallInst::Create(ThrowLongJmp, Args.begin(), Args.end(), "", Inst);
+ Value *Args[] = { CI, Inst->getOperand(2) };
+ CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst);
SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
@@ -392,11 +379,11 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
Type::getInt8PtrTy(Inst->getContext());
CastInst* BufPtr =
new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst);
- std::vector<Value*> Args =
- make_vector<Value*>(GetSetJmpMap(Func), BufPtr,
- ConstantInt::get(Type::getInt32Ty(Inst->getContext()),
- SetJmpIDMap[Func]++), 0);
- CallInst::Create(AddSJToMap, Args.begin(), Args.end(), "", Inst);
+ Value *Args[] = {
+ GetSetJmpMap(Func), BufPtr,
+ ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
+ };
+ CallInst::Create(AddSJToMap, Args, Args + 3, "", Inst);
// We are guaranteed that there are no values live across basic blocks
// (because we are "not in SSA form" yet), but there can still be values live
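Two cleanups recur through this file: argument lists built with SmallVector or
make_vector become fixed-size stack arrays passed as a [begin, end) range to
CallInst::Create, and the prefix test becomes a single StringRef::startswith
call with no temporary std::string. A minimal sketch of the latter
(hypothetical function name):

  #include "llvm/ADT/StringRef.h"

  // True for the runtime helper names this pass must leave alone.
  static bool isSjLjHelper(llvm::StringRef Name) {
    return Name.startswith("__llvm_sjljeh_"); // no allocation, no length math
  }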
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 13bbf9c..b2bdabc 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -51,7 +51,6 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -62,7 +61,7 @@ using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
namespace {
- struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass {
+ struct MergeFunctions : public ModulePass {
static char ID; // Pass identification, replacement for typeid
MergeFunctions() : ModulePass(&ID) {}
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 8f858d3..b955b97 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -21,14 +21,13 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/FunctionUtils.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
using namespace llvm;
STATISTIC(NumPartialInlined, "Number of functions partially inlined");
namespace {
- struct VISIBILITY_HIDDEN PartialInliner : public ModulePass {
+ struct PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
static char ID; // Pass identification, replacement for typeid
PartialInliner() : ModulePass(&ID) {}
diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp
index 0e1fdb9..084b94e 100644
--- a/lib/Transforms/IPO/PartialSpecialization.cpp
+++ b/lib/Transforms/IPO/PartialSpecialization.cpp
@@ -27,7 +27,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/ADT/DenseSet.h"
#include <map>
using namespace llvm;
@@ -41,7 +40,7 @@ static const int CallsMin = 5;
static const double ConstValPercent = .1;
namespace {
- class VISIBILITY_HIDDEN PartSpec : public ModulePass {
+ class PartSpec : public ModulePass {
void scanForInterest(Function&, SmallVector<int, 6>&);
int scanDistribution(Function&, int, std::map<Constant*, int>&);
public :
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index daf81e9..3ae771c 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -27,7 +27,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include <set>
#include <algorithm>
using namespace llvm;
@@ -36,7 +35,7 @@ STATISTIC(NumRemoved, "Number of invokes removed");
STATISTIC(NumUnreach, "Number of noreturn calls optimized");
namespace {
- struct VISIBILITY_HIDDEN PruneEH : public CallGraphSCCPass {
+ struct PruneEH : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
PruneEH() : CallGraphSCCPass(&ID) {}
diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp
deleted file mode 100644
index deb4405..0000000
--- a/lib/Transforms/IPO/RaiseAllocations.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-//===- RaiseAllocations.cpp - Convert @free calls to insts ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the RaiseAllocations pass which convert free calls to free
-// instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "raiseallocs"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/ADT/Statistic.h"
-#include <algorithm>
-using namespace llvm;
-
-STATISTIC(NumRaised, "Number of allocations raised");
-
-namespace {
- // RaiseAllocations - Turn @free calls into the appropriate
- // instruction.
- //
- class VISIBILITY_HIDDEN RaiseAllocations : public ModulePass {
- Function *FreeFunc; // Functions in the module we are processing
- // Initialized by doPassInitializationVirt
- public:
- static char ID; // Pass identification, replacement for typeid
- RaiseAllocations()
- : ModulePass(&ID), FreeFunc(0) {}
-
- // doPassInitialization - For the raise allocations pass, this finds a
- // declaration for free if it exists.
- //
- void doInitialization(Module &M);
-
- // run - This method does the actual work of converting instructions over.
- //
- bool runOnModule(Module &M);
- };
-} // end anonymous namespace
-
-char RaiseAllocations::ID = 0;
-static RegisterPass<RaiseAllocations>
-X("raiseallocs", "Raise allocations from calls to instructions");
-
-// createRaiseAllocationsPass - The interface to this file...
-ModulePass *llvm::createRaiseAllocationsPass() {
- return new RaiseAllocations();
-}
-
-
-// If the module has a symbol table, they might be referring to the free
-// function. If this is the case, grab the method pointers that the module is
-// using.
-//
-// Lookup @free in the symbol table, for later use. If they don't
-// exist, or are not external, we do not worry about converting calls to that
-// function into the appropriate instruction.
-//
-void RaiseAllocations::doInitialization(Module &M) {
- // Get free prototype if it exists!
- FreeFunc = M.getFunction("free");
- if (FreeFunc) {
- const FunctionType* TyWeHave = FreeFunc->getFunctionType();
-
- // Get the expected prototype for void free(i8*)
- const FunctionType *Free1Type =
- FunctionType::get(Type::getVoidTy(M.getContext()),
- std::vector<const Type*>(1, PointerType::getUnqual(
- Type::getInt8Ty(M.getContext()))),
- false);
-
- if (TyWeHave != Free1Type) {
- // Check to see if the prototype was forgotten, giving us
- // void (...) * free
- // This handles the common forward declaration of: 'void free();'
- const FunctionType* Free2Type =
- FunctionType::get(Type::getVoidTy(M.getContext()), true);
-
- if (TyWeHave != Free2Type) {
- // One last try, check to see if we can find free as
- // int (...)* free. This handles the case where NOTHING was declared.
- const FunctionType* Free3Type =
- FunctionType::get(Type::getInt32Ty(M.getContext()), true);
-
- if (TyWeHave != Free3Type) {
- // Give up.
- FreeFunc = 0;
- }
- }
- }
- }
-
- // Don't mess with locally defined versions of these functions...
- if (FreeFunc && !FreeFunc->isDeclaration()) FreeFunc = 0;
-}
-
-// run - Transform calls into instructions...
-//
-bool RaiseAllocations::runOnModule(Module &M) {
- // Find the free prototype...
- doInitialization(M);
-
- bool Changed = false;
-
- // Process all free calls...
- if (FreeFunc) {
- std::vector<User*> Users(FreeFunc->use_begin(), FreeFunc->use_end());
- std::vector<Value*> EqPointers; // Values equal to FreeFunc
-
- while (!Users.empty()) {
- User *U = Users.back();
- Users.pop_back();
-
- if (Instruction *I = dyn_cast<Instruction>(U)) {
- if (isa<InvokeInst>(I))
- continue;
- CallSite CS = CallSite::get(I);
- if (CS.getInstruction() && !CS.arg_empty() &&
- (CS.getCalledFunction() == FreeFunc ||
- std::find(EqPointers.begin(), EqPointers.end(),
- CS.getCalledValue()) != EqPointers.end())) {
-
- // If no prototype was provided for free, we may need to cast the
- // source pointer. This should be really uncommon, but it's necessary
- // just in case we are dealing with weird code like this:
- // free((long)ptr);
- //
- Value *Source = *CS.arg_begin();
- if (!isa<PointerType>(Source->getType()))
- Source = new IntToPtrInst(Source,
- Type::getInt8PtrTy(M.getContext()),
- "FreePtrCast", I);
- new FreeInst(Source, I);
-
- // If the old instruction was an invoke, add an unconditional branch
- // before the invoke, which will become the new terminator.
- if (InvokeInst *II = dyn_cast<InvokeInst>(I))
- BranchInst::Create(II->getNormalDest(), I);
-
- // Delete the old call site
- if (I->getType() != Type::getVoidTy(M.getContext()))
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- I->eraseFromParent();
- Changed = true;
- ++NumRaised;
- }
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(U)) {
- Users.insert(Users.end(), GV->use_begin(), GV->use_end());
- EqPointers.push_back(GV);
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
- if (CE->isCast()) {
- Users.insert(Users.end(), CE->use_begin(), CE->use_end());
- EqPointers.push_back(CE);
- }
- }
- }
- }
-
- return Changed;
-}
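With RaiseAllocations deleted, turning a call to free into an explicit
instruction is no longer a pass of its own; code that needs to materialize a
free call goes through CallInst::CreateFree instead, as the GlobalOpt hunk
above already does. A minimal sketch, assuming the signature used elsewhere in
this patch:

  #include "llvm/Instructions.h"

  // Emit a call to free(Ptr) immediately before InsertBefore.
  static void emitFree(llvm::Value *Ptr, llvm::Instruction *InsertBefore) {
    llvm::CallInst::CreateFree(Ptr, InsertBefore);
  }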
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index a94d78e..4566a76 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -19,7 +19,6 @@
#include "llvm/Pass.h"
#include "llvm/Module.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
using namespace llvm;
STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
@@ -27,7 +26,7 @@ STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
namespace {
/// @brief Pass to remove unused function declarations.
-class VISIBILITY_HIDDEN StripDeadPrototypesPass : public ModulePass {
+class StripDeadPrototypesPass : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
StripDeadPrototypesPass() : ModulePass(&ID) { }
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 57aaf43..4f6369e 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -112,11 +112,11 @@ static bool OnlyUsedBy(Value *V, Value *Usr) {
static void RemoveDeadConstant(Constant *C) {
assert(C->use_empty() && "Constant is not dead!");
- SmallPtrSet<Constant *, 4> Operands;
+ SmallPtrSet<Constant*, 4> Operands;
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
if (isa<DerivedType>(C->getOperand(i)->getType()) &&
OnlyUsedBy(C->getOperand(i), C))
- Operands.insert(C->getOperand(i));
+ Operands.insert(cast<Constant>(C->getOperand(i)));
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
GV->eraseFromParent();
@@ -126,7 +126,7 @@ static void RemoveDeadConstant(Constant *C) {
C->destroyConstant();
// If the constant referenced anything, see if we can delete it as well.
- for (SmallPtrSet<Constant *, 4>::iterator OI = Operands.begin(),
+ for (SmallPtrSet<Constant*, 4>::iterator OI = Operands.begin(),
OE = Operands.end(); OI != OE; ++OI)
RemoveDeadConstant(*OI);
}
@@ -290,23 +290,13 @@ bool StripDebugDeclare::runOnModule(Module &M) {
Declare->eraseFromParent();
}
- // Delete all llvm.dbg.global_variables.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
- if (!GV) continue;
- if (GV->use_empty() && GV->getName().startswith("llvm.dbg.global_variable"))
- DeadConstants.push_back(GV);
- }
-
while (!DeadConstants.empty()) {
Constant *C = DeadConstants.back();
DeadConstants.pop_back();
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
if (GV->hasLocalLinkage())
RemoveDeadConstant(GV);
- }
- else
+ } else
RemoveDeadConstant(C);
}
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index 4442820..67fc934 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -34,7 +34,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -44,7 +43,7 @@ namespace {
/// SRETPromotion - This pass removes sret parameter and updates
/// function to use multiple return value.
///
- struct VISIBILITY_HIDDEN SRETPromotion : public CallGraphSCCPass {
+ struct SRETPromotion : public CallGraphSCCPass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
CallGraphSCCPass::getAnalysisUsage(AU);
}
diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp
index eb8f225..211a6d6 100644
--- a/lib/Transforms/Instrumentation/BlockProfiling.cpp
+++ b/lib/Transforms/Instrumentation/BlockProfiling.cpp
@@ -22,7 +22,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "RSProfiling.h"
@@ -30,7 +29,7 @@
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN FunctionProfiler : public RSProfilers_std {
+ class FunctionProfiler : public RSProfilers_std {
public:
static char ID;
bool runOnModule(Module &M);
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index b9cb275..9ae3786 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -20,7 +20,6 @@
#include "ProfilingUtils.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Instrumentation.h"
@@ -31,7 +30,7 @@ using namespace llvm;
STATISTIC(NumEdgesInserted, "The # of edges inserted.");
namespace {
- class VISIBILITY_HIDDEN EdgeProfiler : public ModulePass {
+ class EdgeProfiler : public ModulePass {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index b2e6747..0a46fe5 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -33,7 +32,7 @@ using namespace llvm;
STATISTIC(NumEdgesInserted, "The # of edges inserted.");
namespace {
- class VISIBILITY_HIDDEN OptimalEdgeProfiler : public ModulePass {
+ class OptimalEdgeProfiler : public ModulePass {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
index 3b72260..c08efc1 100644
--- a/lib/Transforms/Instrumentation/RSProfiling.cpp
+++ b/lib/Transforms/Instrumentation/RSProfiling.cpp
@@ -42,7 +42,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -72,7 +71,7 @@ namespace {
/// NullProfilerRS - The basic profiler that does nothing. It is the default
/// profiler and thus terminates RSProfiler chains. It is useful for
/// measuring framework overhead
- class VISIBILITY_HIDDEN NullProfilerRS : public RSProfilers {
+ class NullProfilerRS : public RSProfilers {
public:
static char ID; // Pass identification, replacement for typeid
bool isProfiling(Value* v) {
@@ -94,7 +93,7 @@ static RegisterAnalysisGroup<RSProfilers, true> NPT(NP);
namespace {
/// Chooser - Something that chooses when to make a sample of the profiled code
- class VISIBILITY_HIDDEN Chooser {
+ class Chooser {
public:
/// ProcessChoicePoint - is called for each basic block inserted to choose
/// between normal and sample code
@@ -108,7 +107,7 @@ namespace {
//Things that implement sampling policies
//A global value that is read-mod-stored to choose when to sample.
//A sample is taken when the global counter hits 0
- class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
+ class GlobalRandomCounter : public Chooser {
GlobalVariable* Counter;
Value* ResetValue;
const IntegerType* T;
@@ -120,7 +119,7 @@ namespace {
};
//Same as GRC, but allows register allocation of the global counter
- class VISIBILITY_HIDDEN GlobalRandomCounterOpt : public Chooser {
+ class GlobalRandomCounterOpt : public Chooser {
GlobalVariable* Counter;
Value* ResetValue;
AllocaInst* AI;
@@ -134,7 +133,7 @@ namespace {
//Use the cycle counter intrinsic as a source of pseudo randomness when
//deciding when to sample.
- class VISIBILITY_HIDDEN CycleCounter : public Chooser {
+ class CycleCounter : public Chooser {
uint64_t rm;
Constant *F;
public:
@@ -145,7 +144,7 @@ namespace {
};
/// ProfilerRS - Insert the random sampling framework
- struct VISIBILITY_HIDDEN ProfilerRS : public FunctionPass {
+ struct ProfilerRS : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
ProfilerRS() : FunctionPass(&ID) {}
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
new file mode 100644
index 0000000..c8541d7
--- /dev/null
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -0,0 +1,1104 @@
+//===------- ABCD.cpp - Removes redundant conditional branches ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass removes redundant branch instructions. This algorithm was
+// described by Rastislav Bodik, Rajiv Gupta and Vivek Sarkar in their paper
+// "ABCD: Eliminating Array Bounds Checks on Demand (2000)". The original
+// algorithm was created to remove array bound checks for strongly typed
+// languages. This implementation expands the idea and removes any conditional
+// branches that can be proved redundant, not only those used in array bound
+// checks. With the SSI representation, each variable has a
+// constraint. By analyzing these constraints we can prove that a branch is
+// redundant. When a branch is proved redundant it means that
+// one direction will always be taken; thus, we can change this branch into an
+// unconditional jump.
+// It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination
+// after ABCD to clean up the code.
+// This implementation is based on the implementation of the ABCD
+// algorithm in the Jitrino compiler.
+//
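+// For example (an illustrative sketch, not taken from the paper): in
+//
+//   if (a < b) { if (a <= b) { ... } }
+//
+// the inner condition is implied by the outer one, so ABCD can prove that the
+// inner branch always takes its true edge and turn it into an unconditional
+// jump.
+//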
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "abcd"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SSI.h"
+
+using namespace llvm;
+
+STATISTIC(NumBranchTested, "Number of conditional branches analyzed");
+STATISTIC(NumBranchRemoved, "Number of conditional branches removed");
+
+namespace {
+
+class ABCD : public FunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ ABCD() : FunctionPass(&ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<SSI>();
+ }
+
+ bool runOnFunction(Function &F);
+
+ private:
+ /// Keep track of whether we've modified the program yet.
+ bool modified;
+
+ enum ProveResult {
+ False = 0,
+ Reduced = 1,
+ True = 2
+ };
+
+ typedef ProveResult (*meet_function)(ProveResult, ProveResult);
+ static ProveResult max(ProveResult res1, ProveResult res2) {
+ return (ProveResult) std::max(res1, res2);
+ }
+ static ProveResult min(ProveResult res1, ProveResult res2) {
+ return (ProveResult) std::min(res1, res2);
+ }
+
+ class Bound {
+ public:
+ Bound(APInt v, bool upper) : value(v), upper_bound(upper) {}
+ Bound(const Bound *b, int cnst)
+ : value(b->value - cnst), upper_bound(b->upper_bound) {}
+ Bound(const Bound *b, const APInt &cnst)
+ : value(b->value - cnst), upper_bound(b->upper_bound) {}
+
+ /// Test if Bound is an upper bound
+ bool isUpperBound() const { return upper_bound; }
+
+ /// Get the bitwidth of this bound
+ int32_t getBitWidth() const { return value.getBitWidth(); }
+
+ /// Creates a Bound incrementing the one received
+ static Bound *createIncrement(const Bound *b) {
+ return new Bound(b->isUpperBound() ? b->value+1 : b->value-1,
+ b->upper_bound);
+ }
+
+ /// Creates a Bound decrementing the one received
+ static Bound *createDecrement(const Bound *b) {
+ return new Bound(b->isUpperBound() ? b->value-1 : b->value+1,
+ b->upper_bound);
+ }
+
+ /// Test if two bounds are equal
+ static bool eq(const Bound *a, const Bound *b) {
+ if (!a || !b) return false;
+
+ assert(a->isUpperBound() == b->isUpperBound());
+ return a->value == b->value;
+ }
+
+ /// Test if val is less than or equal to Bound b
+ static bool leq(APInt val, const Bound *b) {
+ if (!b) return false;
+ return b->isUpperBound() ? val.sle(b->value) : val.sge(b->value);
+ }
+
+    /// Test if Bound a is less than or equal to Bound b
+ static bool leq(const Bound *a, const Bound *b) {
+ if (!a || !b) return false;
+
+ assert(a->isUpperBound() == b->isUpperBound());
+ return a->isUpperBound() ? a->value.sle(b->value) :
+ a->value.sge(b->value);
+ }
+
+    /// Test if Bound a is less than Bound b
+ static bool lt(const Bound *a, const Bound *b) {
+ if (!a || !b) return false;
+
+ assert(a->isUpperBound() == b->isUpperBound());
+ return a->isUpperBound() ? a->value.slt(b->value) :
+ a->value.sgt(b->value);
+ }
+
+    /// Test if Bound b is greater than or equal to val
+ static bool geq(const Bound *b, APInt val) {
+ return leq(val, b);
+ }
+
+    /// Test if Bound a is greater than or equal to Bound b
+ static bool geq(const Bound *a, const Bound *b) {
+ return leq(b, a);
+ }
+
+ private:
+ APInt value;
+ bool upper_bound;
+ };
+
+  /// This class is used to store results for some parts of the graph,
+  /// so information does not need to be recalculated. The maximum false,
+  /// minimum true and minimum reduced results are stored.
+ class MemoizedResultChart {
+ public:
+ MemoizedResultChart()
+ : max_false(NULL), min_true(NULL), min_reduced(NULL) {}
+
+ /// Returns the max false
+ Bound *getFalse() const { return max_false; }
+
+ /// Returns the min true
+ Bound *getTrue() const { return min_true; }
+
+ /// Returns the min reduced
+ Bound *getReduced() const { return min_reduced; }
+
+ /// Return the stored result for this bound
+ ProveResult getResult(const Bound *bound) const;
+
+ /// Stores a false found
+ void addFalse(Bound *bound);
+
+ /// Stores a true found
+ void addTrue(Bound *bound);
+
+ /// Stores a Reduced found
+ void addReduced(Bound *bound);
+
+    /// Clears redundant reduced results.
+    /// If min_true is smaller than min_reduced then min_reduced
+    /// is unnecessary and is removed. The same applies when min_reduced
+    /// is smaller than max_false.
+ void clearRedundantReduced();
+
+ void clear() {
+ delete max_false;
+ delete min_true;
+ delete min_reduced;
+ }
+
+ private:
+ Bound *max_false, *min_true, *min_reduced;
+ };
+
+ /// This class stores the result found for a node of the graph,
+ /// so these results do not need to be recalculated, only searched for.
+ class MemoizedResult {
+ public:
+    /// Test if there is a true result stored from b to a
+    /// that is less than the bound
+ bool hasTrue(Value *b, const Bound *bound) const {
+ Bound *trueBound = map.lookup(b).getTrue();
+ return trueBound && Bound::leq(trueBound, bound);
+ }
+
+    /// Test if there is a false result stored from b to a
+    /// that is less than the bound
+ bool hasFalse(Value *b, const Bound *bound) const {
+ Bound *falseBound = map.lookup(b).getFalse();
+ return falseBound && Bound::leq(falseBound, bound);
+ }
+
+    /// Test if there is a reduced result stored from b to a
+    /// that is less than the bound
+ bool hasReduced(Value *b, const Bound *bound) const {
+ Bound *reducedBound = map.lookup(b).getReduced();
+ return reducedBound && Bound::leq(reducedBound, bound);
+ }
+
+    /// Returns the stored result for b
+ ProveResult getBoundResult(Value *b, Bound *bound) {
+ return map[b].getResult(bound);
+ }
+
+ /// Clears the map
+ void clear() {
+ DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin();
+ DenseMapIterator<Value*, MemoizedResultChart> end = map.end();
+ for (; begin != end; ++begin) {
+ begin->second.clear();
+ }
+ map.clear();
+ }
+
+ /// Stores the bound found
+ void updateBound(Value *b, Bound *bound, const ProveResult res);
+
+ private:
+    // Maps a node in the graph to the results found for it.
+ DenseMap<Value*, MemoizedResultChart> map;
+ };
+
+ /// This class represents an edge in the inequality graph used by the
+ /// ABCD algorithm. An edge connects node v to node u with a value c if
+ /// we could infer a constraint v <= u + c in the source program.
+ class Edge {
+ public:
+ Edge(Value *V, APInt val, bool upper)
+ : vertex(V), value(val), upper_bound(upper) {}
+
+ Value *getVertex() const { return vertex; }
+ const APInt &getValue() const { return value; }
+ bool isUpperBound() const { return upper_bound; }
+
+ private:
+ Value *vertex;
+ APInt value;
+ bool upper_bound;
+ };
+
+ /// Weighted and Directed graph to represent constraints.
+ /// There is one type of constraint, a <= b + X, which will generate an
+ /// edge from b to a with weight X.
+ class InequalityGraph {
+ public:
+
+ /// Adds an edge from V_from to V_to with weight value
+ void addEdge(Value *V_from, Value *V_to, APInt value, bool upper);
+
+ /// Test if there is a node V
+ bool hasNode(Value *V) const { return graph.count(V); }
+
+ /// Test if there is any edge from V in the upper direction
+ bool hasEdge(Value *V, bool upper) const;
+
+    /// Returns all edges pointed to by vertex V
+ SmallPtrSet<Edge *, 16> getEdges(Value *V) const {
+ return graph.lookup(V);
+ }
+
+ /// Prints the graph in dot format.
+ /// Blue edges represent upper bound and Red lower bound.
+ void printGraph(raw_ostream &OS, Function &F) const {
+ printHeader(OS, F);
+ printBody(OS);
+ printFooter(OS);
+ }
+
+ /// Clear the graph
+ void clear() {
+ graph.clear();
+ }
+
+ private:
+ DenseMap<Value *, SmallPtrSet<Edge *, 16> > graph;
+
+ /// Adds a Node to the graph.
+ DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator addNode(Value *V) {
+ SmallPtrSet<Edge *, 16> p;
+ return graph.insert(std::make_pair(V, p)).first;
+ }
+
+ /// Prints the header of the dot file
+ void printHeader(raw_ostream &OS, Function &F) const;
+
+ /// Prints the footer of the dot file
+ void printFooter(raw_ostream &OS) const {
+ OS << "}\n";
+ }
+
+ /// Prints the body of the dot file
+ void printBody(raw_ostream &OS) const;
+
+ /// Prints vertex source to the dot file
+ void printVertex(raw_ostream &OS, Value *source) const;
+
+ /// Prints the edge to the dot file
+ void printEdge(raw_ostream &OS, Value *source, Edge *edge) const;
+
+ void printName(raw_ostream &OS, Value *info) const;
+ };
+
+  /// Iterates through all BasicBlocks; if the Terminator Instruction
+  /// uses a Comparator Instruction, all operands of this comparator
+ /// are sent to be transformed to SSI. Only Instruction operands are
+ /// transformed.
+ void createSSI(Function &F);
+
+ /// Creates the graphs for this function.
+  /// It will look for all comparators used in branches, and create
+  /// constraints from them. These comparators create constraints for any
+  /// instruction appearing as an operand.
+ void executeABCD(Function &F);
+
+  /// Seeks redundancies in the comparator instruction ICI.
+  /// If the ABCD algorithm can prove that the comparator ICI always
+  /// takes one way, then the Terminator Instruction TI is changed from
+  /// a conditional branch to an unconditional one.
+ /// This code basically receives a comparator, and verifies which kind of
+ /// instruction it is. Depending on the kind of instruction, we use different
+ /// strategies to prove its redundancy.
+ void seekRedundancy(ICmpInst *ICI, TerminatorInst *TI);
+
+  /// Substitutes Terminator Instruction TI, which is a conditional branch,
+  /// with an unconditional branch. Succ_edge determines if the new
+ /// unconditional edge will be the first or second edge of the former TI
+ /// instruction.
+ void removeRedundancy(TerminatorInst *TI, bool Succ_edge);
+
+  /// When a conditional branch is removed, the BasicBlock that is no longer
+  /// reachable will have problems in its phi functions. This method fixes
+  /// these phis by removing the former BasicBlock from the list of incoming
+  /// BasicBlocks of all phis. If a phi is left with no predecessors, it is
+  /// marked to be removed later.
+ void fixPhi(BasicBlock *BB, BasicBlock *Succ);
+
+ /// Removes phis that have no predecessor
+ void removePhis();
+
+ /// Creates constraints for Instructions.
+  /// If the constraint for this instruction has already been created,
+  /// nothing is done.
+ void createConstraintInstruction(Instruction *I);
+
+ /// Creates constraints for Binary Operators.
+  /// It will create constraints only for addition and subtraction;
+  /// the other binary operations are not treated by ABCD.
+ /// For additions in the form a = b + X and a = X + b, where X is a constant,
+ /// the constraint a <= b + X can be obtained. For this constraint, an edge
+ /// a->b with weight X is added to the lower bound graph, and an edge
+ /// b->a with weight -X is added to the upper bound graph.
+  /// Only subtractions of the form a = b - X are used by ABCD.
+  /// Edges are created using the same semantics as addition.
+ void createConstraintBinaryOperator(BinaryOperator *BO);
+
+ /// Creates constraints for Comparator Instructions.
+ /// Only comparators that have any of the following operators
+  /// are used to create constraints: >=, >, <=, <, and only if
+  /// at least one operand is an Instruction. In a Comparator Instruction
+  /// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f, where
+  /// t and f represent sigmas for operands in the true and false branches. The
+ /// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and
+ /// b_f <= b. There are two more constraints that depend on the operator.
+ /// For the operator <= : a_t <= b_t and b_f <= a_f-1
+ /// For the operator < : a_t <= b_t-1 and b_f <= a_f
+ /// For the operator >= : b_t <= a_t and a_f <= b_f-1
+ /// For the operator > : b_t <= a_t-1 and a_f <= b_f
+ void createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI);
+
+ /// Creates constraints for PHI nodes.
+ /// In a PHI node a = phi(b,c) we can create the constraint
+ /// a<= max(b,c). With this constraint there will be the edges,
+ /// b->a and c->a with weight 0 in the lower bound graph, and the edges
+ /// a->b and a->c with weight 0 in the upper bound graph.
+ void createConstraintPHINode(PHINode *PN);
+
+  /// Given a binary operator, we are only interested in the case
+  /// that one operand is an Instruction and the other is a ConstantInt. In
+  /// this case the method returns true, otherwise false. It also obtains the
+  /// Instruction and ConstantInt from the BinaryOperator and returns them.
+ bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
+ Instruction **I2, ConstantInt **C1,
+ ConstantInt **C2);
+
+ /// This method creates a constraint between a Sigma and an Instruction.
+  /// These constraints are created as soon as we find a comparator that uses
+  /// an SSI variable.
+ void createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
+ BasicBlock *BB_succ_f, PHINode **SIG_op_t,
+ PHINode **SIG_op_f);
+
+  /// If PN_op1 and PN_op2 are different from NULL, create a constraint
+  /// PN_op2 -> PN_op1 with value. If either of them is NULL, replace
+  /// it with the respective V_op#, if V_op# is a ConstantInt.
+ void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, APInt value);
+
+ /// Returns the sigma representing the Instruction I in BasicBlock BB.
+ /// Returns NULL in case there is no sigma for this Instruction in this
+  /// Basic Block. This method assumes that sigmas are the first instructions
+  /// in a block, and that there can be only two sigmas in a block, so it
+  /// only looks at the first two instructions of BasicBlock BB.
+ PHINode *findSigma(BasicBlock *BB, Instruction *I);
+
+ /// Original ABCD algorithm to prove redundant checks.
+ /// This implementation works on any kind of inequality branch.
+ bool demandProve(Value *a, Value *b, int c, bool upper_bound);
+
+ /// Prove that distance between b and a is <= bound
+ ProveResult prove(Value *a, Value *b, Bound *bound, unsigned level);
+
+ /// Updates the distance value for a and b
+ void updateMemDistance(Value *a, Value *b, Bound *bound, unsigned level,
+ meet_function meet);
+
+ InequalityGraph inequality_graph;
+ MemoizedResult mem_result;
+ DenseMap<Value*, Bound*> active;
+ SmallPtrSet<Value*, 16> created;
+ SmallVector<PHINode *, 16> phis_to_remove;
+};
+
+} // end anonymous namespace.
+
+char ABCD::ID = 0;
+static RegisterPass<ABCD> X("abcd", "ABCD: Eliminating Array Bounds Checks on Demand");
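+// Usage sketch (assuming the standard opt driver): RegisterPass exposes the
+// pass as -abcd, and the pass manager schedules the required SSI analysis
+// first, e.g. "opt -abcd in.bc -o out.bc".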
+
+
+bool ABCD::runOnFunction(Function &F) {
+ modified = false;
+ createSSI(F);
+ executeABCD(F);
+ DEBUG(inequality_graph.printGraph(errs(), F));
+ removePhis();
+
+ inequality_graph.clear();
+ mem_result.clear();
+ active.clear();
+ created.clear();
+ phis_to_remove.clear();
+ return modified;
+}
+
+/// Iterates through all BasicBlocks; if the Terminator Instruction
+/// uses a Comparator Instruction, all operands of this comparator
+/// are sent to be transformed to SSI. Only Instruction operands are
+/// transformed.
+void ABCD::createSSI(Function &F) {
+ SSI *ssi = &getAnalysis<SSI>();
+
+ SmallVector<Instruction *, 16> Insts;
+
+ for (Function::iterator begin = F.begin(), end = F.end();
+ begin != end; ++begin) {
+ BasicBlock *BB = begin;
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumOperands() == 0)
+ continue;
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0))) {
+ if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(0))) {
+        modified = true; // XXX: createSSI might still do nothing
+ Insts.push_back(I);
+ }
+ if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(1))) {
+ modified = true;
+ Insts.push_back(I);
+ }
+ }
+ }
+ ssi->createSSI(Insts);
+}
+
+/// Creates the graphs for this function.
+/// It will look for all comparators used in branches, and create
+/// constraints from them. These comparators create constraints for any
+/// instruction appearing as an operand.
+void ABCD::executeABCD(Function &F) {
+ for (Function::iterator begin = F.begin(), end = F.end();
+ begin != end; ++begin) {
+ BasicBlock *BB = begin;
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumOperands() == 0)
+ continue;
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0));
+ if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType()))
+ continue;
+
+ createConstraintCmpInst(ICI, TI);
+ seekRedundancy(ICI, TI);
+ }
+}
+
+/// Seeks redundancies in the comparator instruction ICI.
+/// If the ABCD algorithm can prove that the comparator ICI always
+/// takes one way, then the Terminator Instruction TI is changed from
+/// a conditional branch to an unconditional one.
+/// This code basically receives a comparator, and verifies which kind of
+/// instruction it is. Depending on the kind of instruction, we use different
+/// strategies to prove its redundancy.
+void ABCD::seekRedundancy(ICmpInst *ICI, TerminatorInst *TI) {
+ CmpInst::Predicate Pred = ICI->getPredicate();
+
+ Value *source, *dest;
+ int distance1, distance2;
+ bool upper;
+
+ switch(Pred) {
+ case CmpInst::ICMP_SGT: // signed greater than
+ upper = false;
+ distance1 = 1;
+ distance2 = 0;
+ break;
+
+ case CmpInst::ICMP_SGE: // signed greater or equal
+ upper = false;
+ distance1 = 0;
+ distance2 = -1;
+ break;
+
+ case CmpInst::ICMP_SLT: // signed less than
+ upper = true;
+ distance1 = -1;
+ distance2 = 0;
+ break;
+
+ case CmpInst::ICMP_SLE: // signed less or equal
+ upper = true;
+ distance1 = 0;
+ distance2 = 1;
+ break;
+
+ default:
+ return;
+ }
+
+ ++NumBranchTested;
+ source = ICI->getOperand(0);
+ dest = ICI->getOperand(1);
+ if (demandProve(dest, source, distance1, upper)) {
+ removeRedundancy(TI, true);
+ } else if (demandProve(dest, source, distance2, !upper)) {
+ removeRedundancy(TI, false);
+ }
+}
+
+/// Substitutes Terminator Instruction TI, which is a conditional branch,
+/// with an unconditional branch. Succ_edge determines if the new
+/// unconditional edge will be the first or second edge of the former TI
+/// instruction.
+void ABCD::removeRedundancy(TerminatorInst *TI, bool Succ_edge) {
+ BasicBlock *Succ;
+ if (Succ_edge) {
+ Succ = TI->getSuccessor(0);
+ fixPhi(TI->getParent(), TI->getSuccessor(1));
+ } else {
+ Succ = TI->getSuccessor(1);
+ fixPhi(TI->getParent(), TI->getSuccessor(0));
+ }
+
+ BranchInst::Create(Succ, TI);
+ TI->eraseFromParent(); // XXX: invoke
+ ++NumBranchRemoved;
+ modified = true;
+}
+
+/// When a conditional branch is removed, the BasicBlock that is no longer
+/// reachable will have problems in its phi functions. This method fixes
+/// these phis by removing the former BasicBlock from the list of incoming
+/// BasicBlocks of all phis. If a phi is left with no predecessors, it is
+/// marked to be removed later.
+void ABCD::fixPhi(BasicBlock *BB, BasicBlock *Succ) {
+ BasicBlock::iterator begin = Succ->begin();
+ while (PHINode *PN = dyn_cast<PHINode>(begin++)) {
+ PN->removeIncomingValue(BB, false);
+ if (PN->getNumIncomingValues() == 0)
+ phis_to_remove.push_back(PN);
+ }
+}
+
+/// Removes phis that have no predecessor
+void ABCD::removePhis() {
+ for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) {
+ PHINode *PN = phis_to_remove[i];
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
+}
+
+/// Creates constraints for Instructions.
+/// If the constraint for this instruction has already been created,
+/// nothing is done.
+void ABCD::createConstraintInstruction(Instruction *I) {
+ // Test if this instruction has not been created before
+ if (created.insert(I)) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ createConstraintBinaryOperator(BO);
+ } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ createConstraintPHINode(PN);
+ }
+ }
+}
+
+/// Creates constraints for Binary Operators.
+/// It will create constraints only for addition and subtraction;
+/// the other binary operations are not treated by ABCD.
+/// For additions in the form a = b + X and a = X + b, where X is a constant,
+/// the constraint a <= b + X can be obtained. For this constraint, an edge
+/// a->b with weight X is added to the lower bound graph, and an edge
+/// b->a with weight -X is added to the upper bound graph.
+/// Only subtractions of the form a = b - X are used by ABCD.
+/// Edges are created using the same semantics as addition.
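+/// For example (illustrative): for a = b + 5, the constraint a <= b + 5 is
+/// recorded as an edge a->b with weight 5 in the lower bound graph and an
+/// edge b->a with weight -5 in the upper bound graph, as described above.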
+void ABCD::createConstraintBinaryOperator(BinaryOperator *BO) {
+ Instruction *I1 = NULL, *I2 = NULL;
+ ConstantInt *CI1 = NULL, *CI2 = NULL;
+
+  // Test if one operand is an Instruction and the other is a ConstantInt
+ if (!createBinaryOperatorInfo(BO, &I1, &I2, &CI1, &CI2))
+ return;
+
+ Instruction *I = 0;
+ APInt value;
+
+ switch (BO->getOpcode()) {
+ case Instruction::Add:
+ if (I1) {
+ I = I1;
+ value = CI2->getValue();
+ } else if (I2) {
+ I = I2;
+ value = CI1->getValue();
+ }
+ break;
+
+ case Instruction::Sub:
+ // Instructions like a = X-b, where X is a constant are not represented
+ // in the graph.
+ if (!I1)
+ return;
+
+ I = I1;
+ value = -CI2->getValue();
+ break;
+
+ default:
+ return;
+ }
+
+ inequality_graph.addEdge(I, BO, value, true);
+ inequality_graph.addEdge(BO, I, -value, false);
+ createConstraintInstruction(I);
+}
+
+/// Given a binary operator, we are only interested in the case
+/// that one operand is an Instruction and the other is a ConstantInt. In
+/// this case the method returns true, otherwise false. It also obtains the
+/// Instruction and ConstantInt from the BinaryOperator and returns them.
+bool ABCD::createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
+ Instruction **I2, ConstantInt **C1,
+ ConstantInt **C2) {
+ Value *op1 = BO->getOperand(0);
+ Value *op2 = BO->getOperand(1);
+
+  if ((*I1 = dyn_cast<Instruction>(op1))) {
+    if ((*C2 = dyn_cast<ConstantInt>(op2)))
+      return true; // First is an Instruction, second a ConstantInt.
+
+    return false; // Second operand is not a ConstantInt.
+  } else {
+    if ((*C1 = dyn_cast<ConstantInt>(op1)) &&
+        (*I2 = dyn_cast<Instruction>(op2)))
+      return true; // First is a ConstantInt, second an Instruction.
+
+    return false; // Not an Instruction/ConstantInt pair.
+  }
+}
+
+/// Creates constraints for Comparator Instructions.
+/// Only comparators that use one of the operators >=, >, <= or < are
+/// used to create constraints, and only if at least one operand is an
+/// Instruction. For a comparison a op b there are 4 sigma functions
+/// a_t, a_f, b_t and b_f, where t and f denote the sigmas for the
+/// operands in the true and false branches. The following constraints
+/// can be obtained: a_t <= a, a_f <= a, b_t <= b and b_f <= b. Two more
+/// constraints depend on the operator:
+/// For the operator <= : a_t <= b_t   and b_f <= a_f-1
+/// For the operator <  : a_t <= b_t-1 and b_f <= a_f
+/// For the operator >= : b_t <= a_t   and a_f <= b_f-1
+/// For the operator >  : b_t <= a_t-1 and a_f <= b_f
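+/// As a worked example of the table above: for "if (a < b)", the true
+/// branch yields a_t <= b_t-1, so a < b is known to hold there, while the
+/// false branch yields b_f <= a_f, i.e. a >= b.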
+void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
+ Value *V_op1 = ICI->getOperand(0);
+ Value *V_op2 = ICI->getOperand(1);
+
+ if (!isa<IntegerType>(V_op1->getType()))
+ return;
+
+ Instruction *I_op1 = dyn_cast<Instruction>(V_op1);
+ Instruction *I_op2 = dyn_cast<Instruction>(V_op2);
+
+ // Test if at least one operand is an Instruction
+ if (!I_op1 && !I_op2)
+ return;
+
+ BasicBlock *BB_succ_t = TI->getSuccessor(0);
+ BasicBlock *BB_succ_f = TI->getSuccessor(1);
+
+ PHINode *SIG_op1_t = NULL, *SIG_op1_f = NULL,
+ *SIG_op2_t = NULL, *SIG_op2_f = NULL;
+
+ createConstraintSigInst(I_op1, BB_succ_t, BB_succ_f, &SIG_op1_t, &SIG_op1_f);
+ createConstraintSigInst(I_op2, BB_succ_t, BB_succ_f, &SIG_op2_t, &SIG_op2_f);
+
+ int32_t width = cast<IntegerType>(V_op1->getType())->getBitWidth();
+ APInt MinusOne = APInt::getAllOnesValue(width);
+ APInt Zero = APInt::getNullValue(width);
+
+ CmpInst::Predicate Pred = ICI->getPredicate();
+ switch (Pred) {
+ case CmpInst::ICMP_SGT: // signed greater than
+ createConstraintSigSig(SIG_op2_t, SIG_op1_t, MinusOne);
+ createConstraintSigSig(SIG_op1_f, SIG_op2_f, Zero);
+ break;
+
+ case CmpInst::ICMP_SGE: // signed greater or equal
+ createConstraintSigSig(SIG_op2_t, SIG_op1_t, Zero);
+ createConstraintSigSig(SIG_op1_f, SIG_op2_f, MinusOne);
+ break;
+
+ case CmpInst::ICMP_SLT: // signed less than
+ createConstraintSigSig(SIG_op1_t, SIG_op2_t, MinusOne);
+ createConstraintSigSig(SIG_op2_f, SIG_op1_f, Zero);
+ break;
+
+ case CmpInst::ICMP_SLE: // signed less or equal
+ createConstraintSigSig(SIG_op1_t, SIG_op2_t, Zero);
+ createConstraintSigSig(SIG_op2_f, SIG_op1_f, MinusOne);
+ break;
+
+ default:
+ break;
+ }
+
+ if (I_op1)
+ createConstraintInstruction(I_op1);
+ if (I_op2)
+ createConstraintInstruction(I_op2);
+}
+
+/// Creates constraints for PHI nodes.
+/// In a PHI node a = phi(b, c) we can create the constraint
+/// a <= max(b, c). With this constraint there will be the edges
+/// b->a and c->a with weight 0 in the lower bound graph, and the edges
+/// a->b and a->c with weight 0 in the upper bound graph.
+void ABCD::createConstraintPHINode(PHINode *PN) {
+ int32_t width = cast<IntegerType>(PN->getType())->getBitWidth();
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ createConstraintInstruction(I);
+ }
+ inequality_graph.addEdge(V, PN, APInt(width, 0), true);
+ inequality_graph.addEdge(V, PN, APInt(width, 0), false);
+ }
+}
+
+/// This method creates a constraint between a Sigma and an Instruction.
+/// These constraints are created as soon as we find a comparator that uses
+/// an SSI variable.
+void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
+ BasicBlock *BB_succ_f, PHINode **SIG_op_t,
+ PHINode **SIG_op_f) {
+ *SIG_op_t = findSigma(BB_succ_t, I_op);
+ *SIG_op_f = findSigma(BB_succ_f, I_op);
+
+ if (*SIG_op_t) {
+ int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth();
+ inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true);
+ inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false);
+ created.insert(*SIG_op_t);
+ }
+ if (*SIG_op_f) {
+ int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth();
+ inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true);
+ inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false);
+ created.insert(*SIG_op_f);
+ }
+}
+
+/// If SIG_op1 and SIG_op2 are both non-NULL, creates the constraint
+/// SIG_op2 -> SIG_op1 with the given value; otherwise no constraint
+/// is created.
+void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
+ APInt value) {
+ if (SIG_op1 && SIG_op2) {
+ inequality_graph.addEdge(SIG_op2, SIG_op1, value, true);
+ inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false);
+ }
+}
+
+/// Returns the sigma representing the Instruction I in BasicBlock BB.
+/// Returns NULL in case there is no sigma for this Instruction in this
+/// Basic Block. This method assumes that sigmas are the first instructions
+/// in a block, and that there can be only two sigmas in a block, so it
+/// only looks at the first two instructions of BasicBlock BB.
+PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) {
+  // If BB has more than one predecessor, it cannot contain sigmas.
+ if (I == NULL || BB->getSinglePredecessor() == NULL)
+ return NULL;
+
+ BasicBlock::iterator begin = BB->begin();
+ BasicBlock::iterator end = BB->end();
+
+ for (unsigned i = 0; i < 2 && begin != end; ++i, ++begin) {
+ Instruction *I_succ = begin;
+ if (PHINode *PN = dyn_cast<PHINode>(I_succ))
+ if (PN->getIncomingValue(0) == I)
+ return PN;
+ }
+
+ return NULL;
+}
+
+/// Original ABCD algorithm to prove redundant checks.
+/// This implementation works on any kind of inequality branch.
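+/// Usage sketch: to ask whether b - a <= 0 can be proved in the upper
+/// bound graph, a caller would invoke demandProve(a, b, 0, true); any
+/// result other than False means the check is redundant.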
+bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) {
+ int32_t width = cast<IntegerType>(a->getType())->getBitWidth();
+ Bound *bound = new Bound(APInt(width, c), upper_bound);
+
+ mem_result.clear();
+ active.clear();
+
+ ProveResult res = prove(a, b, bound, 0);
+ return res != False;
+}
+
+/// Proves that the distance between b and a is <= bound
+ABCD::ProveResult ABCD::prove(Value *a, Value *b, Bound *bound,
+ unsigned level) {
+  // If C[b-a<=e] == True for some e <= bound, the same or a stronger
+  // difference was already proved.
+  if (mem_result.hasTrue(b, bound))
+    return True;
+
+  // If C[b-a<=e] == False for some e >= bound, the same or a weaker
+  // difference was already disproved.
+  if (mem_result.hasFalse(b, bound))
+    return False;
+
+  // If C[b-a<=e] == Reduced for some e <= bound, b is on a cycle that
+  // was reduced for the same or a stronger difference.
+  if (mem_result.hasReduced(b, bound))
+    return Reduced;
+
+ // traversal reached the source vertex
+ if (a == b && Bound::geq(bound, APInt(bound->getBitWidth(), 0, true)))
+ return True;
+
+ // if b has no predecessor then fail
+ if (!inequality_graph.hasEdge(b, bound->isUpperBound()))
+ return False;
+
+ // a cycle was encountered
+ if (active.count(b)) {
+ if (Bound::leq(active.lookup(b), bound))
+ return Reduced; // a "harmless" cycle
+
+ return False; // an amplifying cycle
+ }
+
+ active[b] = bound;
+ PHINode *PN = dyn_cast<PHINode>(b);
+
+  // Test whether b is a phi or a sigma: a PHINode with more than one
+  // incoming value is a phi; with exactly one incoming value it is a sigma.
+ if (PN && PN->getNumIncomingValues() > 1)
+ updateMemDistance(a, b, bound, level, min);
+ else
+ updateMemDistance(a, b, bound, level, max);
+
+ active.erase(b);
+
+ ABCD::ProveResult res = mem_result.getBoundResult(b, bound);
+ return res;
+}
+
+/// Updates the distance value for a and b
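+/// When meet is min, every edge must be proved for the result to hold
+/// (used for phis); when meet is max, a single proved edge suffices
+/// (used for sigmas).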
+void ABCD::updateMemDistance(Value *a, Value *b, Bound *bound, unsigned level,
+ meet_function meet) {
+ ABCD::ProveResult res = (meet == max) ? False : True;
+
+ SmallPtrSet<Edge *, 16> Edges = inequality_graph.getEdges(b);
+ SmallPtrSet<Edge *, 16>::iterator begin = Edges.begin(), end = Edges.end();
+
+ for (; begin != end ; ++begin) {
+ if (((res >= Reduced) && (meet == max)) ||
+ ((res == False) && (meet == min))) {
+ break;
+ }
+ Edge *in = *begin;
+ if (in->isUpperBound() == bound->isUpperBound()) {
+ Value *succ = in->getVertex();
+ res = meet(res, prove(a, succ, new Bound(bound, in->getValue()),
+ level+1));
+ }
+ }
+
+ mem_result.updateBound(b, bound, res);
+}
+
+/// Return the stored result for this bound
+ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound *bound)const{
+ if (max_false && Bound::leq(bound, max_false))
+ return False;
+ if (min_true && Bound::leq(min_true, bound))
+ return True;
+ if (min_reduced && Bound::leq(min_reduced, bound))
+ return Reduced;
+ return False;
+}
+
+/// Stores a False result that was found
+void ABCD::MemoizedResultChart::addFalse(Bound *bound) {
+ if (!max_false || Bound::leq(max_false, bound))
+ max_false = bound;
+
+ if (Bound::eq(max_false, min_reduced))
+ min_reduced = Bound::createIncrement(min_reduced);
+ if (Bound::eq(max_false, min_true))
+ min_true = Bound::createIncrement(min_true);
+ if (Bound::eq(min_reduced, min_true))
+ min_reduced = NULL;
+ clearRedundantReduced();
+}
+
+/// Stores a True result that was found
+void ABCD::MemoizedResultChart::addTrue(Bound *bound) {
+ if (!min_true || Bound::leq(bound, min_true))
+ min_true = bound;
+
+ if (Bound::eq(min_true, min_reduced))
+ min_reduced = Bound::createDecrement(min_reduced);
+ if (Bound::eq(min_true, max_false))
+ max_false = Bound::createDecrement(max_false);
+ if (Bound::eq(max_false, min_reduced))
+ min_reduced = NULL;
+ clearRedundantReduced();
+}
+
+/// Stores a Reduced result that was found
+void ABCD::MemoizedResultChart::addReduced(Bound *bound) {
+ if (!min_reduced || Bound::leq(bound, min_reduced))
+ min_reduced = bound;
+
+ if (Bound::eq(min_reduced, min_true))
+ min_true = Bound::createIncrement(min_true);
+ if (Bound::eq(min_reduced, max_false))
+ max_false = Bound::createDecrement(max_false);
+}
+
+/// Clears a redundant min_reduced.
+/// If min_true is smaller than min_reduced, then min_reduced is
+/// unnecessary and is removed. The same applies when min_reduced is
+/// smaller than max_false.
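+/// In effect, together with addFalse, addTrue and addReduced, this keeps
+/// the marks ordered max_false < min_reduced < min_true whenever all
+/// three are set.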
+void ABCD::MemoizedResultChart::clearRedundantReduced() {
+ if (min_true && min_reduced && Bound::lt(min_true, min_reduced))
+ min_reduced = NULL;
+ if (max_false && min_reduced && Bound::lt(min_reduced, max_false))
+ min_reduced = NULL;
+}
+
+/// Stores the result found for the given bound
+void ABCD::MemoizedResult::updateBound(Value *b, Bound *bound,
+ const ProveResult res) {
+ if (res == False) {
+ map[b].addFalse(bound);
+ } else if (res == True) {
+ map[b].addTrue(bound);
+ } else {
+ map[b].addReduced(bound);
+ }
+}
+
+/// Adds an edge from V_from to V_to with weight value
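+/// The upper flag selects whether the edge belongs to the upper or the
+/// lower bound graph.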
+void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from,
+ APInt value, bool upper) {
+ assert(V_from->getType() == V_to->getType());
+ assert(cast<IntegerType>(V_from->getType())->getBitWidth() ==
+ value.getBitWidth());
+
+ DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator from;
+ from = addNode(V_from);
+ from->second.insert(new Edge(V_to, value, upper));
+}
+
+/// Tests if there is any edge from V in the given direction (upper or lower)
+bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const {
+ SmallPtrSet<Edge *, 16> it = graph.lookup(V);
+
+ SmallPtrSet<Edge *, 16>::iterator begin = it.begin();
+ SmallPtrSet<Edge *, 16>::iterator end = it.end();
+ for (; begin != end; ++begin) {
+ if ((*begin)->isUpperBound() == upper) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Prints the header of the dot file
+void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
+ OS << "digraph dotgraph {\n";
+ OS << "label=\"Inequality Graph for \'";
+ OS << F.getNameStr() << "\' function\";\n";
+ OS << "node [shape=record,fontname=\"Times-Roman\",fontsize=14];\n";
+}
+
+/// Prints the body of the dot file
+void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
+ DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator begin =
+ graph.begin(), end = graph.end();
+
+ for (; begin != end ; ++begin) {
+ SmallPtrSet<Edge *, 16>::iterator begin_par =
+ begin->second.begin(), end_par = begin->second.end();
+ Value *source = begin->first;
+
+ printVertex(OS, source);
+
+ for (; begin_par != end_par ; ++begin_par) {
+ Edge *edge = *begin_par;
+ printEdge(OS, source, edge);
+ }
+ }
+}
+
+/// Prints the vertex 'source' to the dot file
+void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const {
+ OS << "\"";
+ printName(OS, source);
+ OS << "\"";
+ OS << " [label=\"{";
+ printName(OS, source);
+ OS << "}\"];\n";
+}
+
+/// Prints the edge to the dot file
+void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source,
+ Edge *edge) const {
+ Value *dest = edge->getVertex();
+ APInt value = edge->getValue();
+ bool upper = edge->isUpperBound();
+
+ OS << "\"";
+ printName(OS, source);
+ OS << "\"";
+ OS << " -> ";
+ OS << "\"";
+ printName(OS, dest);
+ OS << "\"";
+ OS << " [label=\"" << value << "\"";
+ if (upper) {
+ OS << "color=\"blue\"";
+ } else {
+ OS << "color=\"red\"";
+ }
+ OS << "];\n";
+}
+
+void ABCD::InequalityGraph::printName(raw_ostream &OS, Value *info) const {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(info)) {
+ OS << *CI;
+ } else {
+ if (!info->hasName()) {
+ info->setName("V");
+ }
+ OS << info->getNameStr();
+ }
+}
+
+/// createABCDPass - The public interface to this file...
+FunctionPass *llvm::createABCDPass() {
+ return new ABCD();
+}
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index cbeed4c..e048518 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,12 +1,13 @@
add_llvm_library(LLVMScalarOpts
+ ABCD.cpp
ADCE.cpp
BasicBlockPlacement.cpp
- CodeGenLICM.cpp
CodeGenPrepare.cpp
CondPropagate.cpp
ConstantProp.cpp
DCE.cpp
DeadStoreElimination.cpp
+ GEPSplitter.cpp
GVN.cpp
IndVarSimplify.cpp
InstructionCombining.cpp
@@ -16,12 +17,13 @@ add_llvm_library(LLVMScalarOpts
LoopIndexSplit.cpp
LoopRotation.cpp
LoopStrengthReduce.cpp
- LoopUnroll.cpp
+ LoopUnrollPass.cpp
LoopUnswitch.cpp
MemCpyOptimizer.cpp
Reassociate.cpp
Reg2Mem.cpp
SCCP.cpp
+ SCCVN.cpp
Scalar.cpp
ScalarReplAggregates.cpp
SimplifyCFGPass.cpp
diff --git a/lib/Transforms/Scalar/CodeGenLICM.cpp b/lib/Transforms/Scalar/CodeGenLICM.cpp
deleted file mode 100644
index 10f950e..0000000
--- a/lib/Transforms/Scalar/CodeGenLICM.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-//===- CodeGenLICM.cpp - LICM a function for code generation --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This function performs late LICM, hoisting constants out of loops that
-// are not valid immediates. It should not be followed by instcombine,
-// because instcombine would quickly stuff the constants back into the loop.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "codegen-licm"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/ADT/DenseMap.h"
-using namespace llvm;
-
-namespace {
- class CodeGenLICM : public LoopPass {
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit CodeGenLICM() : LoopPass(&ID) {}
- };
-}
-
-char CodeGenLICM::ID = 0;
-static RegisterPass<CodeGenLICM> X("codegen-licm",
- "hoist constants out of loops");
-
-Pass *llvm::createCodeGenLICMPass() {
- return new CodeGenLICM();
-}
-
-bool CodeGenLICM::runOnLoop(Loop *L, LPPassManager &) {
- bool Changed = false;
-
- // Only visit outermost loops.
- if (L->getParentLoop()) return Changed;
-
- Instruction *PreheaderTerm = L->getLoopPreheader()->getTerminator();
- DenseMap<Constant *, BitCastInst *> HoistedConstants;
-
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- for (BasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
- BBI != BBE; ++BBI) {
- Instruction *I = BBI;
- // TODO: For now, skip all intrinsic instructions, because some of them
- // can require their operands to be constants, and we don't want to
- // break that.
- if (isa<IntrinsicInst>(I))
- continue;
- // LLVM represents fneg as -0.0-x; don't hoist the -0.0 out.
- if (BinaryOperator::isFNeg(I) ||
- BinaryOperator::isNeg(I) ||
- BinaryOperator::isNot(I))
- continue;
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- // Don't hoist out switch case constants.
- if (isa<SwitchInst>(I) && i == 1)
- break;
- // Don't hoist out shuffle masks.
- if (isa<ShuffleVectorInst>(I) && i == 2)
- break;
- Value *Op = I->getOperand(i);
- Constant *C = dyn_cast<Constant>(Op);
- if (!C) continue;
- // TODO: Ask the target which constants are legal. This would allow
- // us to add support for hoisting ConstantInts and GlobalValues too.
- if (isa<ConstantFP>(C) ||
- isa<ConstantVector>(C) ||
- isa<ConstantAggregateZero>(C)) {
- BitCastInst *&BC = HoistedConstants[C];
- if (!BC)
- BC = new BitCastInst(C, C->getType(), "hoist", PreheaderTerm);
- I->setOperand(i, BC);
- Changed = true;
- }
- }
- }
- }
-
- return Changed;
-}
-
-void CodeGenLICM::getAnalysisUsage(AnalysisUsage &AU) const {
- // This pass preserves just about everything. List some popular things here.
- AU.setPreservesCFG();
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved<LoopInfo>();
- AU.addPreserved<AliasAnalysis>();
- AU.addPreserved("scalar-evolution");
- AU.addPreserved("iv-users");
- AU.addPreserved("lda");
- AU.addPreserved("live-values");
-
- // Hoisting requires a loop preheader.
- AU.addRequiredID(LoopSimplifyID);
-}
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 42209b8..9ca90c3 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -318,6 +318,7 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
if (Invoke->getSuccessor(1) == Dest)
return;
}
+
// As a hack, never split backedges of loops. Even though the copy for any
// PHIs inserted on the backedge would be dead for exits from the loop, we
@@ -852,7 +853,7 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
// Split all critical edges where the dest block has a PHI.
TerminatorInst *BBTI = BB.getTerminator();
- if (BBTI->getNumSuccessors() > 1) {
+ if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
BasicBlock *SuccBB = BBTI->getSuccessor(i);
if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp
index 5b573f4..8a6c556 100644
--- a/lib/Transforms/Scalar/CondPropagate.cpp
+++ b/lib/Transforms/Scalar/CondPropagate.cpp
@@ -196,18 +196,20 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) {
// possible, and to avoid invalidating "i".
for (unsigned i = PN->getNumIncomingValues(); i != 0; --i)
if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValue(i-1))) {
- // If we have a constant, forward the edge from its current to its
- // ultimate destination.
- unsigned DestCase = SI->findCaseValue(CI);
- RevectorBlockTo(PN->getIncomingBlock(i-1),
- SI->getSuccessor(DestCase));
- ++NumSwThread;
-
- // If there were two predecessors before this simplification, or if the
- // PHI node contained all the same value except for the one we just
- // substituted, the PHI node may be deleted. Don't iterate through it the
- // last time.
- if (SI->getCondition() != PN) return;
+ BasicBlock *PredBB = PN->getIncomingBlock(i-1);
+ if (isa<BranchInst>(PredBB->getTerminator())) {
+ // If we have a constant, forward the edge from its current to its
+ // ultimate destination.
+ unsigned DestCase = SI->findCaseValue(CI);
+ RevectorBlockTo(PredBB, SI->getSuccessor(DestCase));
+ ++NumSwThread;
+
+ // If there were two predecessors before this simplification, or if the
+ // PHI node contained all the same value except for the one we just
+ // substituted, the PHI node may be deleted. Don't iterate through it the
+ // last time.
+ if (SI->getCondition() != PN) return;
+ }
}
}
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a7b3e75..60b12fd 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -49,7 +50,7 @@ namespace {
}
bool runOnBasicBlock(BasicBlock &BB);
- bool handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep);
+ bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep);
bool handleEndBlock(BasicBlock &BB);
bool RemoveUndeadPointers(Value* Ptr, uint64_t killPointerSize,
BasicBlock::iterator& BBI,
@@ -88,7 +89,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *Inst = BBI++;
// If we find a store or a free, get its memory dependence.
- if (!isa<StoreInst>(Inst) && !isa<FreeInst>(Inst))
+ if (!isa<StoreInst>(Inst) && !isFreeCall(Inst))
continue;
// Don't molest volatile stores or do queries that will return "clobber".
@@ -103,8 +104,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (InstDep.isNonLocal()) continue;
// Handle frees whose dependencies are non-trivial.
- if (FreeInst *FI = dyn_cast<FreeInst>(Inst)) {
- MadeChange |= handleFreeWithNonTrivialDependency(FI, InstDep);
+ if (isFreeCall(Inst)) {
+ MadeChange |= handleFreeWithNonTrivialDependency(Inst, InstDep);
continue;
}
@@ -153,6 +154,26 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
continue;
}
}
+
+ // If this is a lifetime end marker, we can throw away the store.
+ if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ // Delete the store and now-dead instructions that feed it.
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
+ DeleteDeadInstruction(SI);
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ NumFastStores++;
+ MadeChange = true;
+ continue;
+ }
+ }
}
// If this block ends in a return, unwind, or unreachable, all allocas are
@@ -165,7 +186,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
/// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep) {
+bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
StoreInst *Dependency = dyn_cast_or_null<StoreInst>(Dep.getInst());
@@ -175,7 +196,7 @@ bool DSE::handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep) {
Value *DepPointer = Dependency->getPointerOperand()->getUnderlyingObject();
// Check for aliasing.
- if (AA.alias(F->getPointerOperand(), 1, DepPointer, 1) !=
+ if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
AliasAnalysis::MustAlias)
return false;
diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp
new file mode 100644
index 0000000..610a41d
--- /dev/null
+++ b/lib/Transforms/Scalar/GEPSplitter.cpp
@@ -0,0 +1,81 @@
+//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This function breaks GEPs with more than 2 non-zero operands into smaller
+// GEPs each with no more than 2 non-zero operands. This exposes redundancy
+// between GEPs with common initial operand sequences.
+//
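+// For example (an illustrative sketch, with types and SSA details elided),
+// a GEP of the form
+//   %x = gep %p, %i, 2, %j
+// is rewritten into the chain
+//   %a = gep %p, %i
+//   %b = gep %a, 0, 2
+//   %x = gep %b, 0, %j
+//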
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "split-geps"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ class GEPSplitter : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit GEPSplitter() : FunctionPass(&ID) {}
+ };
+}
+
+char GEPSplitter::ID = 0;
+static RegisterPass<GEPSplitter> X("split-geps",
+ "split complex GEPs into simple GEPs");
+
+FunctionPass *llvm::createGEPSplitterPass() {
+ return new GEPSplitter();
+}
+
+bool GEPSplitter::runOnFunction(Function &F) {
+ bool Changed = false;
+
+ // Visit each GEP instruction.
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; )
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(II++)) {
+ unsigned NumOps = GEP->getNumOperands();
+ // Ignore GEPs which are already simple.
+ if (NumOps <= 2)
+ continue;
+ bool FirstIndexIsZero = isa<ConstantInt>(GEP->getOperand(1)) &&
+ cast<ConstantInt>(GEP->getOperand(1))->isZero();
+ if (NumOps == 3 && FirstIndexIsZero)
+ continue;
+ // The first index is special and gets expanded with a 2-operand GEP
+ // (unless it's zero, in which case we can skip this).
+ Value *NewGEP = FirstIndexIsZero ?
+ GEP->getOperand(0) :
+ GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1),
+ "tmp", GEP);
+ // All remaining indices get expanded with a 3-operand GEP with zero
+ // as the second operand.
+ Value *Idxs[2];
+ Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0);
+ for (unsigned i = 2; i != NumOps; ++i) {
+ Idxs[1] = GEP->getOperand(i);
+ NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP);
+ }
+ GEP->replaceAllUsesWith(NewGEP);
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 8859324..0e3f750 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -33,7 +33,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
@@ -669,9 +669,10 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- GVN() : FunctionPass(&ID) { }
+ GVN(bool nopre = false) : FunctionPass(&ID), NoPRE(nopre) { }
private:
+ bool NoPRE;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
@@ -710,7 +711,7 @@ namespace {
}
// createGVNPass - The public interface to this file...
-FunctionPass *llvm::createGVNPass() { return new GVN(); }
+FunctionPass *llvm::createGVNPass(bool NoPRE) { return new GVN(NoPRE); }
static RegisterPass<GVN> X("gvn",
"Global Value Numbering");
@@ -1243,11 +1244,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
- if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
+ if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
UndefValue::get(LI->getType())));
continue;
}
+
+ // Loading immediately after lifetime begin or end -> undef.
+ if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ UndefValue::get(LI->getType())));
+ }
+ }
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
@@ -1585,12 +1595,24 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading for a fresh allocation with no
// intervening stores, for example.
- if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
+ if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
toErase.push_back(L);
NumGVNLoad++;
return true;
}
+
+ // If this load occurs either right after a lifetime begin or a lifetime end,
+ // then the loaded value is undefined.
+ if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ toErase.push_back(L);
+ NumGVNLoad++;
+ return true;
+ }
+ }
return false;
}
@@ -1653,7 +1675,7 @@ bool GVN::processInstruction(Instruction *I,
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
- } else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) {
+ } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
return false;
}
@@ -1788,7 +1810,7 @@ bool GVN::processBlock(BasicBlock *BB) {
/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
-bool GVN::performPRE(Function& F) {
+bool GVN::performPRE(Function &F) {
bool Changed = false;
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
DenseMap<BasicBlock*, Value*> predMap;
@@ -1803,7 +1825,7 @@ bool GVN::performPRE(Function& F) {
BE = CurrentBlock->end(); BI != BE; ) {
Instruction *CurInst = BI++;
- if (isa<AllocationInst>(CurInst) ||
+ if (isa<AllocaInst>(CurInst) ||
isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
CurInst->getType()->isVoidTy() ||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
@@ -1853,6 +1875,10 @@ bool GVN::performPRE(Function& F) {
// we would need to insert instructions in more than one pred.
if (NumWithout != 1 || NumWith == 0)
continue;
+
+ // Don't do PRE across indirect branch.
+ if (isa<IndirectBrInst>(PREPred->getTerminator()))
+ continue;
// We can't do PRE safely on a critical edge, so instead we schedule
// the edge to be split and perform the PRE the next time we iterate
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index e2d9e0b..b0bc70c 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -292,7 +292,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
if (NumPreds != 1) {
// Clone the PHI and delete the original one. This lets IVUsers and
// any other maps purge the original user from their records.
- PHINode *NewPN = PN->clone();
+ PHINode *NewPN = cast<PHINode>(PN->clone());
NewPN->takeName(PN);
NewPN->insertBefore(PN);
PN->replaceAllUsesWith(NewPN);
@@ -322,7 +322,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
// may not have been able to compute a trip count. Now that we've done some
// re-writing, the trip count may be computable.
if (Changed)
- SE->forgetLoopBackedgeTakenCount(L);
+ SE->forgetLoop(L);
}
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index b41b5d4..7e75cfb 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -42,7 +42,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -217,6 +217,7 @@ namespace {
//
Instruction *visitAdd(BinaryOperator &I);
Instruction *visitFAdd(BinaryOperator &I);
+ Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty);
Instruction *visitSub(BinaryOperator &I);
Instruction *visitFSub(BinaryOperator &I);
Instruction *visitMul(BinaryOperator &I);
@@ -284,8 +285,8 @@ namespace {
Instruction *visitInvokeInst(InvokeInst &II);
Instruction *visitPHINode(PHINode &PN);
Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
- Instruction *visitAllocationInst(AllocationInst &AI);
- Instruction *visitFreeInst(FreeInst &FI);
+ Instruction *visitAllocaInst(AllocaInst &AI);
+ Instruction *visitFree(Instruction &FI);
Instruction *visitLoadInst(LoadInst &LI);
Instruction *visitStoreInst(StoreInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
@@ -416,6 +417,7 @@ namespace {
Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
@@ -425,7 +427,7 @@ namespace {
bool isSub, Instruction &I);
Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
bool isSigned, bool Inside, Instruction &IB);
- Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI);
+ Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
Instruction *MatchBSwap(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
@@ -630,9 +632,32 @@ static inline Value *dyn_castFNegVal(Value *V) {
return 0;
}
-static inline Value *dyn_castNotVal(Value *V) {
+/// isFreeToInvert - Return true if the specified value is free to invert (apply
+/// ~ to). This happens in cases where the ~ can be eliminated.
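+/// For instance, a single-use compare counts as free to invert because
+/// ~(icmp slt X, Y) can be rewritten as (icmp sge X, Y).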
+static inline bool isFreeToInvert(Value *V) {
+ // ~(~(X)) -> X.
if (BinaryOperator::isNot(V))
- return BinaryOperator::getNotArgument(V);
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Compares can be inverted if they have a single use.
+ if (CmpInst *CI = dyn_cast<CmpInst>(V))
+ return CI->hasOneUse();
+
+ return false;
+}
+
+static inline Value *dyn_castNotVal(Value *V) {
+  // If this is a not(not(x)), don't report it as a not; we want the two
+  // nots to be folded first.
+ if (BinaryOperator::isNot(V)) {
+ Value *Operand = BinaryOperator::getNotArgument(V);
+ if (!isFreeToInvert(Operand))
+ return Operand;
+ }
// Constants can be considered to be not'ed values...
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
@@ -640,6 +665,8 @@ static inline Value *dyn_castNotVal(Value *V) {
return 0;
}
+
+
// dyn_castFoldableMul - If this value is a multiply that can be folded into
// other computations (because it has a constant operand), return the
// non-constant operand of the multiply, and set CST to point to the multiplier.
@@ -2394,8 +2421,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
// Insert the new, smaller add.
- Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),
- CI, "addconv");
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
return new SExtInst(NewAdd, I.getType());
}
}
@@ -2410,8 +2437,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0))) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), "addconv");
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), "addconv");
return new SExtInst(NewAdd, I.getType());
}
}
@@ -2467,8 +2494,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),
- CI, "addconv");
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
@@ -2483,8 +2510,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0))) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), "addconv");
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),"addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
@@ -2493,13 +2520,210 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
return Changed ? &I : 0;
}
+
+/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
+/// code necessary to compute the offset from the base pointer (without adding
+/// in the base pointer). Return the result as a signed integer of intptr size.
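+/// For example (an illustrative sketch assuming 4-byte i32), for
+/// getelementptr [10 x i32]* %p, i64 %i, i64 %j this emits the
+/// equivalent of 40*%i + 4*%j.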
+static Value *EmitGEPOffset(User *GEP, InstCombiner &IC) {
+ TargetData &TD = *IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
+ Value *Result = Constant::getNullValue(IntPtrTy);
+
+ // Build a mask for high order bits.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+ ++i, ++GTI) {
+ Value *Op = *i;
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+ if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+
+ Result = IC.Builder->CreateAdd(Result,
+ ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".offs");
+ continue;
+ }
+
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ Constant *OC =
+ ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+ Scale = ConstantExpr::getMul(OC, Scale);
+ // Emit an add instruction.
+ Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
+ continue;
+ }
+ // Convert to correct type.
+ if (Op->getType() != IntPtrTy)
+ Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
+ if (Size != 1) {
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ // We'll let instcombine(mul) convert this to a shl if possible.
+ Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
+ }
+
+ // Emit an add instruction.
+ Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
+ }
+ return Result;
+}
+
+
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
+/// be complex, and scales are involved. The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This latter form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+ InstCombiner &IC) {
+ TargetData &TD = *IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return 0;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If so, emit the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return 0;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ if (Offset == 0) {
+ // Cast to intptrty in case a truncation occurs. If an extension is needed,
+ // we don't need to bother extending: the extension won't affect where the
+ // computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+ VariableIdx = new TruncInst(VariableIdx,
+ TD.getIntPtrType(VariableIdx->getContext()),
+ VariableIdx->getName(), &I);
+ return VariableIdx;
+ }
+
+ // Otherwise, there is an index. The computation we will do will be modulo
+ // the pointer size, so get it.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return 0;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
+ true /*SExt*/,
+ VariableIdx->getName(), &I);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
+}
+
+
+/// Optimize differences between pointers into the same array into a size.
+/// Consider &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the
+/// pointer operands to the ptrtoint instructions for the LHS/RHS of the
+/// subtract.
+///
+Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
+ const Type *Ty) {
+ assert(TD && "Must have target data info for this");
+
+ // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
+ // this.
+ bool Swapped;
+ GetElementPtrInst *GEP;
+
+ if ((GEP = dyn_cast<GetElementPtrInst>(LHS)) &&
+ GEP->getOperand(0) == RHS)
+ Swapped = false;
+ else if ((GEP = dyn_cast<GetElementPtrInst>(RHS)) &&
+ GEP->getOperand(0) == LHS)
+ Swapped = true;
+ else
+ return 0;
+
+ // TODO: Could also optimize &A[i] - &A[j] -> "i-j".
+
+  // Emit the offset of the GEP as an intptr_t.
+ Value *Result = EmitGEPOffset(GEP, *this);
+
+ // If we have p - gep(p, ...) then we have to negate the result.
+ if (Swapped)
+ Result = Builder->CreateNeg(Result, "diff.neg");
+
+ return Builder->CreateIntCast(Result, Ty, true);
+}
+
+
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Op0 == Op1) // sub X, X -> 0
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- // If this is a 'B = x-(-A)', change to B = x+A...
+ // If this is a 'B = x-(-A)', change to B = x+A.
if (Value *V = dyn_castNegVal(Op1))
return BinaryOperator::CreateAdd(Op0, V);
@@ -2507,9 +2731,11 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
if (isa<UndefValue>(Op1))
return ReplaceInstUsesWith(I, Op1); // X - undef -> undef
-
+ if (I.getType() == Type::getInt1Ty(*Context))
+ return BinaryOperator::CreateXor(Op0, Op1);
+
if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
- // Replace (-1 - A) with (~A)...
+ // Replace (-1 - A) with (~A).
if (C->isAllOnesValue())
return BinaryOperator::CreateNot(Op1);
@@ -2532,8 +2758,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
SI->getOperand(0), CU, SI->getName());
}
}
- }
- else if (SI->getOpcode() == Instruction::AShr) {
+ } else if (SI->getOpcode() == Instruction::AShr) {
if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
// Check to see if we are shifting out everything but the sign bit.
if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
@@ -2558,9 +2783,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
}
- if (I.getType() == Type::getInt1Ty(*Context))
- return BinaryOperator::CreateXor(Op0, Op1);
-
if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
if (Op1I->getOpcode() == Instruction::Add) {
if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
@@ -2642,6 +2864,28 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (X == dyn_castFoldableMul(Op1, C2))
return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
}
+
+  // Optimize differences between pointers into the same array into a size.
+  // Consider &A[10] - &A[0]: we should compile this to "10".
+ if (TD) {
+ if (PtrToIntInst *LHS = dyn_cast<PtrToIntInst>(Op0))
+ if (PtrToIntInst *RHS = dyn_cast<PtrToIntInst>(Op1))
+ if (Value *Res = OptimizePointerDifference(LHS->getOperand(0),
+ RHS->getOperand(0),
+ I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (TruncInst *LHST = dyn_cast<TruncInst>(Op0))
+ if (TruncInst *RHST = dyn_cast<TruncInst>(Op1))
+ if (PtrToIntInst *LHS = dyn_cast<PtrToIntInst>(LHST->getOperand(0)))
+ if (PtrToIntInst *RHS = dyn_cast<PtrToIntInst>(RHST->getOperand(0)))
+ if (Value *Res = OptimizePointerDifference(LHS->getOperand(0),
+ RHS->getOperand(0),
+ I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+ }
+
return 0;
}
@@ -3510,9 +3754,9 @@ static Value *getFCmpValue(bool isordered, unsigned code,
/// PredicatesFoldable - Return true if both predicates match sign or if at
/// least one of them is an equality comparison (which is signless).
static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
- return (ICmpInst::isSignedPredicate(p1) == ICmpInst::isSignedPredicate(p2)) ||
- (ICmpInst::isSignedPredicate(p1) && ICmpInst::isEquality(p2)) ||
- (ICmpInst::isSignedPredicate(p2) && ICmpInst::isEquality(p1));
+ return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+ (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+ (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
}
namespace {
@@ -3549,9 +3793,7 @@ struct FoldICmpLogical {
default: llvm_unreachable("Illegal logical opcode!"); return 0;
}
- bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) ||
- ICmpInst::isSignedPredicate(ICI->getPredicate());
-
+ bool isSigned = RHSICI->isSigned() || ICI->isSigned();
Value *RV = getICmpValue(isSigned, Code, LHS, RHS, IC.getContext());
if (Instruction *I = dyn_cast<Instruction>(RV))
return I;
@@ -3848,9 +4090,9 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
- if (ICmpInst::isSignedPredicate(LHSCC) ||
+ if (CmpInst::isSigned(LHSCC) ||
(ICmpInst::isEquality(LHSCC) &&
- ICmpInst::isSignedPredicate(RHSCC)))
+ CmpInst::isSigned(RHSCC)))
ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
else
ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
@@ -4167,7 +4409,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
CastOp->getNumOperands() == 2)
- if (ConstantInt *AndCI = dyn_cast<ConstantInt>(CastOp->getOperand(1))) {
+ if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
if (CastOp->getOpcode() == Instruction::And) {
// Change: and (cast (and X, C1) to T), C2
// into : and (cast X to T), trunc_or_bitcast(C1)&C2
@@ -4536,9 +4778,9 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
- if (ICmpInst::isSignedPredicate(LHSCC) ||
+ if (CmpInst::isSigned(LHSCC) ||
(ICmpInst::isEquality(LHSCC) &&
- ICmpInst::isSignedPredicate(RHSCC)))
+ CmpInst::isSigned(RHSCC)))
ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
else
ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
@@ -4961,14 +5203,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Ret) return Ret;
}
- if (match(Op0, m_Not(m_Value(A)))) { // ~A | Op1
+ if ((A = dyn_castNotVal(Op0))) { // ~A | Op1
if (A == Op1) // ~A | A == -1
return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
} else {
A = 0;
}
// Note, A is still live here!
- if (match(Op1, m_Not(m_Value(B)))) { // Op0 | ~B
+ if ((B = dyn_castNotVal(Op1))) { // Op0 | ~B
if (Op0 == B)
return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
@@ -5065,12 +5307,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// Is this a ~ operation?
if (Value *NotOp = dyn_castNotVal(&I)) {
- // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
- // ~(~X | Y) === (X & ~Y) - De Morgan's Law
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
if (Op0I->getOpcode() == Instruction::And ||
Op0I->getOpcode() == Instruction::Or) {
- if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
+ // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+ // ~(~X | Y) === (X & ~Y) - De Morgan's Law
+ if (dyn_castNotVal(Op0I->getOperand(1)))
+ Op0I->swapOperands();
if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
Value *NotY =
Builder->CreateNot(Op0I->getOperand(1),
@@ -5079,6 +5322,19 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return BinaryOperator::CreateOr(Op0NotVal, NotY);
return BinaryOperator::CreateAnd(Op0NotVal, NotY);
}
+
+ // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
+ // ~(X | Y) === (~X & ~Y) - De Morgan's Law
+ if (isFreeToInvert(Op0I->getOperand(0)) &&
+ isFreeToInvert(Op0I->getOperand(1))) {
+ Value *NotX =
+ Builder->CreateNot(Op0I->getOperand(0), "notlhs");
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1), "notrhs");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(NotX, NotY);
+ return BinaryOperator::CreateAnd(NotX, NotY);
+ }
}
}
}
@@ -5379,166 +5635,6 @@ static bool SubWithOverflow(Constant *&Result, Constant *In1,
IsSigned);
}
-/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
-/// code necessary to compute the offset from the base pointer (without adding
-/// in the base pointer). Return the result as a signed integer of intptr size.
-static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
- TargetData &TD = *IC.getTargetData();
- gep_type_iterator GTI = gep_type_begin(GEP);
- const Type *IntPtrTy = TD.getIntPtrType(I.getContext());
- Value *Result = Constant::getNullValue(IntPtrTy);
-
- // Build a mask for high order bits.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
- uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
- for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
- ++i, ++GTI) {
- Value *Op = *i;
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
- if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
- if (OpC->isZero()) continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
-
- Result = IC.Builder->CreateAdd(Result,
- ConstantInt::get(IntPtrTy, Size),
- GEP->getName()+".offs");
- continue;
- }
-
- Constant *Scale = ConstantInt::get(IntPtrTy, Size);
- Constant *OC =
- ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
- Scale = ConstantExpr::getMul(OC, Scale);
- // Emit an add instruction.
- Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
- continue;
- }
- // Convert to correct type.
- if (Op->getType() != IntPtrTy)
- Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
- if (Size != 1) {
- Constant *Scale = ConstantInt::get(IntPtrTy, Size);
- // We'll let instcombine(mul) convert this to a shl if possible.
- Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
- }
-
- // Emit an add instruction.
- Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
- }
- return Result;
-}
-
-
-/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
-/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
-/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
-/// be complex, and scales are involved. The above expression would also be
-/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
-/// This later form is less amenable to optimization though, and we are allowed
-/// to generate the first by knowing that pointer arithmetic doesn't overflow.
-///
-/// If we can't emit an optimized form for this expression, this returns null.
-///
-static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
- InstCombiner &IC) {
- TargetData &TD = *IC.getTargetData();
- gep_type_iterator GTI = gep_type_begin(GEP);
-
- // Check to see if this gep only has a single variable index. If so, and if
- // any constant indices are a multiple of its scale, then we can compute this
- // in terms of the scale of the variable index. For example, if the GEP
- // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
- // because the expression will cross zero at the same point.
- unsigned i, e = GEP->getNumOperands();
- int64_t Offset = 0;
- for (i = 1; i != e; ++i, ++GTI) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
- // Compute the aggregate offset of constant indices.
- if (CI->isZero()) continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
- } else {
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
- Offset += Size*CI->getSExtValue();
- }
- } else {
- // Found our variable index.
- break;
- }
- }
-
- // If there are no variable indices, we must have a constant offset, just
- // evaluate it the general way.
- if (i == e) return 0;
-
- Value *VariableIdx = GEP->getOperand(i);
- // Determine the scale factor of the variable element. For example, this is
- // 4 if the variable index is into an array of i32.
- uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
-
- // Verify that there are no other variable indices. If so, emit the hard way.
- for (++i, ++GTI; i != e; ++i, ++GTI) {
- ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!CI) return 0;
-
- // Compute the aggregate offset of constant indices.
- if (CI->isZero()) continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
- } else {
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
- Offset += Size*CI->getSExtValue();
- }
- }
-
- // Okay, we know we have a single variable index, which must be a
- // pointer/array/vector index. If there is no offset, life is simple, return
- // the index.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
- if (Offset == 0) {
- // Cast to IntPtrTy in case a truncation occurs. If an extension is needed,
- // we don't need to bother extending: the extension won't affect where the
- // computation crosses zero.
- if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
- VariableIdx = new TruncInst(VariableIdx,
- TD.getIntPtrType(VariableIdx->getContext()),
- VariableIdx->getName(), &I);
- return VariableIdx;
- }
-
- // Otherwise, there is an offset. The computation we will do will be modulo
- // the pointer size, so get it.
- uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
- Offset &= PtrSizeMask;
- VariableScale &= PtrSizeMask;
-
- // To do this transformation, any constant index must be a multiple of the
- // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
- // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
- // multiple of the variable scale.
- int64_t NewOffs = Offset / (int64_t)VariableScale;
- if (Offset != NewOffs*(int64_t)VariableScale)
- return 0;
-
- // Okay, we can do this evaluation. Start by converting the index to intptr.
- const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
- if (VariableIdx->getType() != IntPtrTy)
- VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
- true /*SExt*/,
- VariableIdx->getName(), &I);
- Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
- return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
-}
-
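A source-level sketch of the equivalence the EvaluateGEPOffsetExpression comment
describes, assuming i32 elements (so the scale is 4) and, as the comment notes, no
overflow in the pointer arithmetic; the functions and constants are illustrative only:

    // "&A[3+i] icmp ne &A[0]" implies the offset 12 + 4*i. Because 12 is a
    // multiple of the scale 4, comparing the divided form against zero is
    // equivalent: both expressions cross zero at the same point.
    bool offsetForm(long i)  { return 12 + 4*i != 0; }
    bool dividedForm(long i) { return 3 + i != 0; }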
/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
/// else. At this point we know that the GEP is on the LHS of the comparison.
@@ -5559,7 +5655,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// If not, synthesize the offset the hard way.
if (Offset == 0)
- Offset = EmitGEPOffset(GEPLHS, I, *this);
+ Offset = EmitGEPOffset(GEPLHS, *this);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
Constant::getNullValue(Offset->getType()));
} else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
@@ -5645,8 +5741,8 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
(isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
(isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
- Value *L = EmitGEPOffset(GEPLHS, I, *this);
- Value *R = EmitGEPOffset(GEPRHS, I, *this);
+ Value *L = EmitGEPOffset(GEPLHS, *this);
+ Value *R = EmitGEPOffset(GEPRHS, *this);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
}
}
@@ -6087,7 +6183,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// EQ and NE we use unsigned values.
APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
- if (ICmpInst::isSignedPredicate(I.getPredicate())) {
+ if (I.isSigned()) {
ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
Op0Min, Op0Max);
ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
@@ -6217,7 +6313,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Turn a signed comparison into an unsigned one if both operands
// are known to have the same sign.
- if (I.isSignedPredicate() &&
+ if (I.isSigned() &&
((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
(Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
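A source-level instance of why the signed-to-unsigned rewrite above is sound in one
of the two covered cases, both operands known non-negative (types illustrative):

    #include <cstdint>
    // For values in [0, 255], the signed and unsigned comparisons agree:
    bool signedCmp(uint8_t a, uint8_t b)   { return int32_t(a) < int32_t(b); }
    bool unsignedCmp(uint8_t a, uint8_t b) { return uint32_t(a) < uint32_t(b); }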
@@ -6397,7 +6493,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
if (CI->getValue().isSignBit()) {
- ICmpInst::Predicate Pred = I.isSignedPredicate()
+ ICmpInst::Predicate Pred = I.isSigned()
? I.getUnsignedPredicate()
: I.getSignedPredicate();
return new ICmpInst(Pred, Op0I->getOperand(0),
@@ -6405,7 +6501,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
if (CI->getValue().isMaxSignedValue()) {
- ICmpInst::Predicate Pred = I.isSignedPredicate()
+ ICmpInst::Predicate Pred = I.isSigned()
? I.getUnsignedPredicate()
: I.getSignedPredicate();
Pred = I.getSwappedPredicate(Pred);
@@ -6542,7 +6638,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// work. :( The if statement below tests that condition and bails
// if it finds it.
bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
- if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate())
+ if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
return 0;
if (DivRHS->isZero())
return 0; // The ProdOV computation fails on divide by zero.
@@ -6741,7 +6837,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
const APInt &SignBit = XorCST->getValue();
- ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+ ICmpInst::Predicate Pred = ICI.isSigned()
? ICI.getUnsignedPredicate()
: ICI.getSignedPredicate();
return new ICmpInst(Pred, LHSI->getOperand(0),
@@ -6751,7 +6847,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
const APInt &NotSignBit = XorCST->getValue();
- ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+ ICmpInst::Predicate Pred = ICI.isSigned()
? ICI.getUnsignedPredicate()
: ICI.getSignedPredicate();
Pred = ICI.getSwappedPredicate(Pred);
@@ -7009,7 +7105,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
.subtract(LHSV);
- if (ICI.isSignedPredicate()) {
+ if (ICI.isSigned()) {
if (CR.getLower().isSignBit()) {
return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
ConstantInt::get(*Context, CR.getUpper()));
@@ -7184,7 +7280,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
return 0;
bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
- bool isSignedCmp = ICI.isSignedPredicate();
+ bool isSignedCmp = ICI.isSigned();
if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
// Not an extension from the same type?
@@ -7745,7 +7841,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
/// try to eliminate the cast by moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
- AllocationInst &AI) {
+ AllocaInst &AI) {
const PointerType *PTy = cast<PointerType>(CI.getType());
BuilderTy AllocaBuilder(*Builder);
@@ -7817,7 +7913,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
}
- AllocationInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+ AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
New->setAlignment(AI.getAlignment());
New->takeName(&AI);
@@ -8163,8 +8259,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
if (GEP->hasAllConstantIndices()) {
// We are guaranteed to get a constant from EmitGEPOffset.
- ConstantInt *OffsetV =
- cast<ConstantInt>(EmitGEPOffset(GEP, CI, *this));
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, *this));
int64_t Offset = OffsetV->getSExtValue();
// Get the base pointer input of the bitcast, and the type it points to.
@@ -8878,7 +8973,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// size, rewrite the allocation instruction to allocate the "right" type.
// There is no need to modify malloc calls because it is their bitcast that
// needs to be cleaned up.
- if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
return V;
@@ -9747,6 +9842,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ if (isFreeCall(&CI))
+ return visitFree(CI);
+
// If the caller function is nounwind, mark the call as nounwind, even if the
// callee isn't.
if (CI.getParent()->getParent()->doesNotThrow() &&
@@ -10691,6 +10789,96 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
return true;
}
+Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
+ LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
+
+ // When processing loads, we need to propagate two bits of information to the
+ // sunk load: whether it is volatile, and what its alignment is. We currently
+ // don't sink loads when some have their alignment specified and some don't.
+ // visitLoadInst will propagate an alignment onto the load when TD is around,
+ // and if TD isn't around, we can't handle the mixed case.
+ bool isVolatile = FirstLI->isVolatile();
+ unsigned LoadAlignment = FirstLI->getAlignment();
+
+ // We can't sink the load if the loaded value could be modified between the
+ // load and the PHI.
+ if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
+ !isSafeAndProfitableToSinkLoad(FirstLI))
+ return 0;
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
+ if (!LI || !LI->hasOneUse())
+ return 0;
+
+ // We can't sink the load if the loaded value could be modified between
+ // the load and the PHI.
+ if (LI->isVolatile() != isVolatile ||
+ LI->getParent() != PN.getIncomingBlock(i) ||
+ !isSafeAndProfitableToSinkLoad(LI))
+ return 0;
+
+ // If some of the loads have an alignment specified but not all of them,
+ // we can't do the transformation.
+ if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
+ return 0;
+
+ LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
+ PN.getName()+".in");
+ NewPN->reserveOperandSpace(PN.getNumOperands()/2);
+
+ Value *InVal = FirstLI->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // If this was a volatile load that we are merging, make sure to loop through
+ // and mark all the input loads as non-volatile. If we don't do this, we will
+ // insert a new volatile load and the old ones will not be deletable.
+ if (isVolatile)
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+
+ return new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
+}
+
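In source terms, FoldPHIArgLoadIntoPHI turns a PHI of loads into one load of a
PHI'd pointer. A minimal sketch of the effect (names illustrative, and ignoring
the volatility and alignment checks enforced above):

    int before(bool c, int *p, int *q) { return c ? *p : *q; } // PHI of two loads
    int after(bool c, int *p, int *q) {
      int *a = c ? p : q; // PHI of the two pointer operands
      return *a;          // single sunk load
    }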
// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
// operator and they all are only used by the PHI, PHI together their
@@ -10698,13 +10886,18 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+ if (isa<GetElementPtrInst>(FirstInst))
+ return FoldPHIArgGEPIntoPHI(PN);
+ if (isa<LoadInst>(FirstInst))
+ return FoldPHIArgLoadIntoPHI(PN);
+
// Scan the instruction, looking for input operations that can be folded away.
// If all input operands to the phi are the same instruction (e.g. a cast from
// the same type or "+42") we can pull the operation through the PHI, reducing
// code size and simplifying code.
Constant *ConstantOp = 0;
const Type *CastSrcTy = 0;
- bool isVolatile = false;
+
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
@@ -10713,51 +10906,18 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
if (ConstantOp == 0)
return FoldPHIArgBinOpIntoPHI(PN);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(FirstInst)) {
- isVolatile = LI->isVolatile();
- // We can't sink the load if the loaded value could be modified between the
- // load and the PHI.
- if (LI->getParent() != PN.getIncomingBlock(0) ||
- !isSafeAndProfitableToSinkLoad(LI))
- return 0;
-
- // If the PHI is of volatile loads and the load block has multiple
- // successors, sinking it would remove a load of the volatile value from
- // the path through the other successor.
- if (isVolatile &&
- LI->getParent()->getTerminator()->getNumSuccessors() != 1)
- return 0;
-
- } else if (isa<GetElementPtrInst>(FirstInst)) {
- return FoldPHIArgGEPIntoPHI(PN);
} else {
return 0; // Cannot fold this operation.
}
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- if (!isa<Instruction>(PN.getIncomingValue(i))) return 0;
- Instruction *I = cast<Instruction>(PN.getIncomingValue(i));
- if (!I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
return 0;
if (CastSrcTy) {
if (I->getOperand(0)->getType() != CastSrcTy)
return 0; // Cast operation must match.
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- // We can't sink the load if the loaded value could be modified between
- // the load and the PHI.
- if (LI->isVolatile() != isVolatile ||
- LI->getParent() != PN.getIncomingBlock(i) ||
- !isSafeAndProfitableToSinkLoad(LI))
- return 0;
-
- // If the PHI is of volatile loads and the load block has multiple
- // successors, sinking it would remove a load of the volatile value from
- // the path through the other successor.
- if (isVolatile &&
- LI->getParent()->getTerminator()->getNumSuccessors() != 1)
- return 0;
-
} else if (I->getOperand(1) != ConstantOp) {
return 0;
}
@@ -10792,23 +10952,15 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
}
// Insert and return the new operation.
- if (CastInst* FirstCI = dyn_cast<CastInst>(FirstInst))
+ if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
+
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
- if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
- return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
- PhiVal, ConstantOp);
- assert(isa<LoadInst>(FirstInst) && "Unknown operation");
-
- // If this was a volatile load that we are merging, make sure to loop through
- // and mark all the input loads as non-volatile. If we don't do this, we will
- // insert a new volatile load and the old ones will not be deletable.
- if (isVolatile)
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
- return new LoadInst(PhiVal, "", isVolatile);
+ CmpInst *CIOp = cast<CmpInst>(FirstInst);
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ PhiVal, ConstantOp);
}
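A sketch of this fold in source terms: when every incoming value applies the same
operation (here "+ 42"), the operation is pulled through the PHI and emitted once
(names illustrative):

    int before(bool c, int x, int y) { return c ? x + 42 : y + 42; } // two adds
    int after(bool c, int x, int y)  { return (c ? x : y) + 42; }    // one add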
/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
@@ -10940,6 +11092,31 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
}
}
}
+
+ // If there are multiple PHIs, sort their operands so that they all list
+ // the blocks in the same order. This will help identical PHIs be eliminated
+ // by other passes. Other passes shouldn't depend on this for correctness,
+ // however.
+ PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
+ if (&PN != FirstPN)
+ for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BBA = PN.getIncomingBlock(i);
+ BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+ if (BBA != BBB) {
+ Value *VA = PN.getIncomingValue(i);
+ unsigned j = PN.getBasicBlockIndex(BBB);
+ Value *VB = PN.getIncomingValue(j);
+ PN.setIncomingBlock(i, BBB);
+ PN.setIncomingValue(i, VB);
+ PN.setIncomingBlock(j, BBA);
+ PN.setIncomingValue(j, VA);
+ // NOTE: Instcombine normally would want us to "return &PN" if we
+ // modified any of the operands of an instruction. However, since we
+ // aren't adding or removing uses (just rearranging them) we don't do
+ // this in this case.
+ }
+ }
+
return 0;
}
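A standalone analogue of the sort above, with the PHIs reduced to parallel
block/value arrays (hypothetical helper, not pass code); it does the same
lookup-and-pairwise-swap against the first PHI's block order:

    #include <cstddef>
    #include <utility>
    #include <vector>

    void sortLikeFirstPHI(const std::vector<int> &RefBlocks,
                          std::vector<int> &Blocks, std::vector<int> &Values) {
      for (std::size_t i = 0; i != RefBlocks.size(); ++i) {
        if (Blocks[i] == RefBlocks[i]) continue;
        for (std::size_t j = i + 1; j != Blocks.size(); ++j)
          if (Blocks[j] == RefBlocks[i]) {   // getBasicBlockIndex analogue
            std::swap(Blocks[i], Blocks[j]); // swap whole (block, value) pairs,
            std::swap(Values[i], Values[j]); // exactly as the pass does
            break;
          }
      }
    }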
@@ -11190,8 +11367,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
// Determine how much the GEP moves the pointer. We are guaranteed to get
// a constant back from EmitGEPOffset.
- ConstantInt *OffsetV =
- cast<ConstantInt>(EmitGEPOffset(&GEP, GEP, *this));
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP, *this));
int64_t Offset = OffsetV->getSExtValue();
// If this GEP instruction doesn't move the pointer, just replace the GEP
@@ -11199,7 +11375,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (Offset == 0) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
- if (isa<AllocationInst>(BCI->getOperand(0)) ||
+ if (isa<AllocaInst>(BCI->getOperand(0)) ||
isMalloc(BCI->getOperand(0))) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
@@ -11238,21 +11414,21 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return 0;
}
-Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
- // Convert: malloc Ty, C - where C is a constant != 1 into: malloc [C x Ty], 1
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
const Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
- AllocationInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
+ AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
New->setAlignment(AI.getAlignment());
// Scan to the end of the allocation instructions, to skip over a block of
// allocas if possible...also skip interleaved debug info
//
BasicBlock::iterator It = New;
- while (isa<AllocationInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
// Now that It is pointing to the first non-allocation-inst in the block,
// insert our getelementptr instruction...
@@ -11287,8 +11463,8 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
return 0;
}
-Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
- Value *Op = FI.getOperand(0);
+Instruction *InstCombiner::visitFree(Instruction &FI) {
+ Value *Op = FI.getOperand(1);
// free undef -> unreachable.
if (isa<UndefValue>(Op)) {
@@ -11302,22 +11478,8 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
// when lots of inlining happens.
if (isa<ConstantPointerNull>(Op))
return EraseInstFromFunction(FI);
-
- // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
- if (BitCastInst *CI = dyn_cast<BitCastInst>(Op)) {
- FI.setOperand(0, CI->getOperand(0));
- return &FI;
- }
-
- // Change free (gep X, 0,0,0,0) into free(X)
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
- if (GEPI->hasAllZeroIndices()) {
- Worklist.Add(GEPI);
- FI.setOperand(0, GEPI->getOperand(0));
- return &FI;
- }
- }
-
+
+ // If we have a malloc call whose only use is a free call, delete both.
if (isMalloc(Op)) {
if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
if (Op->hasOneUse() && CI->hasOneUse()) {
@@ -11337,7 +11499,6 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
return 0;
}
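The malloc/free pairing handled above has a simple source-level reading (a sketch;
the pass itself works on the IR calls, including through bitcasts):

    #include <cstdlib>
    void before() { free(malloc(16)); } // the malloc's only use is this free
    void after() {}                     // both calls are removed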
-
/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
const TargetData *TD) {
@@ -11838,9 +11999,11 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
--BBI;
}
- // If this isn't a store, or isn't a store to the same location, bail out.
+ // If this isn't a store, isn't a store to the same location, or if the
+ // alignments differ, bail out.
OtherStore = dyn_cast<StoreInst>(BBI);
- if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1))
+ if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
+ OtherStore->getAlignment() != SI.getAlignment())
return false;
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
@@ -11855,7 +12018,8 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
for (;; --BBI) {
// Check to see if we find the matching store.
if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
- if (OtherStore->getOperand(1) != SI.getOperand(1))
+ if (OtherStore->getOperand(1) != SI.getOperand(1) ||
+ OtherStore->getAlignment() != SI.getAlignment())
return false;
break;
}
@@ -11890,7 +12054,8 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// insert it.
BBI = DestBB->getFirstNonPHI();
InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
- OtherStore->isVolatile()), *BBI);
+ OtherStore->isVolatile(),
+ SI.getAlignment()), *BBI);
// Nuke the old stores.
EraseInstFromFunction(SI);
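A source-level picture of the merge SimplifyStoreAtEndOfBlock performs; with the
change above, the two stores must now also agree on alignment before being sunk
into the join block (names illustrative):

    void before(bool c, int *p, int a, int b) { if (c) *p = a; else *p = b; }
    void after(bool c, int *p, int a, int b)  { int v = c ? a : b; *p = v; }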
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 5f93756..866d8b4 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -33,8 +33,6 @@ namespace {
// Possibly eliminate loop L if it is dead.
bool runOnLoop(Loop* L, LPPassManager& LPM);
- bool SingleDominatingExit(Loop* L,
- SmallVector<BasicBlock*, 4>& exitingBlocks);
bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
SmallVector<BasicBlock*, 4>& exitBlocks,
bool &Changed, BasicBlock *Preheader);
@@ -63,25 +61,6 @@ Pass* llvm::createLoopDeletionPass() {
return new LoopDeletion();
}
-/// SingleDominatingExit - Checks that there is only a single block that
-/// branches out of the loop, and that it also dominates the latch block. Loops
-/// with multiple or non-latch-dominating exiting blocks could be dead, but we'd
-/// have to do more extensive analysis to make sure, for instance, that the
-/// control flow logic involved was or could be made loop-invariant.
-bool LoopDeletion::SingleDominatingExit(Loop* L,
- SmallVector<BasicBlock*, 4>& exitingBlocks) {
-
- if (exitingBlocks.size() != 1)
- return false;
-
- BasicBlock* latch = L->getLoopLatch();
- if (!latch)
- return false;
-
- DominatorTree& DT = getAnalysis<DominatorTree>();
- return DT.dominates(exitingBlocks[0], latch);
-}
-
/// IsLoopDead - Determines if a loop is dead. This assumes that we've already
/// checked for unique exit and exiting blocks, and that the code is in LCSSA
/// form.
@@ -154,9 +133,8 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
if (exitBlocks.size() != 1)
return false;
- // Loops with multiple exits or exits that don't dominate the latch
- // are too complicated to handle correctly.
- if (!SingleDominatingExit(L, exitingBlocks))
+ // Loops with multiple exits are too complicated to handle correctly.
+ if (exitingBlocks.size() != 1)
return false;
// Finally, we have to check that the loop really is dead.
@@ -167,7 +145,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
- const SCEV *S = SE.getBackedgeTakenCount(L);
+ const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
@@ -183,7 +161,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Tell ScalarEvolution that the loop is deleted. Do this before
// deleting the loop so that ScalarEvolution can look at the loop
// to determine what it needs to clean up.
- SE.forgetLoopBackedgeTakenCount(L);
+ SE.forgetLoop(L);
// Connect the preheader directly to the exit block.
TerminatorInst* TI = preheader->getTerminator();
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 5f9d370..920d85c 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -426,7 +426,7 @@ bool LoopIndexSplit::processOneIterationLoop() {
// c1 = icmp uge i32 SplitValue, StartValue
// c2 = icmp ult i32 SplitValue, ExitValue
// and i32 c1, c2
- Instruction *C1 = new ICmpInst(BR, ExitCondition->isSignedPredicate() ?
+ Instruction *C1 = new ICmpInst(BR, ExitCondition->isSigned() ?
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
SplitValue, StartValue, "lisplit");
@@ -478,7 +478,7 @@ bool LoopIndexSplit::processOneIterationLoop() {
/// with a loop invariant value. Update loop's lower and upper bound based on
/// the loop invariant value.
bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {
- bool Sign = Op.isSignedPredicate();
+ bool Sign = Op.isSigned();
Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
@@ -933,7 +933,7 @@ bool LoopIndexSplit::splitLoop() {
return false;
// If the predicate sign does not match then skip.
- if (ExitCondition->isSignedPredicate() != SplitCondition->isSignedPredicate())
+ if (ExitCondition->isSigned() != SplitCondition->isSigned())
return false;
unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
@@ -963,7 +963,7 @@ bool LoopIndexSplit::splitLoop() {
//[*] Calculate new loop bounds.
Value *AEV = SplitValue;
Value *BSV = SplitValue;
- bool Sign = SplitCondition->isSignedPredicate();
+ bool Sign = SplitCondition->isSigned();
Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
if (IVisLT(*ExitCondition)) {
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 70c69bb..7a4bb35 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
@@ -32,16 +33,6 @@ using namespace llvm;
STATISTIC(NumRotated, "Number of loops rotated");
namespace {
- class RenameData {
- public:
- RenameData(Instruction *O, Value *P, Instruction *H)
- : Original(O), PreHeader(P), Header(H) { }
- public:
- Instruction *Original; // Original instruction
- Value *PreHeader; // Original pre-header replacement
- Instruction *Header; // New header replacement
- };
-
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
@@ -71,25 +62,12 @@ namespace {
/// Initialize local data
void initialize();
- /// Make sure all Exit block PHINodes have required incoming values.
- /// If incoming value is constant or defined outside the loop then
- /// PHINode may not have an entry for original pre-header.
- void updateExitBlock();
-
- /// Return true if this instruction is used outside original header.
- bool usedOutsideOriginalHeader(Instruction *In);
-
- /// Find Replacement information for instruction. Return NULL if it is
- /// not available.
- const RenameData *findReplacementData(Instruction *I);
-
/// After loop rotation, loop pre-header has multiple successors.
/// Insert one forwarding basic block to ensure that loop pre-header
/// has only one successor.
void preserveCanonicalLoopForm(LPPassManager &LPM);
private:
-
Loop *L;
BasicBlock *OrigHeader;
BasicBlock *OrigPreHeader;
@@ -97,7 +75,6 @@ namespace {
BasicBlock *NewHeader;
BasicBlock *Exit;
LPPassManager *LPM_Ptr;
- SmallVector<RenameData, MAX_HEADER_SIZE> LoopHeaderInfo;
};
}
@@ -141,7 +118,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// If the loop header is not one of the loop exiting blocks then
// either this loop is already rotated or it is not
// suitable for loop rotation transformations.
- if (!L->isLoopExit(OrigHeader))
+ if (!L->isLoopExiting(OrigHeader))
return false;
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
@@ -180,7 +157,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// Anything ScalarEvolution may know about this loop or the PHI nodes
// in its header will soon be invalidated.
if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
- SE->forgetLoopBackedgeTakenCount(L);
+ SE->forgetLoop(L);
// Find the new loop header. NewHeader is the Header's one and only successor
// that is inside the loop. Header's other successor is outside the
@@ -199,168 +176,88 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
"New header doesn't have one pred!");
FoldSingleEntryPHINodes(NewHeader);
- // Copy PHI nodes and other instructions from the original header
- // into the original pre-header. Unlike the original header, the original
- // pre-header is not a member of the loop.
- //
- // The new loop header is the one and only successor of original header that
- // is inside the loop. All other original header successors are outside
- // the loop. Copy PHI Nodes from the original header into the new loop header.
- // Add second incoming value, from original loop pre-header into these phi
- // nodes. If a value defined in original header is used outside original
- // header then new loop header will need new phi nodes with two incoming
- // values, one definition from original header and second definition is
- // from original loop pre-header.
-
- // Remove terminator from Original pre-header. Original pre-header will
- // receive a clone of original header terminator as a new terminator.
- OrigPreHeader->getInstList().pop_back();
+ // Begin by walking OrigHeader and populating ValueMap with an entry for
+ // each Instruction.
BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
- PHINode *PN = 0;
- for (; (PN = dyn_cast<PHINode>(I)); ++I) {
- // PHI nodes are not copied into original pre-header. Instead their values
- // are directly propagated.
- Value *NPV = PN->getIncomingValueForBlock(OrigPreHeader);
-
- // Create a new PHI node with two incoming values for NewHeader.
- // One incoming value is from OrigLatch (through OrigHeader) and the
- // second incoming value is from original pre-header.
- PHINode *NH = PHINode::Create(PN->getType(), PN->getName(),
- NewHeader->begin());
- NH->addIncoming(PN->getIncomingValueForBlock(OrigLatch), OrigHeader);
- NH->addIncoming(NPV, OrigPreHeader);
-
- // "In" can be replaced by NH at various places.
- LoopHeaderInfo.push_back(RenameData(PN, NPV, NH));
- }
+ DenseMap<const Value *, Value *> ValueMap;
- // Now, handle non-phi instructions.
- for (; I != E; ++I) {
- Instruction *In = I;
- assert(!isa<PHINode>(In) && "PHINode is not expected here");
-
- // This is not a PHI instruction. Insert its clone into original pre-header.
- // If this instruction is using a value from same basic block then
- // update it to use value from cloned instruction.
- Instruction *C = In->clone();
- C->setName(In->getName());
- OrigPreHeader->getInstList().push_back(C);
-
- for (unsigned opi = 0, e = In->getNumOperands(); opi != e; ++opi) {
- Instruction *OpInsn = dyn_cast<Instruction>(In->getOperand(opi));
- if (!OpInsn) continue; // Ignore non-instruction values.
- if (const RenameData *D = findReplacementData(OpInsn))
- C->setOperand(opi, D->PreHeader);
- }
+ // For PHI nodes, the value available in OldPreHeader is just the
+ // incoming value from OldPreHeader.
+ for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
- // If this instruction is used outside this basic block then
- // create new PHINode for this instruction.
- Instruction *NewHeaderReplacement = NULL;
- if (usedOutsideOriginalHeader(In)) {
- PHINode *PN = PHINode::Create(In->getType(), In->getName(),
- NewHeader->begin());
- PN->addIncoming(In, OrigHeader);
- PN->addIncoming(C, OrigPreHeader);
- NewHeaderReplacement = PN;
- }
- LoopHeaderInfo.push_back(RenameData(In, C, NewHeaderReplacement));
+ // For the rest of the instructions, create a clone in the OldPreHeader.
+ TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator();
+ for (; I != E; ++I) {
+ Instruction *C = I->clone();
+ C->setName(I->getName());
+ C->insertBefore(LoopEntryBranch);
+ ValueMap[I] = C;
}
- // Rename uses of original header instructions to reflect their new
- // definitions (either from original pre-header node or from newly created
- // new header PHINodes.
- //
- // Original header instructions are used in
- // 1) Original header:
- //
- // If instruction is used in non-phi instructions then it is using
- // definition from original header itself. Do not replace this use
- // with definition from new header or original pre-header.
- //
- // If instruction is used in phi node then it is an incoming
- // value. Rename its use to reflect new definition from new-preheader
- // or new header.
- //
- // 2) Inside loop but not in original header
- //
- // Replace this use to reflect definition from new header.
- for (unsigned LHI = 0, LHI_E = LoopHeaderInfo.size(); LHI != LHI_E; ++LHI) {
- const RenameData &ILoopHeaderInfo = LoopHeaderInfo[LHI];
-
- if (!ILoopHeaderInfo.Header)
- continue;
-
- Instruction *OldPhi = ILoopHeaderInfo.Original;
- Instruction *NewPhi = ILoopHeaderInfo.Header;
-
- // Before replacing uses, collect them first, so that iterator is
- // not invalidated.
- SmallVector<Instruction *, 16> AllUses;
- for (Value::use_iterator UI = OldPhi->use_begin(), UE = OldPhi->use_end();
- UI != UE; ++UI)
- AllUses.push_back(cast<Instruction>(UI));
-
- for (SmallVector<Instruction *, 16>::iterator UI = AllUses.begin(),
- UE = AllUses.end(); UI != UE; ++UI) {
- Instruction *U = *UI;
- BasicBlock *Parent = U->getParent();
-
- // Used inside original header
- if (Parent == OrigHeader) {
- // Do not rename uses inside original header non-phi instructions.
- PHINode *PU = dyn_cast<PHINode>(U);
- if (!PU)
+ // Along with all the other instructions, we just cloned OrigHeader's
+ // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+ // successors by duplicating their incoming values for OrigHeader.
+ TerminatorInst *TI = OrigHeader->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreHeader);
+
+ // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
+ // OrigPreHeader's old terminator (the original branch into the loop), and
+ // remove the corresponding incoming values from the PHI nodes in OrigHeader.
+ LoopEntryBranch->eraseFromParent();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA;
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = I;
+ Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
+
+ // The value now exists in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal);
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+ UE = OrigHeaderVal->use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
continue;
- // Do not rename uses inside original header phi nodes, if the
- // incoming value is for new header.
- if (PU->getBasicBlockIndex(NewHeader) != -1
- && PU->getIncomingValueForBlock(NewHeader) == U)
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreHeader) {
+ U = OrigPreHeaderVal;
continue;
-
- U->replaceUsesOfWith(OldPhi, NewPhi);
- continue;
+ }
}
- // Used inside loop, but not in original header.
- if (L->contains(U->getParent())) {
- if (U != NewPhi)
- U->replaceUsesOfWith(OldPhi, NewPhi);
- continue;
- }
-
- // Used inside Exit Block. Since we are in LCSSA form, U must be PHINode.
- if (U->getParent() == Exit) {
- assert(isa<PHINode>(U) && "Use in Exit Block that is not PHINode");
-
- PHINode *UPhi = cast<PHINode>(U);
- // UPhi already has one incoming argument from original header.
- // Add second incoming argument from new Pre header.
- UPhi->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
- } else {
- // Used outside Exit block. Create a new PHI node in the exit block
- // to receive the value from the new header and pre-header.
- PHINode *PN = PHINode::Create(U->getType(), U->getName(),
- Exit->begin());
- PN->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
- PN->addIncoming(OldPhi, OrigHeader);
- U->replaceUsesOfWith(OldPhi, PN);
- }
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
}
}
-
- /// Make sure all Exit block PHINodes have required incoming values.
- updateExitBlock();
-
- // Update CFG
- // Removing incoming branch from loop preheader to original header.
- // Now original header is inside the loop.
- for (BasicBlock::iterator I = OrigHeader->begin();
- (PN = dyn_cast<PHINode>(I)); ++I)
- PN->removeIncomingValue(OrigPreHeader);
-
- // Make NewHeader as the new header for the loop.
+ // NewHeader is now the header of the loop.
L->moveToHeader(NewHeader);
preserveCanonicalLoopForm(LPM);
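A source-level picture of the rotation itself; the code above performs it on IR
by cloning the header into the preheader and letting SSAUpdater insert whatever
PHI nodes the rewritten uses require (use() is a placeholder):

    void use(int);
    void before(int n) { int i = 0; while (i < n) { use(i); ++i; } } // top-tested
    void after(int n) {
      int i = 0;
      if (i < n)                           // cloned header test in the preheader
        do { use(i); ++i; } while (i < n); // loop is now bottom-tested
    }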
@@ -369,31 +266,6 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
return true;
}
-/// Make sure all Exit block PHINodes have required incoming values.
-/// If an incoming value is constant or defined outside the loop then
-/// PHINode may not have an entry for the original pre-header.
-void LoopRotate::updateExitBlock() {
-
- PHINode *PN;
- for (BasicBlock::iterator I = Exit->begin();
- (PN = dyn_cast<PHINode>(I)); ++I) {
-
- // There is already one incoming value from original pre-header block.
- if (PN->getBasicBlockIndex(OrigPreHeader) != -1)
- continue;
-
- const RenameData *ILoopHeaderInfo;
- Value *V = PN->getIncomingValueForBlock(OrigHeader);
- if (isa<Instruction>(V) &&
- (ILoopHeaderInfo = findReplacementData(cast<Instruction>(V)))) {
- assert(ILoopHeaderInfo->PreHeader && "Missing New Preheader Instruction");
- PN->addIncoming(ILoopHeaderInfo->PreHeader, OrigPreHeader);
- } else {
- PN->addIncoming(V, OrigPreHeader);
- }
- }
-}
-
/// Initialize local data
void LoopRotate::initialize() {
L = NULL;
@@ -401,34 +273,6 @@ void LoopRotate::initialize() {
OrigPreHeader = NULL;
NewHeader = NULL;
Exit = NULL;
-
- LoopHeaderInfo.clear();
-}
-
-/// Return true if this instruction is used by any instructions in the loop that
-/// aren't in original header.
-bool LoopRotate::usedOutsideOriginalHeader(Instruction *In) {
- for (Value::use_iterator UI = In->use_begin(), UE = In->use_end();
- UI != UE; ++UI) {
- BasicBlock *UserBB = cast<Instruction>(UI)->getParent();
- if (UserBB != OrigHeader && L->contains(UserBB))
- return true;
- }
-
- return false;
-}
-
-/// Find Replacement information for instruction. Return NULL if it is
-/// not available.
-const RenameData *LoopRotate::findReplacementData(Instruction *In) {
-
- // Since LoopHeaderInfo is small, linear walk is OK.
- for (unsigned LHI = 0, LHI_E = LoopHeaderInfo.size(); LHI != LHI_E; ++LHI) {
- const RenameData &ILoopHeaderInfo = LoopHeaderInfo[LHI];
- if (ILoopHeaderInfo.Original == In)
- return &ILoopHeaderInfo;
- }
- return NULL;
}
/// After loop rotation, loop pre-header has multiple successors.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index d8f6cc1..e20fb16 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1914,7 +1914,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
continue;
// Watch out for overflow.
- if (ICmpInst::isSignedPredicate(Predicate) &&
+ if (ICmpInst::isSigned(Predicate) &&
(CmpVal & SignBit) != (NewCmpVal & SignBit))
continue;
@@ -1956,7 +1956,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
// Check if it is possible to rewrite it using
// an iv / stride of a smaller integer type.
unsigned Bits = NewTyBits;
- if (ICmpInst::isSignedPredicate(Predicate))
+ if (ICmpInst::isSigned(Predicate))
--Bits;
uint64_t Mask = (1ULL << Bits) - 1;
if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal)
@@ -2262,6 +2262,10 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
if (!C) continue;
+ // Ignore negative constants, as the code below doesn't handle them
+ // correctly. TODO: Remove this restriction.
+ if (!C->getValue().isStrictlyPositive()) continue;
+
/* Add new PHINode. */
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
new file mode 100644
index 0000000..c2bf9f2
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -0,0 +1,151 @@
+//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a simple loop unroller. It works best when loops have
+// been canonicalized by the -indvars pass, allowing it to determine the trip
+// counts of loops easily.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <climits>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
+ cl::desc("The cut-off point for automatic loop unrolling"));
+
+static cl::opt<unsigned>
+UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+ cl::desc("Use this unroll count for all loops, for testing purposes"));
+
+static cl::opt<bool>
+UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+ cl::desc("Allows loops to be partially unrolled until "
+ "-unroll-threshold loop size is reached."));
+
+namespace {
+ class LoopUnroll : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopUnroll() : LoopPass(&ID) {}
+
+ /// A magic value for use with the Threshold parameter to indicate
+ /// that the loop unroll should be performed regardless of how much
+ /// code expansion would result.
+ static const unsigned NoThreshold = UINT_MAX;
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<LoopInfo>();
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<LoopInfo>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+ };
+}
+
+char LoopUnroll::ID = 0;
+static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
+
+Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
+
+/// ApproximateLoopSize - Approximate the size of the loop.
+static unsigned ApproximateLoopSize(const Loop *L) {
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+ return Metrics.NumInsts;
+}
+
+bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ assert(L->isLCSSAForm());
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+
+ BasicBlock *Header = L->getHeader();
+ DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName()
+ << "] Loop %" << Header->getName() << "\n");
+ (void)Header;
+
+ // Find trip count
+ unsigned TripCount = L->getSmallConstantTripCount();
+ unsigned Count = UnrollCount;
+
+ // Automatically select an unroll count.
+ if (Count == 0) {
+ // Conservative heuristic: if we know the trip count, see if we can
+ // completely unroll (subject to the threshold, checked below); otherwise
+ // try to find the greatest factor of the trip count whose unrolled size is
+ // still under the threshold value.
+ if (TripCount == 0)
+ return false;
+ Count = TripCount;
+ }
+
+ // Enforce the threshold.
+ if (UnrollThreshold != NoThreshold) {
+ unsigned LoopSize = ApproximateLoopSize(L);
+ DEBUG(errs() << " Loop Size = " << LoopSize << "\n");
+ uint64_t Size = (uint64_t)LoopSize*Count;
+ if (TripCount != 1 && Size > UnrollThreshold) {
+ DEBUG(errs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << UnrollThreshold << "\n");
+ if (!UnrollAllowPartial) {
+ DEBUG(errs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ return false;
+ }
+ // Reduce unroll count to a factor of TripCount for partial unrolling.
+ Count = UnrollThreshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0) {
+ Count--;
+ }
+ if (Count < 2) {
+ DEBUG(errs() << " could not unroll partially\n");
+ return false;
+ }
+ DEBUG(errs() << " partially unrolling with count: " << Count << "\n");
+ }
+ }
+
+ // Unroll the loop.
+ Function *F = L->getHeader()->getParent();
+ if (!UnrollLoop(L, Count, LI, &LPM))
+ return false;
+
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ if (DT) {
+ DT->runOnFunction(*F);
+ DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
+ if (DF)
+ DF->runOnFunction(*F);
+ }
+ return true;
+}
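A worked instance of the partial-unroll count selection above, under hypothetical
numbers (LoopSize = 30, TripCount = 10, UnrollThreshold = 100):

    unsigned pickPartialCount(unsigned LoopSize, unsigned TripCount,
                              unsigned Threshold) {
      unsigned Count = Threshold / LoopSize;       // 100 / 30 -> 3
      while (Count != 0 && TripCount % Count != 0) // 10 % 3 != 0, try 2
        --Count;                                   // 10 % 2 == 0, stop
      return Count;                                // 2; below 2 means give up
    }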
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 223d2b9..c7b00da 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -430,7 +430,8 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
if (Metrics.NumInsts > Threshold ||
- Metrics.NumBlocks * 5 > Threshold) {
+ Metrics.NumBlocks * 5 > Threshold ||
+ Metrics.NeverInline) {
DEBUG(errs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName() << ", cost too high: "
<< currentLoop->getBlocks().size() << "\n");
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index b745097..05a0eee 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -15,10 +15,6 @@
// * Proves values to be constant, and replaces them with constants
// * Proves conditional branches to be unconditional
//
-// Notice that:
-// * This pass has a habit of making definitions be dead. It is a good idea
-// to run a DCE pass sometime after running this pass.
-//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sccp"
@@ -27,11 +23,11 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -39,7 +35,8 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -51,7 +48,6 @@ STATISTIC(NumInstRemoved, "Number of instructions removed");
STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
STATISTIC(IPNumInstRemoved, "Number of instructions removed by IPSCCP");
-STATISTIC(IPNumDeadBlocks , "Number of basic blocks unreachable by IPSCCP");
STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP");
STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP");
@@ -60,7 +56,7 @@ namespace {
/// an LLVM value may occupy. It is a simple class with value semantics.
///
class LatticeVal {
- enum {
+ enum LatticeValueTy {
/// undefined - This LLVM Value has no known value yet.
undefined,
@@ -76,63 +72,82 @@ class LatticeVal {
/// overdefined - This instruction is not known to be constant, and we know
/// it has a value.
overdefined
- } LatticeValue; // The current lattice position
+ };
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'forcedconstant' value.
+ PointerIntPair<Constant *, 2, LatticeValueTy> Val;
+
+ LatticeValueTy getLatticeValue() const {
+ return Val.getInt();
+ }
- Constant *ConstantVal; // If Constant value, the current value
public:
- inline LatticeVal() : LatticeValue(undefined), ConstantVal(0) {}
+ LatticeVal() : Val(0, undefined) {}
- // markOverdefined - Return true if this is a new status to be in...
- inline bool markOverdefined() {
- if (LatticeValue != overdefined) {
- LatticeValue = overdefined;
- return true;
- }
- return false;
+ bool isUndefined() const { return getLatticeValue() == undefined; }
+ bool isConstant() const {
+ return getLatticeValue() == constant || getLatticeValue() == forcedconstant;
+ }
+ bool isOverdefined() const { return getLatticeValue() == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val.getPointer();
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+
+ Val.setInt(overdefined);
+ return true;
}
- // markConstant - Return true if this is a new status for us.
- inline bool markConstant(Constant *V) {
- if (LatticeValue != constant) {
- if (LatticeValue == undefined) {
- LatticeValue = constant;
- assert(V && "Marking constant with NULL");
- ConstantVal = V;
- } else {
- assert(LatticeValue == forcedconstant &&
- "Cannot move from overdefined to constant!");
- // Stay at forcedconstant if the constant is the same.
- if (V == ConstantVal) return false;
-
- // Otherwise, we go to overdefined. Assumptions made based on the
- // forced value are possibly wrong. Assuming this is another constant
- // could expose a contradiction.
- LatticeValue = overdefined;
- }
- return true;
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ if (getLatticeValue() == constant) { // Constant but not forcedconstant.
+ assert(getConstant() == V && "Marking constant with different value");
+ return false;
+ }
+
+ if (isUndefined()) {
+ Val.setInt(constant);
+ assert(V && "Marking constant with NULL");
+ Val.setPointer(V);
} else {
- assert(ConstantVal == V && "Marking constant with different value");
+ assert(getLatticeValue() == forcedconstant &&
+ "Cannot move from overdefined to constant!");
+ // Stay at forcedconstant if the constant is the same.
+ if (V == getConstant()) return false;
+
+ // Otherwise, we go to overdefined. Assumptions made based on the
+ // forced value are possibly wrong. Assuming this is another constant
+ // could expose a contradiction.
+ Val.setInt(overdefined);
}
- return false;
+ return true;
}
- inline void markForcedConstant(Constant *V) {
- assert(LatticeValue == undefined && "Can't force a defined value!");
- LatticeValue = forcedconstant;
- ConstantVal = V;
+ /// getConstantInt - If this is a constant with a ConstantInt value, return it;
+ /// otherwise return null.
+ ConstantInt *getConstantInt() const {
+ if (isConstant())
+ return dyn_cast<ConstantInt>(getConstant());
+ return 0;
}
- inline bool isUndefined() const { return LatticeValue == undefined; }
- inline bool isConstant() const {
- return LatticeValue == constant || LatticeValue == forcedconstant;
- }
- inline bool isOverdefined() const { return LatticeValue == overdefined; }
-
- inline Constant *getConstant() const {
- assert(isConstant() && "Cannot get the constant of a non-constant!");
- return ConstantVal;
+ void markForcedConstant(Constant *V) {
+ assert(isUndefined() && "Can't force a defined value!");
+ Val.setInt(forcedconstant);
+ Val.setPointer(V);
}
};
+} // end anonymous namespace.
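The PointerIntPair above keeps LatticeVal one word wide by storing the 2-bit
lattice tag in the alignment bits of the Constant*. A simplified, self-contained
sketch of the same packing (int* stands in for Constant*):

    #include "llvm/ADT/PointerIntPair.h"

    enum SketchTag { sUndefined, sConstant, sForcedConstant, sOverdefined };

    static llvm::PointerIntPair<int *, 2, SketchTag> markConstantSketch(int *V) {
      llvm::PointerIntPair<int *, 2, SketchTag> LV(0, sUndefined);
      LV.setInt(sConstant); // 2-bit tag lives in the low bits...
      LV.setPointer(V);     // ...pointer payload in the rest of the word
      return LV;
    }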
+
+
+namespace {
//===----------------------------------------------------------------------===//
//
@@ -140,10 +155,15 @@ public:
/// Constant Propagation.
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
- LLVMContext *Context;
- DenseSet<BasicBlock*> BBExecutable;// The basic blocks that are executable
- std::map<Value*, LatticeVal> ValueState; // The state each value is in.
+ const TargetData *TD;
+ SmallPtrSet<BasicBlock*, 8> BBExecutable;// The BBs that are executable.
+ DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
+ /// StructValueState - This maintains ValueState for values that have
+ /// StructType, for example for formal arguments, calls, insertelement, etc.
+ ///
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal> StructValueState;
+
/// GlobalValue - If we are tracking any values for the contents of a global
/// variable, we keep a mapping from the constant accessor to the element of
/// the global, to the currently known value. If the value becomes
@@ -158,13 +178,23 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
/// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
/// that return multiple values.
DenseMap<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
-
- // The reason for two worklists is that overdefined is the lowest state
- // on the lattice, and moving things to overdefined as fast as possible
- // makes SCCP converge much faster.
- // By having a separate worklist, we accomplish this because everything
- // possibly overdefined will become overdefined at the soonest possible
- // point.
+
+ /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
+ /// represented here for efficient lookup.
+ SmallPtrSet<Function*, 16> MRVFunctionsTracked;
+
+  /// TrackingIncomingArguments - This is the set of functions whose arguments
+  /// we make optimistic assumptions about and try to prove to be constants.
+ SmallPtrSet<Function*, 16> TrackingIncomingArguments;
+
+ /// The reason for two worklists is that overdefined is the lowest state
+ /// on the lattice, and moving things to overdefined as fast as possible
+ /// makes SCCP converge much faster.
+ ///
+ /// By having a separate worklist, we accomplish this because everything
+ /// possibly overdefined will become overdefined at the soonest possible
+ /// point.
SmallVector<Value*, 64> OverdefinedInstWorkList;
SmallVector<Value*, 64> InstWorkList;
@@ -180,14 +210,17 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- void setContext(LLVMContext *C) { Context = C; }
+ SCCPSolver(const TargetData *td) : TD(td) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
- void MarkBlockExecutable(BasicBlock *BB) {
+ ///
+ /// This returns true if the block was not considered live before.
+ bool MarkBlockExecutable(BasicBlock *BB) {
+ if (!BBExecutable.insert(BB)) return false;
DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n");
- BBExecutable.insert(BB); // Basic block is executable!
BBWorkList.push_back(BB); // Add the block to the work list!
+ return true;
}
/// TrackValueOfGlobalVariable - Clients can use this method to
@@ -195,8 +228,8 @@ public:
/// specified global variable if it can. This is only legal to call if
/// performing Interprocedural SCCP.
void TrackValueOfGlobalVariable(GlobalVariable *GV) {
- const Type *ElTy = GV->getType()->getElementType();
- if (ElTy->isFirstClassType()) {
+ // We only track the contents of scalar globals.
+ if (GV->getType()->getElementType()->isSingleValueType()) {
LatticeVal &IV = TrackedGlobals[GV];
if (!isa<UndefValue>(GV->getInitializer()))
IV.markConstant(GV->getInitializer());
@@ -207,9 +240,9 @@ public:
/// and out of the specified function (which cannot have its address taken),
/// this method must be called.
void AddTrackedFunction(Function *F) {
- assert(F->hasLocalLinkage() && "Can only track internal functions!");
// Add an entry, F -> undef.
if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ MRVFunctionsTracked.insert(F);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
LatticeVal()));
@@ -217,6 +250,10 @@ public:
TrackedRetVals.insert(std::make_pair(F, LatticeVal()));
}
+ void AddArgumentTrackedFunction(Function *F) {
+ TrackingIncomingArguments.insert(F);
+ }
+
/// Solve - Solve for constants and executable blocks.
///
void Solve();
@@ -232,10 +269,17 @@ public:
return BBExecutable.count(BB);
}
- /// getValueMapping - Once we have solved for constants, return the mapping of
- /// LLVM values to LatticeVals.
- std::map<Value*, LatticeVal> &getValueMapping() {
- return ValueState;
+ LatticeVal getLatticeValueFor(Value *V) const {
+ DenseMap<Value*, LatticeVal>::const_iterator I = ValueState.find(V);
+ assert(I != ValueState.end() && "V is not in valuemap!");
+ return I->second;
+ }
+
+ LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
+ StructValueState.find(std::make_pair(V, i));
+ assert(I != StructValueState.end() && "V is not in valuemap!");
+ return I->second;
}
/// getTrackedRetVals - Get the inferred return value map.
@@ -250,48 +294,61 @@ public:
return TrackedGlobals;
}
- inline void markOverdefined(Value *V) {
+ void markOverdefined(Value *V) {
+ assert(!isa<StructType>(V->getType()) && "Should use other method");
markOverdefined(ValueState[V], V);
}
+ /// markAnythingOverdefined - Mark the specified value overdefined. This
+ /// works with both scalars and structs.
+ void markAnythingOverdefined(Value *V) {
+ if (const StructType *STy = dyn_cast<StructType>(V->getType()))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ markOverdefined(getStructValueState(V, i), V);
+ else
+ markOverdefined(V);
+ }
+
private:
// markConstant - Make a value be marked as "constant". If the value
// is not already a constant, add it to the instruction work list so that
// the users of the instruction are updated later.
//
- inline void markConstant(LatticeVal &IV, Value *V, Constant *C) {
- if (IV.markConstant(C)) {
- DEBUG(errs() << "markConstant: " << *C << ": " << *V << '\n');
- InstWorkList.push_back(V);
- }
- }
-
- inline void markForcedConstant(LatticeVal &IV, Value *V, Constant *C) {
- IV.markForcedConstant(C);
- DEBUG(errs() << "markForcedConstant: " << *C << ": " << *V << '\n');
+ void markConstant(LatticeVal &IV, Value *V, Constant *C) {
+ if (!IV.markConstant(C)) return;
+ DEBUG(errs() << "markConstant: " << *C << ": " << *V << '\n');
InstWorkList.push_back(V);
}
- inline void markConstant(Value *V, Constant *C) {
+ void markConstant(Value *V, Constant *C) {
+ assert(!isa<StructType>(V->getType()) && "Should use other method");
markConstant(ValueState[V], V, C);
}
+ void markForcedConstant(Value *V, Constant *C) {
+ assert(!isa<StructType>(V->getType()) && "Should use other method");
+ ValueState[V].markForcedConstant(C);
+ DEBUG(errs() << "markForcedConstant: " << *C << ": " << *V << '\n');
+ InstWorkList.push_back(V);
+ }
+
+
// markOverdefined - Make a value be marked as "overdefined". If the
// value is not already overdefined, add it to the overdefined instruction
// work list so that the users of the instruction are updated later.
- inline void markOverdefined(LatticeVal &IV, Value *V) {
- if (IV.markOverdefined()) {
- DEBUG(errs() << "markOverdefined: ";
- if (Function *F = dyn_cast<Function>(V))
- errs() << "Function '" << F->getName() << "'\n";
- else
- errs() << *V << '\n');
- // Only instructions go on the work list
- OverdefinedInstWorkList.push_back(V);
- }
+ void markOverdefined(LatticeVal &IV, Value *V) {
+ if (!IV.markOverdefined()) return;
+
+ DEBUG(errs() << "markOverdefined: ";
+ if (Function *F = dyn_cast<Function>(V))
+ errs() << "Function '" << F->getName() << "'\n";
+ else
+ errs() << *V << '\n');
+ // Only instructions go on the work list
+ OverdefinedInstWorkList.push_back(V);
}
- inline void mergeInValue(LatticeVal &IV, Value *V, LatticeVal &MergeWithV) {
+ void mergeInValue(LatticeVal &IV, Value *V, LatticeVal MergeWithV) {
if (IV.isOverdefined() || MergeWithV.isUndefined())
return; // Noop.
if (MergeWithV.isOverdefined())
@@ -302,53 +359,85 @@ private:
markOverdefined(IV, V);
}
- inline void mergeInValue(Value *V, LatticeVal &MergeWithV) {
- return mergeInValue(ValueState[V], V, MergeWithV);
+ void mergeInValue(Value *V, LatticeVal MergeWithV) {
+ assert(!isa<StructType>(V->getType()) && "Should use other method");
+ mergeInValue(ValueState[V], V, MergeWithV);
}
- // getValueState - Return the LatticeVal object that corresponds to the value.
- // This function is necessary because not all values should start out in the
- // underdefined state... Argument's should be overdefined, and
- // constants should be marked as constants. If a value is not known to be an
- // Instruction object, then use this accessor to get its value from the map.
- //
- inline LatticeVal &getValueState(Value *V) {
- std::map<Value*, LatticeVal>::iterator I = ValueState.find(V);
- if (I != ValueState.end()) return I->second; // Common case, in the map
+ /// getValueState - Return the LatticeVal object that corresponds to the
+ /// value. This function handles the case when the value hasn't been seen yet
+  /// by properly seeding constants, etc.
+ LatticeVal &getValueState(Value *V) {
+ assert(!isa<StructType>(V->getType()) && "Should use getStructValueState");
+
+ // TODO: Change to do insert+find in one operation.
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
+ if (I != ValueState.end())
+ return I->second; // Common case, already in the map.
+
+ LatticeVal &LV = ValueState[V];
if (Constant *C = dyn_cast<Constant>(V)) {
- if (isa<UndefValue>(V)) {
- // Nothing to do, remain undefined.
- } else {
- LatticeVal &LV = ValueState[C];
+ // Undef values remain undefined.
+ if (!isa<UndefValue>(V))
LV.markConstant(C); // Constants are constant
- return LV;
- }
}
- // All others are underdefined by default...
- return ValueState[V];
+
+  // All others are undefined by default.
+ return LV;
}
- // markEdgeExecutable - Mark a basic block as executable, adding it to the BB
- // work list if it is not already executable...
- //
+ /// getStructValueState - Return the LatticeVal object that corresponds to the
+ /// value/field pair. This function handles the case when the value hasn't
+  /// been seen yet by properly seeding constants, etc.
+ LatticeVal &getStructValueState(Value *V, unsigned i) {
+ assert(isa<StructType>(V->getType()) && "Should use getValueState");
+ assert(i < cast<StructType>(V->getType())->getNumElements() &&
+ "Invalid element #");
+
+ // TODO: Change to do insert+find in one operation.
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal>::iterator
+ I = StructValueState.find(std::make_pair(V, i));
+ if (I != StructValueState.end())
+ return I->second; // Common case, already in the map.
+
+ LatticeVal &LV = StructValueState[std::make_pair(V, i)];
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<UndefValue>(C))
+ ; // Undef values remain undefined.
+ else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
+ LV.markConstant(CS->getOperand(i)); // Constants are constant.
+ else if (isa<ConstantAggregateZero>(C)) {
+ const Type *FieldTy = cast<StructType>(V->getType())->getElementType(i);
+ LV.markConstant(Constant::getNullValue(FieldTy));
+ } else
+ LV.markOverdefined(); // Unknown sort of constant.
+ }
+
+  // All others are undefined by default.
+ return LV;
+ }
+
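
// Editor's sketch (not part of the patch): the shape of StructValueState
// above, with std::map and int standing in for DenseMap and LatticeVal. Each
// struct field gets its own lattice cell keyed by (value, field index), which
// is how {i32 7, i32 undef} can be constant in field 0 while field 1 is
// still undefined.
#include <map>
#include <utility>

namespace field_sketch {
using FieldKey = std::pair<const void *, unsigned>;
std::map<FieldKey, int> StructState;

int &getFieldState(const void *V, unsigned Field) {
  // operator[] default-constructs the cell (i.e. "undefined") on first use,
  // mirroring how getStructValueState seeds entries lazily.
  return StructState[FieldKey(V, Field)];
}
} // namespace field_sketch
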
+
+ /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+ /// work list if it is not already executable.
void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
return; // This edge is already known to be executable!
- if (BBExecutable.count(Dest)) {
- DEBUG(errs() << "Marking Edge Executable: " << Source->getName()
- << " -> " << Dest->getName() << "\n");
-
- // The destination is already executable, but we just made an edge
+ if (!MarkBlockExecutable(Dest)) {
+ // If the destination is already executable, we just made an *edge*
// feasible that wasn't before. Revisit the PHI nodes in the block
// because they have potentially new operands.
- for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
- visitPHINode(*cast<PHINode>(I));
+ DEBUG(errs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
- } else {
- MarkBlockExecutable(Dest);
+ PHINode *PN;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ visitPHINode(*PN);
}
}
@@ -358,28 +447,39 @@ private:
void getFeasibleSuccessors(TerminatorInst &TI, SmallVector<bool, 16> &Succs);
// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
- // block to the 'To' basic block is currently feasible...
+ // block to the 'To' basic block is currently feasible.
//
bool isEdgeFeasible(BasicBlock *From, BasicBlock *To);
// OperandChangedState - This method is invoked on all of the users of an
- // instruction that was just changed state somehow.... Based on this
+ // instruction that was just changed state somehow. Based on this
// information, we need to update the specified user of this instruction.
//
- void OperandChangedState(User *U) {
- // Only instructions use other variable values!
- Instruction &I = cast<Instruction>(*U);
- if (BBExecutable.count(I.getParent())) // Inst is executable?
- visit(I);
+ void OperandChangedState(Instruction *I) {
+ if (BBExecutable.count(I->getParent())) // Inst is executable?
+ visit(*I);
+ }
+
+ /// RemoveFromOverdefinedPHIs - If I has any entries in the
+ /// UsersOfOverdefinedPHIs map for PN, remove them now.
+ void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) {
+ if (UsersOfOverdefinedPHIs.empty()) return;
+ std::multimap<PHINode*, Instruction*>::iterator It, E;
+ tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN);
+ while (It != E) {
+ if (It->second == I)
+ UsersOfOverdefinedPHIs.erase(It++);
+ else
+ ++It;
+ }
}
private:
friend class InstVisitor<SCCPSolver>;
- // visit implementations - Something changed in this instruction... Either an
+ // visit implementations - Something changed in this instruction. Either an
// operand made a transition, or the instruction is newly executable. Change
// the value type of I to reflect these changes if appropriate.
- //
void visitPHINode(PHINode &I);
// Terminators
@@ -396,11 +496,11 @@ private:
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
- // Instructions that cannot be folded away...
- void visitStoreInst (Instruction &I);
+ // Instructions that cannot be folded away.
+ void visitStoreInst (StoreInst &I);
void visitLoadInst (LoadInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
- void visitCallInst (CallInst &I) {
+ void visitCallInst (CallInst &I) {
visitCallSite(CallSite::get(&I));
}
void visitInvokeInst (InvokeInst &II) {
@@ -410,15 +510,14 @@ private:
void visitCallSite (CallSite CS);
void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
- void visitAllocationInst(Instruction &I) { markOverdefined(&I); }
+ void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
void visitVANextInst (Instruction &I) { markOverdefined(&I); }
- void visitVAArgInst (Instruction &I) { markOverdefined(&I); }
- void visitFreeInst (Instruction &I) { /*returns void*/ }
+ void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
void visitInstruction(Instruction &I) {
- // If a new instruction is added to LLVM that we don't handle...
+    // Catch-all for any new instruction added to LLVM that we don't handle yet.
errs() << "SCCP: Don't know how to handle: " << I;
- markOverdefined(&I); // Just in case
+ markAnythingOverdefined(&I); // Just in case
}
};
@@ -434,37 +533,61 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
if (BI->isUnconditional()) {
Succs[0] = true;
- } else {
- LatticeVal &BCValue = getValueState(BI->getCondition());
- if (BCValue.isOverdefined() ||
- (BCValue.isConstant() && !isa<ConstantInt>(BCValue.getConstant()))) {
- // Overdefined condition variables, and branches on unfoldable constant
- // conditions, mean the branch could go either way.
+ return;
+ }
+
+ LatticeVal BCValue = getValueState(BI->getCondition());
+ ConstantInt *CI = BCValue.getConstantInt();
+ if (CI == 0) {
+ // Overdefined condition variables, and branches on unfoldable constant
+ // conditions, mean the branch could go either way.
+ if (!BCValue.isUndefined())
Succs[0] = Succs[1] = true;
- } else if (BCValue.isConstant()) {
- // Constant condition variables mean the branch can only go a single way
- Succs[BCValue.getConstant() == ConstantInt::getFalse(*Context)] = true;
- }
+ return;
}
- } else if (isa<InvokeInst>(&TI)) {
+
+ // Constant condition variables mean the branch can only go a single way.
+ Succs[CI->isZero()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
// Invoke instructions successors are always executable.
Succs[0] = Succs[1] = true;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
- LatticeVal &SCValue = getValueState(SI->getCondition());
- if (SCValue.isOverdefined() || // Overdefined condition?
- (SCValue.isConstant() && !isa<ConstantInt>(SCValue.getConstant()))) {
+ return;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+ LatticeVal SCValue = getValueState(SI->getCondition());
+ ConstantInt *CI = SCValue.getConstantInt();
+
+ if (CI == 0) { // Overdefined or undefined condition?
// All destinations are executable!
- Succs.assign(TI.getNumSuccessors(), true);
- } else if (SCValue.isConstant())
- Succs[SI->findCaseValue(cast<ConstantInt>(SCValue.getConstant()))] = true;
- } else {
- llvm_unreachable("SCCP: Don't know how to handle this terminator!");
+ if (!SCValue.isUndefined())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ Succs[SI->findCaseValue(CI)] = true;
+ return;
+ }
+
+ // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
+ if (isa<IndirectBrInst>(&TI)) {
+ // Just mark all destinations executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
}
+
+#ifndef NDEBUG
+ errs() << "Unknown terminator instruction: " << TI << '\n';
+#endif
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
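
// Editor's sketch (not part of the patch): the conditional-branch case above
// reduced to its decision table. Succs[0] is the true successor and Succs[1]
// the false successor, matching the Succs[CI->isZero()] indexing.
namespace branch_sketch {
enum Cond { CondUndefined, CondOverdefined, CondTrue, CondFalse };

void feasibleBranchSuccs(Cond C, bool Succs[2]) {
  switch (C) {
  case CondUndefined:                          // no edge feasible yet
    break;
  case CondOverdefined:                        // could go either way
    Succs[0] = Succs[1] = true;
    break;
  case CondTrue:                               // only the true edge
    Succs[0] = true;
    break;
  case CondFalse:                              // only the false edge
    Succs[1] = true;
    break;
  }
}
} // namespace branch_sketch
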
// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
-// block to the 'To' basic block is currently feasible...
+// block to the 'To' basic block is currently feasible.
//
bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
assert(BBExecutable.count(To) && "Dest should always be alive!");
@@ -472,58 +595,57 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
// Make sure the source basic block is executable!!
if (!BBExecutable.count(From)) return false;
- // Check to make sure this edge itself is actually feasible now...
+ // Check to make sure this edge itself is actually feasible now.
TerminatorInst *TI = From->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional())
return true;
- else {
- LatticeVal &BCValue = getValueState(BI->getCondition());
- if (BCValue.isOverdefined()) {
- // Overdefined condition variables mean the branch could go either way.
- return true;
- } else if (BCValue.isConstant()) {
- // Not branching on an evaluatable constant?
- if (!isa<ConstantInt>(BCValue.getConstant())) return true;
+
+ LatticeVal BCValue = getValueState(BI->getCondition());
- // Constant condition variables mean the branch can only go a single way
- return BI->getSuccessor(BCValue.getConstant() ==
- ConstantInt::getFalse(*Context)) == To;
- }
- return false;
- }
- } else if (isa<InvokeInst>(TI)) {
- // Invoke instructions successors are always executable.
+ // Overdefined condition variables mean the branch could go either way,
+ // undef conditions mean that neither edge is feasible yet.
+ ConstantInt *CI = BCValue.getConstantInt();
+ if (CI == 0)
+ return !BCValue.isUndefined();
+
+ // Constant condition variables mean the branch can only go a single way.
+ return BI->getSuccessor(CI->isZero()) == To;
+ }
+
+ // Invoke instructions successors are always executable.
+ if (isa<InvokeInst>(TI))
return true;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- LatticeVal &SCValue = getValueState(SI->getCondition());
- if (SCValue.isOverdefined()) { // Overdefined condition?
- // All destinations are executable!
- return true;
- } else if (SCValue.isConstant()) {
- Constant *CPV = SCValue.getConstant();
- if (!isa<ConstantInt>(CPV))
- return true; // not a foldable constant?
-
- // Make sure to skip the "default value" which isn't a value
- for (unsigned i = 1, E = SI->getNumSuccessors(); i != E; ++i)
- if (SI->getSuccessorValue(i) == CPV) // Found the taken branch...
- return SI->getSuccessor(i) == To;
-
- // Constant value not equal to any of the branches... must execute
- // default branch then...
- return SI->getDefaultDest() == To;
- }
- return false;
- } else {
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ LatticeVal SCValue = getValueState(SI->getCondition());
+ ConstantInt *CI = SCValue.getConstantInt();
+
+ if (CI == 0)
+ return !SCValue.isUndefined();
+
+ // Make sure to skip the "default value" which isn't a value
+ for (unsigned i = 1, E = SI->getNumSuccessors(); i != E; ++i)
+ if (SI->getSuccessorValue(i) == CI) // Found the taken branch.
+ return SI->getSuccessor(i) == To;
+
+ // If the constant value is not equal to any of the branches, we must
+ // execute default branch.
+ return SI->getDefaultDest() == To;
+ }
+
+ // Just mark all destinations executable!
+ // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
+ if (isa<IndirectBrInst>(&TI))
+ return true;
+
#ifndef NDEBUG
- errs() << "Unknown terminator instruction: " << *TI << '\n';
+ errs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
- llvm_unreachable(0);
- }
+ llvm_unreachable(0);
}
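
// Editor's sketch (not part of the patch): the switch case above. A constant
// condition makes exactly one edge feasible -- the matching case's successor,
// or the default destination when no case value matches.
#include <utility>
#include <vector>

namespace switch_sketch {
unsigned feasibleSwitchSucc(int Cond,
                            const std::vector<std::pair<int, unsigned> > &Cases,
                            unsigned DefaultSucc) {
  for (unsigned i = 0, e = Cases.size(); i != e; ++i)
    if (Cases[i].first == Cond)
      return Cases[i].second;       // found the taken case
  return DefaultSucc;               // nothing matched: take the default edge
}
} // namespace switch_sketch
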
-// visit Implementations - Something changed in this instruction... Either an
+// visit Implementations - Something changed in this instruction, either an
// operand made a transition, or the instruction is newly executable. Change
// the value type of I to reflect these changes if appropriate. This method
// makes sure to do the following actions:
@@ -542,31 +664,33 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
// successors executable.
//
void SCCPSolver::visitPHINode(PHINode &PN) {
- LatticeVal &PNIV = getValueState(&PN);
- if (PNIV.isOverdefined()) {
+ // If this PN returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (isa<StructType>(PN.getType()))
+ return markAnythingOverdefined(&PN);
+
+ if (getValueState(&PN).isOverdefined()) {
// There may be instructions using this PHI node that are not overdefined
// themselves. If so, make sure that they know that the PHI node operand
// changed.
std::multimap<PHINode*, Instruction*>::iterator I, E;
tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN);
- if (I != E) {
- SmallVector<Instruction*, 16> Users;
- for (; I != E; ++I) Users.push_back(I->second);
- while (!Users.empty()) {
- visit(Users.back());
- Users.pop_back();
- }
- }
+ if (I == E)
+ return;
+
+ SmallVector<Instruction*, 16> Users;
+ for (; I != E; ++I)
+ Users.push_back(I->second);
+ while (!Users.empty())
+ visit(Users.pop_back_val());
return; // Quick exit
}
// Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
// and slow us down a lot. Just mark them overdefined.
- if (PN.getNumIncomingValues() > 64) {
- markOverdefined(PNIV, &PN);
- return;
- }
-
+ if (PN.getNumIncomingValues() > 64)
+ return markOverdefined(&PN);
+
// Look at all of the executable operands of the PHI node. If any of them
// are overdefined, the PHI becomes overdefined as well. If they are all
// constant, and they agree with each other, the PHI becomes the identical
@@ -575,32 +699,28 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
//
Constant *OperandVal = 0;
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- LatticeVal &IV = getValueState(PN.getIncomingValue(i));
+ LatticeVal IV = getValueState(PN.getIncomingValue(i));
if (IV.isUndefined()) continue; // Doesn't influence PHI node.
- if (isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent())) {
- if (IV.isOverdefined()) { // PHI node becomes overdefined!
- markOverdefined(&PN);
- return;
- }
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+
+ if (IV.isOverdefined()) // PHI node becomes overdefined!
+ return markOverdefined(&PN);
- if (OperandVal == 0) { // Grab the first value...
- OperandVal = IV.getConstant();
- } else { // Another value is being merged in!
- // There is already a reachable operand. If we conflict with it,
- // then the PHI node becomes overdefined. If we agree with it, we
- // can continue on.
-
- // Check to see if there are two different constants merging...
- if (IV.getConstant() != OperandVal) {
- // Yes there is. This means the PHI node is not constant.
- // You must be overdefined poor PHI.
- //
- markOverdefined(&PN); // The PHI node now becomes overdefined
- return; // I'm done analyzing you
- }
- }
+ if (OperandVal == 0) { // Grab the first value.
+ OperandVal = IV.getConstant();
+ continue;
}
+
+ // There is already a reachable operand. If we conflict with it,
+ // then the PHI node becomes overdefined. If we agree with it, we
+ // can continue on.
+
+      // Check to see if there are two different constants merging; if so, the
+      // PHI node is overdefined.
+ if (IV.getConstant() != OperandVal)
+ return markOverdefined(&PN);
}
// If we exited the loop, this means that the PHI node only has constant
@@ -612,44 +732,33 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
markConstant(&PN, OperandVal); // Acquire operand value
}
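
// Editor's sketch (not part of the patch; stand-alone C++17): the merge rule
// the loop above implements over the *feasible* incoming edges. nullopt
// models an overdefined input; undefined inputs never get here because the
// loop skips them.
#include <optional>
#include <vector>

namespace phi_sketch {
enum Result { StillUndefined, BecomesConstant, BecomesOverdefined };

Result mergePhi(const std::vector<std::optional<int> > &FeasibleIns, int &Out) {
  bool HaveConst = false;
  for (const std::optional<int> &In : FeasibleIns) {
    if (!In)
      return BecomesOverdefined;      // any overdefined input wins
    if (!HaveConst) {
      Out = *In;                      // grab the first constant
      HaveConst = true;
    } else if (*In != Out) {
      return BecomesOverdefined;      // two different constants merge
    }
  }
  return HaveConst ? BecomesConstant : StillUndefined;
}
} // namespace phi_sketch
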
+
+
+
void SCCPSolver::visitReturnInst(ReturnInst &I) {
- if (I.getNumOperands() == 0) return; // Ret void
+ if (I.getNumOperands() == 0) return; // ret void
Function *F = I.getParent()->getParent();
+ Value *ResultOp = I.getOperand(0);
+
// If we are tracking the return value of this function, merge it in.
- if (!F->hasLocalLinkage())
- return;
-
- if (!TrackedRetVals.empty() && I.getNumOperands() == 1) {
+ if (!TrackedRetVals.empty() && !isa<StructType>(ResultOp->getType())) {
DenseMap<Function*, LatticeVal>::iterator TFRVI =
TrackedRetVals.find(F);
- if (TFRVI != TrackedRetVals.end() &&
- !TFRVI->second.isOverdefined()) {
- LatticeVal &IV = getValueState(I.getOperand(0));
- mergeInValue(TFRVI->second, F, IV);
+ if (TFRVI != TrackedRetVals.end()) {
+ mergeInValue(TFRVI->second, F, getValueState(ResultOp));
return;
}
}
// Handle functions that return multiple values.
- if (!TrackedMultipleRetVals.empty() && I.getNumOperands() > 1) {
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator
- It = TrackedMultipleRetVals.find(std::make_pair(F, i));
- if (It == TrackedMultipleRetVals.end()) break;
- mergeInValue(It->second, F, getValueState(I.getOperand(i)));
- }
- } else if (!TrackedMultipleRetVals.empty() &&
- I.getNumOperands() == 1 &&
- isa<StructType>(I.getOperand(0)->getType())) {
- for (unsigned i = 0, e = I.getOperand(0)->getType()->getNumContainedTypes();
- i != e; ++i) {
- DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator
- It = TrackedMultipleRetVals.find(std::make_pair(F, i));
- if (It == TrackedMultipleRetVals.end()) break;
- if (Value *Val = FindInsertedValue(I.getOperand(0), i, I.getContext()))
- mergeInValue(It->second, F, getValueState(Val));
- }
+ if (!TrackedMultipleRetVals.empty()) {
+ if (const StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
+ if (MRVFunctionsTracked.count(F))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
+ getStructValueState(ResultOp, i));
+
}
}
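
// Editor's sketch (not part of the patch): return-value tracking in
// miniature. Every executable 'ret' of a tracked function merges its operand
// into the function's cell; conflicting returns drive the cell overdefined,
// and callers read the cell when propagating call results.
#include <map>

namespace ret_sketch {
enum State { Undefined, Constant, Overdefined };
struct Cell { State St; int Val; Cell() : St(Undefined), Val(0) {} };

std::map<const void *, Cell> TrackedRet;  // one cell per tracked function

void noteReturn(const void *F, int C) {
  Cell &RV = TrackedRet[F];
  if (RV.St == Overdefined)
    return;                               // already at bottom
  if (RV.St == Undefined) {
    RV.St = Constant;
    RV.Val = C;
    return;
  }
  if (RV.Val != C)
    RV.St = Overdefined;                  // two different return values
}
} // namespace ret_sketch
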
@@ -659,356 +768,306 @@ void SCCPSolver::visitTerminatorInst(TerminatorInst &TI) {
BasicBlock *BB = TI.getParent();
- // Mark all feasible successors executable...
+ // Mark all feasible successors executable.
for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
if (SuccFeasible[i])
markEdgeExecutable(BB, TI.getSuccessor(i));
}
void SCCPSolver::visitCastInst(CastInst &I) {
- Value *V = I.getOperand(0);
- LatticeVal &VState = getValueState(V);
- if (VState.isOverdefined()) // Inherit overdefinedness of operand
+ LatticeVal OpSt = getValueState(I.getOperand(0));
+ if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
markOverdefined(&I);
- else if (VState.isConstant()) // Propagate constant value
+ else if (OpSt.isConstant()) // Propagate constant value
markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
- VState.getConstant(), I.getType()));
+ OpSt.getConstant(), I.getType()));
}
-void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
- Value *Aggr = EVI.getAggregateOperand();
-
- // If the operand to the extractvalue is an undef, the result is undef.
- if (isa<UndefValue>(Aggr))
- return;
- // Currently only handle single-index extractvalues.
- if (EVI.getNumIndices() != 1) {
- markOverdefined(&EVI);
- return;
- }
-
- Function *F = 0;
- if (CallInst *CI = dyn_cast<CallInst>(Aggr))
- F = CI->getCalledFunction();
- else if (InvokeInst *II = dyn_cast<InvokeInst>(Aggr))
- F = II->getCalledFunction();
-
- // TODO: If IPSCCP resolves the callee of this function, we could propagate a
- // result back!
- if (F == 0 || TrackedMultipleRetVals.empty()) {
- markOverdefined(&EVI);
- return;
- }
-
- // See if we are tracking the result of the callee. If not tracking this
- // function (for example, it is a declaration) just move to overdefined.
- if (!TrackedMultipleRetVals.count(std::make_pair(F, *EVI.idx_begin()))) {
- markOverdefined(&EVI);
- return;
- }
-
- // Otherwise, the value will be merged in here as a result of CallSite
- // handling.
+void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
+  // If this returns a struct, mark all elements overdefined; we don't track
+  // structs in structs.
+ if (isa<StructType>(EVI.getType()))
+ return markAnythingOverdefined(&EVI);
+
+ // If this is extracting from more than one level of struct, we don't know.
+ if (EVI.getNumIndices() != 1)
+ return markOverdefined(&EVI);
+
+ Value *AggVal = EVI.getAggregateOperand();
+ unsigned i = *EVI.idx_begin();
+ LatticeVal EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
}
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
+ const StructType *STy = dyn_cast<StructType>(IVI.getType());
+ if (STy == 0)
+ return markOverdefined(&IVI);
+
+  // If this has more than one index, we can't handle it; drive all results to
+  // overdefined.
+ if (IVI.getNumIndices() != 1)
+ return markAnythingOverdefined(&IVI);
+
Value *Aggr = IVI.getAggregateOperand();
- Value *Val = IVI.getInsertedValueOperand();
-
- // If the operands to the insertvalue are undef, the result is undef.
- if (isa<UndefValue>(Aggr) && isa<UndefValue>(Val))
- return;
-
- // Currently only handle single-index insertvalues.
- if (IVI.getNumIndices() != 1) {
- markOverdefined(&IVI);
- return;
- }
-
- // Currently only handle insertvalue instructions that are in a single-use
- // chain that builds up a return value.
- for (const InsertValueInst *TmpIVI = &IVI; ; ) {
- if (!TmpIVI->hasOneUse()) {
- markOverdefined(&IVI);
- return;
+ unsigned Idx = *IVI.idx_begin();
+
+ // Compute the result based on what we're inserting.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // This passes through all values that aren't the inserted element.
+ if (i != Idx) {
+ LatticeVal EltVal = getStructValueState(Aggr, i);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal);
+ continue;
}
- const Value *V = *TmpIVI->use_begin();
- if (isa<ReturnInst>(V))
- break;
- TmpIVI = dyn_cast<InsertValueInst>(V);
- if (!TmpIVI) {
- markOverdefined(&IVI);
- return;
+
+ Value *Val = IVI.getInsertedValueOperand();
+ if (isa<StructType>(Val->getType()))
+ // We don't track structs in structs.
+ markOverdefined(getStructValueState(&IVI, i), &IVI);
+ else {
+ LatticeVal InVal = getValueState(Val);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, InVal);
}
}
-
- // See if we are tracking the result of the callee.
- Function *F = IVI.getParent()->getParent();
- DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator
- It = TrackedMultipleRetVals.find(std::make_pair(F, *IVI.idx_begin()));
-
- // Merge in the inserted member value.
- if (It != TrackedMultipleRetVals.end())
- mergeInValue(It->second, F, getValueState(Val));
-
- // Mark the aggregate result of the IVI overdefined; any tracking that we do
- // will be done on the individual member values.
- markOverdefined(&IVI);
}
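
// Editor's sketch (not part of the patch): the per-field flow of the loop
// above for 'insertvalue %agg, %val, Idx'. Field Idx takes the inserted
// scalar's state and every other field passes through from the aggregate;
// the real code lattice-merges rather than assigns, which this simplifies.
#include <vector>

namespace iv_sketch {
std::vector<int> insertValueStates(const std::vector<int> &AggFieldStates,
                                   unsigned Idx, int InsertedState) {
  std::vector<int> Result(AggFieldStates);  // pass-through fields
  Result[Idx] = InsertedState;              // the one replaced field
  return Result;
}
} // namespace iv_sketch
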
void SCCPSolver::visitSelectInst(SelectInst &I) {
- LatticeVal &CondValue = getValueState(I.getCondition());
+ // If this select returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (isa<StructType>(I.getType()))
+ return markAnythingOverdefined(&I);
+
+ LatticeVal CondValue = getValueState(I.getCondition());
if (CondValue.isUndefined())
return;
- if (CondValue.isConstant()) {
- if (ConstantInt *CondCB = dyn_cast<ConstantInt>(CondValue.getConstant())){
- mergeInValue(&I, getValueState(CondCB->getZExtValue() ? I.getTrueValue()
- : I.getFalseValue()));
- return;
- }
+
+ if (ConstantInt *CondCB = CondValue.getConstantInt()) {
+ Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
+ mergeInValue(&I, getValueState(OpVal));
+ return;
}
// Otherwise, the condition is overdefined or a constant we can't evaluate.
// See if we can produce something better than overdefined based on the T/F
// value.
- LatticeVal &TVal = getValueState(I.getTrueValue());
- LatticeVal &FVal = getValueState(I.getFalseValue());
+ LatticeVal TVal = getValueState(I.getTrueValue());
+ LatticeVal FVal = getValueState(I.getFalseValue());
// select ?, C, C -> C.
if (TVal.isConstant() && FVal.isConstant() &&
- TVal.getConstant() == FVal.getConstant()) {
- markConstant(&I, FVal.getConstant());
- return;
- }
+ TVal.getConstant() == FVal.getConstant())
+ return markConstant(&I, FVal.getConstant());
- if (TVal.isUndefined()) { // select ?, undef, X -> X.
- mergeInValue(&I, FVal);
- } else if (FVal.isUndefined()) { // select ?, X, undef -> X.
- mergeInValue(&I, TVal);
- } else {
- markOverdefined(&I);
- }
+ if (TVal.isUndefined()) // select ?, undef, X -> X.
+ return mergeInValue(&I, FVal);
+ if (FVal.isUndefined()) // select ?, X, undef -> X.
+ return mergeInValue(&I, TVal);
+ markOverdefined(&I);
}
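
// Editor's sketch (not part of the patch): the select rules above as a
// table, for the case where the condition did not fold to a constant.
namespace select_sketch {
enum Result { TakeEither /*same constant*/, TakeTrue, TakeFalse, Overdefined };

Result selectRule(bool SameConstant, bool TrueIsUndef, bool FalseIsUndef) {
  if (SameConstant)  return TakeEither;  // select ?, C, C      -> C
  if (TrueIsUndef)   return TakeFalse;   // select ?, undef, X  -> X
  if (FalseIsUndef)  return TakeTrue;    // select ?, X, undef  -> X
  return Overdefined;                    // genuinely depends on the condition
}
} // namespace select_sketch
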
-// Handle BinaryOperators and Shift Instructions...
+// Handle Binary Operators.
void SCCPSolver::visitBinaryOperator(Instruction &I) {
+ LatticeVal V1State = getValueState(I.getOperand(0));
+ LatticeVal V2State = getValueState(I.getOperand(1));
+
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- LatticeVal &V1State = getValueState(I.getOperand(0));
- LatticeVal &V2State = getValueState(I.getOperand(1));
-
- if (V1State.isOverdefined() || V2State.isOverdefined()) {
- // If this is an AND or OR with 0 or -1, it doesn't matter that the other
- // operand is overdefined.
- if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
- LatticeVal *NonOverdefVal = 0;
- if (!V1State.isOverdefined()) {
- NonOverdefVal = &V1State;
- } else if (!V2State.isOverdefined()) {
- NonOverdefVal = &V2State;
+ if (V1State.isConstant() && V2State.isConstant())
+ return markConstant(IV, &I,
+ ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
+ V2State.getConstant()));
+
+  // If an operand is still undefined, wait for it to resolve.
+ if (!V1State.isOverdefined() && !V2State.isOverdefined())
+ return;
+
+ // Otherwise, one of our operands is overdefined. Try to produce something
+ // better than overdefined with some tricks.
+
+ // If this is an AND or OR with 0 or -1, it doesn't matter that the other
+ // operand is overdefined.
+ if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
+ LatticeVal *NonOverdefVal = 0;
+ if (!V1State.isOverdefined())
+ NonOverdefVal = &V1State;
+ else if (!V2State.isOverdefined())
+ NonOverdefVal = &V2State;
+
+ if (NonOverdefVal) {
+ if (NonOverdefVal->isUndefined()) {
+ // Could annihilate value.
+ if (I.getOpcode() == Instruction::And)
+ markConstant(IV, &I, Constant::getNullValue(I.getType()));
+ else if (const VectorType *PT = dyn_cast<VectorType>(I.getType()))
+ markConstant(IV, &I, Constant::getAllOnesValue(PT));
+ else
+ markConstant(IV, &I,
+ Constant::getAllOnesValue(I.getType()));
+ return;
}
-
- if (NonOverdefVal) {
- if (NonOverdefVal->isUndefined()) {
- // Could annihilate value.
- if (I.getOpcode() == Instruction::And)
- markConstant(IV, &I, Constant::getNullValue(I.getType()));
- else if (const VectorType *PT = dyn_cast<VectorType>(I.getType()))
- markConstant(IV, &I, Constant::getAllOnesValue(PT));
- else
- markConstant(IV, &I,
- Constant::getAllOnesValue(I.getType()));
- return;
- } else {
- if (I.getOpcode() == Instruction::And) {
- if (NonOverdefVal->getConstant()->isNullValue()) {
- markConstant(IV, &I, NonOverdefVal->getConstant());
- return; // X and 0 = 0
- }
- } else {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(NonOverdefVal->getConstant()))
- if (CI->isAllOnesValue()) {
- markConstant(IV, &I, NonOverdefVal->getConstant());
- return; // X or -1 = -1
- }
- }
- }
+
+ if (I.getOpcode() == Instruction::And) {
+ // X and 0 = 0
+ if (NonOverdefVal->getConstant()->isNullValue())
+ return markConstant(IV, &I, NonOverdefVal->getConstant());
+ } else {
+ if (ConstantInt *CI = NonOverdefVal->getConstantInt())
+ if (CI->isAllOnesValue()) // X or -1 = -1
+ return markConstant(IV, &I, NonOverdefVal->getConstant());
}
}
+ }
- // If both operands are PHI nodes, it is possible that this instruction has
- // a constant value, despite the fact that the PHI node doesn't. Check for
- // this condition now.
- if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
- if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
- if (PN1->getParent() == PN2->getParent()) {
- // Since the two PHI nodes are in the same basic block, they must have
- // entries for the same predecessors. Walk the predecessor list, and
- // if all of the incoming values are constants, and the result of
- // evaluating this expression with all incoming value pairs is the
- // same, then this expression is a constant even though the PHI node
- // is not a constant!
- LatticeVal Result;
- for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
- LatticeVal &In1 = getValueState(PN1->getIncomingValue(i));
- BasicBlock *InBlock = PN1->getIncomingBlock(i);
- LatticeVal &In2 =
- getValueState(PN2->getIncomingValueForBlock(InBlock));
-
- if (In1.isOverdefined() || In2.isOverdefined()) {
+ // If both operands are PHI nodes, it is possible that this instruction has
+ // a constant value, despite the fact that the PHI node doesn't. Check for
+ // this condition now.
+ if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
+ if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
+ if (PN1->getParent() == PN2->getParent()) {
+ // Since the two PHI nodes are in the same basic block, they must have
+ // entries for the same predecessors. Walk the predecessor list, and
+ // if all of the incoming values are constants, and the result of
+ // evaluating this expression with all incoming value pairs is the
+ // same, then this expression is a constant even though the PHI node
+ // is not a constant!
+ LatticeVal Result;
+ for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
+ LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
+ BasicBlock *InBlock = PN1->getIncomingBlock(i);
+            LatticeVal In2 =
+              getValueState(PN2->getIncomingValueForBlock(InBlock));
+
+ if (In1.isOverdefined() || In2.isOverdefined()) {
+ Result.markOverdefined();
+ break; // Cannot fold this operation over the PHI nodes!
+ }
+
+ if (In1.isConstant() && In2.isConstant()) {
+ Constant *V = ConstantExpr::get(I.getOpcode(), In1.getConstant(),
+ In2.getConstant());
+ if (Result.isUndefined())
+ Result.markConstant(V);
+ else if (Result.isConstant() && Result.getConstant() != V) {
Result.markOverdefined();
- break; // Cannot fold this operation over the PHI nodes!
- } else if (In1.isConstant() && In2.isConstant()) {
- Constant *V =
- ConstantExpr::get(I.getOpcode(), In1.getConstant(),
- In2.getConstant());
- if (Result.isUndefined())
- Result.markConstant(V);
- else if (Result.isConstant() && Result.getConstant() != V) {
- Result.markOverdefined();
- break;
- }
+ break;
}
}
+ }
- // If we found a constant value here, then we know the instruction is
- // constant despite the fact that the PHI nodes are overdefined.
- if (Result.isConstant()) {
- markConstant(IV, &I, Result.getConstant());
- // Remember that this instruction is virtually using the PHI node
- // operands.
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
- return;
- } else if (Result.isUndefined()) {
- return;
- }
-
- // Okay, this really is overdefined now. Since we might have
- // speculatively thought that this was not overdefined before, and
- // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
- // make sure to clean out any entries that we put there, for
- // efficiency.
- std::multimap<PHINode*, Instruction*>::iterator It, E;
- tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN1);
- while (It != E) {
- if (It->second == &I) {
- UsersOfOverdefinedPHIs.erase(It++);
- } else
- ++It;
- }
- tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN2);
- while (It != E) {
- if (It->second == &I) {
- UsersOfOverdefinedPHIs.erase(It++);
- } else
- ++It;
- }
+ // If we found a constant value here, then we know the instruction is
+ // constant despite the fact that the PHI nodes are overdefined.
+ if (Result.isConstant()) {
+ markConstant(IV, &I, Result.getConstant());
+ // Remember that this instruction is virtually using the PHI node
+ // operands.
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ return;
}
+
+ if (Result.isUndefined())
+ return;
- markOverdefined(IV, &I);
- } else if (V1State.isConstant() && V2State.isConstant()) {
- markConstant(IV, &I,
- ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
- V2State.getConstant()));
- }
+ // Okay, this really is overdefined now. Since we might have
+ // speculatively thought that this was not overdefined before, and
+ // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
+ // make sure to clean out any entries that we put there, for
+ // efficiency.
+ RemoveFromOverdefinedPHIs(&I, PN1);
+ RemoveFromOverdefinedPHIs(&I, PN2);
+ }
+
+ markOverdefined(&I);
}
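
// Editor's sketch (not part of the patch; stand-alone C++17): the two tricks
// in the function above. First, and/or can fold even when one operand is
// overdefined. Second, an operation over two PHIs of the same block can be
// constant edge-by-edge even though neither PHI is: with
// %a = phi [1,%L],[2,%R] and %b = phi [4,%L],[3,%R], %a + %b is 5 both ways.
#include <optional>
#include <utility>
#include <vector>

namespace binop_sketch {
// x & 0 == 0 and x | -1 == -1 regardless of x (-1 is all-ones for int).
std::optional<int> foldAndOr(bool IsAnd, int KnownOperand) {
  if (IsAnd && KnownOperand == 0)
    return 0;
  if (!IsAnd && KnownOperand == -1)
    return -1;
  return std::nullopt;                // result really depends on both sides
}

// Each pair holds the two PHIs' incoming constants for one shared
// predecessor; '+' stands in for ConstantExpr::get of the real opcode.
std::optional<int>
addOverPhiPairs(const std::vector<std::pair<int, int> > &Incoming) {
  std::optional<int> Result;
  for (const std::pair<int, int> &P : Incoming) {
    int V = P.first + P.second;
    if (!Result)
      Result = V;                     // first edge seeds the candidate
    else if (*Result != V)
      return std::nullopt;            // edges disagree: overdefined
  }
  return Result;
}
} // namespace binop_sketch
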
-// Handle ICmpInst instruction...
+// Handle ICmpInst instruction.
void SCCPSolver::visitCmpInst(CmpInst &I) {
+ LatticeVal V1State = getValueState(I.getOperand(0));
+ LatticeVal V2State = getValueState(I.getOperand(1));
+
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- LatticeVal &V1State = getValueState(I.getOperand(0));
- LatticeVal &V2State = getValueState(I.getOperand(1));
-
- if (V1State.isOverdefined() || V2State.isOverdefined()) {
- // If both operands are PHI nodes, it is possible that this instruction has
- // a constant value, despite the fact that the PHI node doesn't. Check for
- // this condition now.
- if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
- if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
- if (PN1->getParent() == PN2->getParent()) {
- // Since the two PHI nodes are in the same basic block, they must have
- // entries for the same predecessors. Walk the predecessor list, and
- // if all of the incoming values are constants, and the result of
- // evaluating this expression with all incoming value pairs is the
- // same, then this expression is a constant even though the PHI node
- // is not a constant!
- LatticeVal Result;
- for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
- LatticeVal &In1 = getValueState(PN1->getIncomingValue(i));
- BasicBlock *InBlock = PN1->getIncomingBlock(i);
- LatticeVal &In2 =
- getValueState(PN2->getIncomingValueForBlock(InBlock));
-
- if (In1.isOverdefined() || In2.isOverdefined()) {
+ if (V1State.isConstant() && V2State.isConstant())
+ return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
+ V1State.getConstant(),
+ V2State.getConstant()));
+
+  // If operands are still undefined, wait for them to resolve.
+ if (!V1State.isOverdefined() && !V2State.isOverdefined())
+ return;
+
+  // If something is overdefined, use some tricks to avoid ending up
+  // overdefined ourselves if we can.
+
+ // If both operands are PHI nodes, it is possible that this instruction has
+ // a constant value, despite the fact that the PHI node doesn't. Check for
+ // this condition now.
+ if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
+ if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
+ if (PN1->getParent() == PN2->getParent()) {
+ // Since the two PHI nodes are in the same basic block, they must have
+ // entries for the same predecessors. Walk the predecessor list, and
+ // if all of the incoming values are constants, and the result of
+ // evaluating this expression with all incoming value pairs is the
+ // same, then this expression is a constant even though the PHI node
+ // is not a constant!
+ LatticeVal Result;
+ for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
+ LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
+ BasicBlock *InBlock = PN1->getIncomingBlock(i);
+          LatticeVal In2 =
+            getValueState(PN2->getIncomingValueForBlock(InBlock));
+
+ if (In1.isOverdefined() || In2.isOverdefined()) {
+ Result.markOverdefined();
+ break; // Cannot fold this operation over the PHI nodes!
+ }
+
+ if (In1.isConstant() && In2.isConstant()) {
+ Constant *V = ConstantExpr::getCompare(I.getPredicate(),
+ In1.getConstant(),
+ In2.getConstant());
+ if (Result.isUndefined())
+ Result.markConstant(V);
+ else if (Result.isConstant() && Result.getConstant() != V) {
Result.markOverdefined();
- break; // Cannot fold this operation over the PHI nodes!
- } else if (In1.isConstant() && In2.isConstant()) {
- Constant *V = ConstantExpr::getCompare(I.getPredicate(),
- In1.getConstant(),
- In2.getConstant());
- if (Result.isUndefined())
- Result.markConstant(V);
- else if (Result.isConstant() && Result.getConstant() != V) {
- Result.markOverdefined();
- break;
- }
+ break;
}
}
+ }
- // If we found a constant value here, then we know the instruction is
- // constant despite the fact that the PHI nodes are overdefined.
- if (Result.isConstant()) {
- markConstant(IV, &I, Result.getConstant());
- // Remember that this instruction is virtually using the PHI node
- // operands.
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
- return;
- } else if (Result.isUndefined()) {
- return;
- }
-
- // Okay, this really is overdefined now. Since we might have
- // speculatively thought that this was not overdefined before, and
- // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
- // make sure to clean out any entries that we put there, for
- // efficiency.
- std::multimap<PHINode*, Instruction*>::iterator It, E;
- tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN1);
- while (It != E) {
- if (It->second == &I) {
- UsersOfOverdefinedPHIs.erase(It++);
- } else
- ++It;
- }
- tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN2);
- while (It != E) {
- if (It->second == &I) {
- UsersOfOverdefinedPHIs.erase(It++);
- } else
- ++It;
- }
+ // If we found a constant value here, then we know the instruction is
+ // constant despite the fact that the PHI nodes are overdefined.
+ if (Result.isConstant()) {
+ markConstant(&I, Result.getConstant());
+ // Remember that this instruction is virtually using the PHI node
+ // operands.
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ return;
}
+
+ if (Result.isUndefined())
+ return;
- markOverdefined(IV, &I);
- } else if (V1State.isConstant() && V2State.isConstant()) {
- markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
- V1State.getConstant(),
- V2State.getConstant()));
- }
+ // Okay, this really is overdefined now. Since we might have
+ // speculatively thought that this was not overdefined before, and
+ // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
+ // make sure to clean out any entries that we put there, for
+ // efficiency.
+ RemoveFromOverdefinedPHIs(&I, PN1);
+ RemoveFromOverdefinedPHIs(&I, PN2);
+ }
+
+ markOverdefined(&I);
}
void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
- // FIXME : SCCP does not handle vectors properly.
- markOverdefined(&I);
- return;
+  // TODO: SCCP does not handle vectors properly.
+ return markOverdefined(&I);
#if 0
LatticeVal &ValState = getValueState(I.getOperand(0));
@@ -1023,9 +1082,8 @@ void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
}
void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
- // FIXME : SCCP does not handle vectors properly.
- markOverdefined(&I);
- return;
+  // TODO: SCCP does not handle vectors properly.
+ return markOverdefined(&I);
#if 0
LatticeVal &ValState = getValueState(I.getOperand(0));
LatticeVal &EltState = getValueState(I.getOperand(1));
@@ -1048,9 +1106,8 @@ void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
}
void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
- // FIXME : SCCP does not handle vectors properly.
- markOverdefined(&I);
- return;
+  // TODO: SCCP does not handle vectors properly.
+ return markOverdefined(&I);
#if 0
LatticeVal &V1State = getValueState(I.getOperand(0));
LatticeVal &V2State = getValueState(I.getOperand(1));
@@ -1076,46 +1133,46 @@ void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
#endif
}
-// Handle getelementptr instructions... if all operands are constants then we
+// Handle getelementptr instructions. If all operands are constants, then we
// can turn this into a getelementptr ConstantExpr.
//
void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
- LatticeVal &IV = ValueState[&I];
- if (IV.isOverdefined()) return;
+ if (ValueState[&I].isOverdefined()) return;
SmallVector<Constant*, 8> Operands;
Operands.reserve(I.getNumOperands());
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- LatticeVal &State = getValueState(I.getOperand(i));
+ LatticeVal State = getValueState(I.getOperand(i));
if (State.isUndefined())
- return; // Operands are not resolved yet...
- else if (State.isOverdefined()) {
- markOverdefined(IV, &I);
- return;
- }
+ return; // Operands are not resolved yet.
+
+ if (State.isOverdefined())
+ return markOverdefined(&I);
+
assert(State.isConstant() && "Unknown state!");
Operands.push_back(State.getConstant());
}
Constant *Ptr = Operands[0];
- Operands.erase(Operands.begin()); // Erase the pointer from idx list...
-
- markConstant(IV, &I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0],
- Operands.size()));
+ markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0]+1,
+ Operands.size()-1));
}
-void SCCPSolver::visitStoreInst(Instruction &SI) {
+void SCCPSolver::visitStoreInst(StoreInst &SI) {
+ // If this store is of a struct, ignore it.
+ if (isa<StructType>(SI.getOperand(0)->getType()))
+ return;
+
if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
return;
+
GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1));
DenseMap<GlobalVariable*, LatticeVal>::iterator I = TrackedGlobals.find(GV);
if (I == TrackedGlobals.end() || I->second.isOverdefined()) return;
- // Get the value we are storing into the global.
- LatticeVal &PtrVal = getValueState(SI.getOperand(0));
-
- mergeInValue(I->second, GV, PtrVal);
+ // Get the value we are storing into the global, then merge it.
+ mergeInValue(I->second, GV, getValueState(SI.getOperand(0)));
if (I->second.isOverdefined())
TrackedGlobals.erase(I); // No need to keep tracking this!
}
@@ -1124,50 +1181,42 @@ void SCCPSolver::visitStoreInst(Instruction &SI) {
// Handle load instructions. If the operand is a constant pointer to a constant
// global, we can replace the load with the loaded constant value!
void SCCPSolver::visitLoadInst(LoadInst &I) {
+ // If this load is of a struct, just mark the result overdefined.
+ if (isa<StructType>(I.getType()))
+ return markAnythingOverdefined(&I);
+
+ LatticeVal PtrVal = getValueState(I.getOperand(0));
+ if (PtrVal.isUndefined()) return; // The pointer is not resolved yet!
+
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- LatticeVal &PtrVal = getValueState(I.getOperand(0));
- if (PtrVal.isUndefined()) return; // The pointer is not resolved yet!
- if (PtrVal.isConstant() && !I.isVolatile()) {
- Value *Ptr = PtrVal.getConstant();
- // TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0) {
- // load null -> null
- markConstant(IV, &I, Constant::getNullValue(I.getType()));
- return;
- }
+ if (!PtrVal.isConstant() || I.isVolatile())
+ return markOverdefined(IV, &I);
+
+ Constant *Ptr = PtrVal.getConstant();
- // Transform load (constant global) into the value loaded.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
- if (GV->isConstant()) {
- if (GV->hasDefinitiveInitializer()) {
- markConstant(IV, &I, GV->getInitializer());
- return;
- }
- } else if (!TrackedGlobals.empty()) {
- // If we are tracking this global, merge in the known value for it.
- DenseMap<GlobalVariable*, LatticeVal>::iterator It =
- TrackedGlobals.find(GV);
- if (It != TrackedGlobals.end()) {
- mergeInValue(IV, &I, It->second);
- return;
- }
+ // load null -> null
+ if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
+ return markConstant(IV, &I, Constant::getNullValue(I.getType()));
+
+ // Transform load (constant global) into the value loaded.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ if (!TrackedGlobals.empty()) {
+ // If we are tracking this global, merge in the known value for it.
+ DenseMap<GlobalVariable*, LatticeVal>::iterator It =
+ TrackedGlobals.find(GV);
+ if (It != TrackedGlobals.end()) {
+ mergeInValue(IV, &I, It->second);
+ return;
}
}
-
- // Transform load (constantexpr_GEP global, 0, ...) into the value loaded.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
- if (CE->getOpcode() == Instruction::GetElementPtr)
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
- if (Constant *V =
- ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) {
- markConstant(IV, &I, V);
- return;
- }
}
+ // Transform load from a constant into a constant if possible.
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, TD))
+ return markConstant(IV, &I, C);
+
// Otherwise we cannot say for certain what value this load will produce.
// Bail out.
markOverdefined(IV, &I);
@@ -1180,97 +1229,83 @@ void SCCPSolver::visitCallSite(CallSite CS) {
// The common case is that we aren't tracking the callee, either because we
// are not doing interprocedural analysis or the callee is indirect, or is
// external. Handle these cases first.
- if (F == 0 || !F->hasLocalLinkage()) {
+ if (F == 0 || F->isDeclaration()) {
CallOverdefined:
// Void return and not tracking callee, just bail.
if (I->getType()->isVoidTy()) return;
// Otherwise, if we have a single return value case, and if the function is
// a declaration, maybe we can constant fold it.
- if (!isa<StructType>(I->getType()) && F && F->isDeclaration() &&
+ if (F && F->isDeclaration() && !isa<StructType>(I->getType()) &&
canConstantFoldCallTo(F)) {
SmallVector<Constant*, 8> Operands;
for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
AI != E; ++AI) {
- LatticeVal &State = getValueState(*AI);
+ LatticeVal State = getValueState(*AI);
+
if (State.isUndefined())
return; // Operands are not resolved yet.
- else if (State.isOverdefined()) {
- markOverdefined(I);
- return;
- }
+ if (State.isOverdefined())
+ return markOverdefined(I);
assert(State.isConstant() && "Unknown state!");
Operands.push_back(State.getConstant());
}
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size())) {
- markConstant(I, C);
- return;
- }
+ if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size()))
+ return markConstant(I, C);
}
// Otherwise, we don't know anything about this call, mark it overdefined.
- markOverdefined(I);
- return;
+ return markAnythingOverdefined(I);
}
- // If this is a single/zero retval case, see if we're tracking the function.
- DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
- if (TFRVI != TrackedRetVals.end()) {
- // If so, propagate the return value of the callee into this call result.
- mergeInValue(I, TFRVI->second);
- } else if (isa<StructType>(I->getType())) {
- // Check to see if we're tracking this callee, if not, handle it in the
- // common path above.
- DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator
- TMRVI = TrackedMultipleRetVals.find(std::make_pair(F, 0));
- if (TMRVI == TrackedMultipleRetVals.end())
- goto CallOverdefined;
-
- // Need to mark as overdefined, otherwise it stays undefined which
- // creates extractvalue undef, <idx>
- markOverdefined(I);
- // If we are tracking this callee, propagate the return values of the call
- // into this call site. We do this by walking all the uses. Single-index
- // ExtractValueInst uses can be tracked; anything more complicated is
- // currently handled conservatively.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI) {
- if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(*UI)) {
- if (EVI->getNumIndices() == 1) {
- mergeInValue(EVI,
- TrackedMultipleRetVals[std::make_pair(F, *EVI->idx_begin())]);
- continue;
- }
+ // If this is a local function that doesn't have its address taken, mark its
+ // entry block executable and merge the actual arguments at the call site
+ // into the formal arguments of the function.
+ if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){
+ MarkBlockExecutable(F->begin());
+
+ // Propagate information from this call site into the callee.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++CAI) {
+ // If this argument is byval, and if the function is not readonly, there
+ // will be an implicit copy formed of the input aggregate.
+ if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
+ markOverdefined(AI);
+ continue;
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(AI->getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(getStructValueState(AI, i), AI,
+ getStructValueState(*CAI, i));
+ } else {
+ mergeInValue(AI, getValueState(*CAI));
}
- // The aggregate value is used in a way not handled here. Assume nothing.
- markOverdefined(*UI);
}
- } else {
- // Otherwise we're not tracking this callee, so handle it in the
- // common path above.
- goto CallOverdefined;
}
-
- // Finally, if this is the first call to the function hit, mark its entry
- // block executable.
- if (!BBExecutable.count(F->begin()))
- MarkBlockExecutable(F->begin());
- // Propagate information from this call site into the callee.
- CallSite::arg_iterator CAI = CS.arg_begin();
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI, ++CAI) {
- LatticeVal &IV = ValueState[AI];
- if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
- IV.markOverdefined();
- continue;
- }
- if (!IV.isOverdefined())
- mergeInValue(IV, AI, getValueState(*CAI));
+ // If this is a single/zero retval case, see if we're tracking the function.
+ if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (!MRVFunctionsTracked.count(F))
+ goto CallOverdefined; // Not tracking this callee.
+
+ // If we are tracking this callee, propagate the result of the function
+ // into this call site.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(getStructValueState(I, i), I,
+ TrackedMultipleRetVals[std::make_pair(F, i)]);
+ } else {
+ DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
+ if (TFRVI == TrackedRetVals.end())
+ goto CallOverdefined; // Not tracking this callee.
+
+ // If so, propagate the return value of the callee into this call result.
+ mergeInValue(I, TFRVI->second);
}
}
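
Information thus flows in both directions across a tracked call: actuals merge into formals on entry, and tracked return states merge back into the call's result. A rough self-contained model of that merge step, using a three-level lattice with assumed names (the real LatticeVal also carries the constant's value):

#include <cstddef>
#include <vector>

enum class Lattice { Undefined, Constant, Overdefined };

// Meet of two lattice states; real code also compares the constant values
// and goes to Overdefined when two distinct constants meet.
Lattice merge(Lattice A, Lattice B) {
  if (A == Lattice::Overdefined || B == Lattice::Overdefined)
    return Lattice::Overdefined;
  if (A == Lattice::Undefined) return B;
  if (B == Lattice::Undefined) return A;
  return Lattice::Constant;
}

// Propagate call-site actuals into a callee's formal argument states.
void mergeCallIntoFormals(const std::vector<Lattice> &Actuals,
                          std::vector<Lattice> &Formals) {
  for (size_t i = 0; i < Formals.size() && i < Actuals.size(); ++i)
    Formals[i] = merge(Formals[i], Actuals[i]);
}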
@@ -1278,10 +1313,10 @@ void SCCPSolver::Solve() {
// Process the work lists until they are empty!
while (!BBWorkList.empty() || !InstWorkList.empty() ||
!OverdefinedInstWorkList.empty()) {
- // Process the instruction work list...
+ // Process the overdefined instruction work list first, since it drives
+ // other values to overdefined more quickly.
while (!OverdefinedInstWorkList.empty()) {
- Value *I = OverdefinedInstWorkList.back();
- OverdefinedInstWorkList.pop_back();
+ Value *I = OverdefinedInstWorkList.pop_back_val();
DEBUG(errs() << "\nPopped off OI-WL: " << *I << '\n');
@@ -1290,33 +1325,35 @@ void SCCPSolver::Solve() {
//
// Anything on this worklist that is overdefined need not be visited
// since all of its users will have already been marked as overdefined
- // Update all of the users of this instruction's value...
+ // Update all of the users of this instruction's value.
//
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
UI != E; ++UI)
- OperandChangedState(*UI);
+ if (Instruction *I = dyn_cast<Instruction>(*UI))
+ OperandChangedState(I);
}
- // Process the instruction work list...
+
+ // Process the instruction work list.
while (!InstWorkList.empty()) {
- Value *I = InstWorkList.back();
- InstWorkList.pop_back();
+ Value *I = InstWorkList.pop_back_val();
DEBUG(errs() << "\nPopped off I-WL: " << *I << '\n');
- // "I" got into the work list because it either made the transition from
- // bottom to constant
+ // "I" got into the work list because it made the transition from undef to
+ // constant.
//
// Anything on this worklist that is overdefined need not be visited
// since all of its users will have already been marked as overdefined.
- // Update all of the users of this instruction's value...
+ // Update all of the users of this instruction's value.
//
- if (!getValueState(I).isOverdefined())
+ if (isa<StructType>(I->getType()) || !getValueState(I).isOverdefined())
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
UI != E; ++UI)
- OperandChangedState(*UI);
+ if (Instruction *I = dyn_cast<Instruction>(*UI))
+ OperandChangedState(I);
}
- // Process the basic block work list...
+ // Process the basic block work list.
while (!BBWorkList.empty()) {
BasicBlock *BB = BBWorkList.back();
BBWorkList.pop_back();
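
Draining the overdefined list before the others is a scheduling heuristic, not a correctness requirement: overdefined facts are final, so propagating them eagerly keeps users from being revisited with weaker information first. The three-list structure in isolation (hypothetical item ids instead of IR objects):

#include <deque>

struct ToySolver {
  std::deque<int> OverdefinedWL, InstWL, BBWL;   // work items by id

  void visitInst(int) { /* transfer function; may push new work */ }
  void visitBlock(int) { /* mark feasible edges; may push new work */ }

  void solve() {
    while (!OverdefinedWL.empty() || !InstWL.empty() || !BBWL.empty()) {
      // Overdefined results are final: push them out first so users
      // settle without extra intermediate revisits.
      while (!OverdefinedWL.empty()) {
        int I = OverdefinedWL.back(); OverdefinedWL.pop_back();
        visitInst(I);
      }
      while (!InstWL.empty()) {
        int I = InstWL.back(); InstWL.pop_back();
        visitInst(I);
      }
      while (!BBWL.empty()) {
        int B = BBWL.back(); BBWL.pop_back();
        visitBlock(B);
      }
    }
  }
};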
@@ -1357,13 +1394,35 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Look for instructions which produce undef values.
if (I->getType()->isVoidTy()) continue;
+ if (const StructType *STy = dyn_cast<StructType>(I->getType())) {
+ // Only a few kinds of instructions that can produce structs matter for
+ // undef: just send all their results to overdefined. We could be more
+ // precise than this, but it isn't worth bothering.
+ if (isa<CallInst>(I) || isa<SelectInst>(I)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal &LV = getStructValueState(I, i);
+ if (LV.isUndefined())
+ markOverdefined(LV, I);
+ }
+ }
+ continue;
+ }
+
LatticeVal &LV = getValueState(I);
if (!LV.isUndefined()) continue;
+ // No instructions using structs need disambiguation.
+ if (isa<StructType>(I->getOperand(0)->getType()))
+ continue;
+
// Get the lattice values of the first two operands for use below.
- LatticeVal &Op0LV = getValueState(I->getOperand(0));
+ LatticeVal Op0LV = getValueState(I->getOperand(0));
LatticeVal Op1LV;
if (I->getNumOperands() == 2) {
+ // No instructions using structs need disambiguation.
+ if (isa<StructType>(I->getOperand(1)->getType()))
+ continue;
+
// If this is a two-operand instruction, and if both operands are
// undefs, the result stays undef.
Op1LV = getValueState(I->getOperand(1));
@@ -1380,23 +1439,18 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// After a zero extend, we know the top part is zero. SExt doesn't have
// to be handled here, because we don't know whether the top part is 1's
// or 0's.
- assert(Op0LV.isUndefined());
- markForcedConstant(LV, I, Constant::getNullValue(ITy));
+ markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Mul:
case Instruction::And:
// undef * X -> 0. X could be zero.
// undef & X -> 0. X could be zero.
- markForcedConstant(LV, I, Constant::getNullValue(ITy));
+ markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Or:
// undef | X -> -1. X could be -1.
- if (const VectorType *PTy = dyn_cast<VectorType>(ITy))
- markForcedConstant(LV, I,
- Constant::getAllOnesValue(PTy));
- else
- markForcedConstant(LV, I, Constant::getAllOnesValue(ITy));
+ markForcedConstant(I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::SDiv:
@@ -1409,7 +1463,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
- markForcedConstant(LV, I, Constant::getNullValue(ITy));
+ markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::AShr:
@@ -1418,9 +1472,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// X >>s undef -> X. X could be 0, X could have the high-bit known set.
if (Op0LV.isConstant())
- markForcedConstant(LV, I, Op0LV.getConstant());
+ markForcedConstant(I, Op0LV.getConstant());
else
- markOverdefined(LV, I);
+ markOverdefined(I);
return true;
case Instruction::LShr:
case Instruction::Shl:
@@ -1430,7 +1484,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// X >> undef -> 0. X could be 0.
// X << undef -> 0. X could be 0.
- markForcedConstant(LV, I, Constant::getNullValue(ITy));
+ markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Select:
// undef ? X : Y -> X or Y. There could be commonality between X/Y.
@@ -1448,15 +1502,15 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
}
if (Op1LV.isConstant())
- markForcedConstant(LV, I, Op1LV.getConstant());
+ markForcedConstant(I, Op1LV.getConstant());
else
- markOverdefined(LV, I);
+ markOverdefined(I);
return true;
case Instruction::Call:
// If a call has an undef result, it is because it is constant foldable
// but one of the inputs was undef. Just force the result to
// overdefined.
- markOverdefined(LV, I);
+ markOverdefined(I);
return true;
}
}
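
Each case above commits undef to one concrete value that stays consistent no matter what the other operand later turns out to be; the operand-reusing cases (ashr, select) instead reuse an already-known constant or give up. The zero/all-ones half of the rules fits in a few lines (an assumed summary, not the pass's interface):

#include <cstring>

// Safe forced constant for an undef operand of the named opcode:
// 0 for the zero value, -1 for the all-ones value.
int forcedConstantForUndef(const char *Opcode) {
  if (std::strcmp(Opcode, "or") == 0)
    return -1;   // undef | X could be all ones, so force -1
  // zext, mul, and, sdiv, udiv, srem, urem, shl, lshr: zero is a
  // possible result for every value of the other operand.
  return 0;
}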
@@ -1467,7 +1521,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (!getValueState(BI->getCondition()).isUndefined())
continue;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (SI->getNumSuccessors()<2) // no cases
+ if (SI->getNumSuccessors() < 2) // no cases
continue;
if (!getValueState(SI->getCondition()).isUndefined())
continue;
@@ -1493,7 +1547,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// as undef, then further analysis could think the undef went another way
// leading to an inconsistent set of conclusions.
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- BI->setCondition(ConstantInt::getFalse(*Context));
+ BI->setCondition(ConstantInt::getFalse(BI->getContext()));
} else {
SwitchInst *SI = cast<SwitchInst>(TI);
SI->setCondition(SI->getCaseValue(1));
@@ -1531,26 +1585,40 @@ char SCCP::ID = 0;
static RegisterPass<SCCP>
X("sccp", "Sparse Conditional Constant Propagation");
-// createSCCPPass - This is the public interface to this file...
+// createSCCPPass - This is the public interface to this file.
FunctionPass *llvm::createSCCPPass() {
return new SCCP();
}
+static void DeleteInstructionInBlock(BasicBlock *BB) {
+ DEBUG(errs() << " BasicBlock Dead:" << *BB);
+ ++NumDeadBlocks;
+
+ // Delete the instructions backwards; this reduces the number of def-use
+ // and use-def chain updates that have to be made.
+ while (!isa<TerminatorInst>(BB->begin())) {
+ Instruction *I = --BasicBlock::iterator(BB->getTerminator());
+
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ BB->getInstList().erase(I);
+ ++NumInstRemoved;
+ }
+}
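
The backwards order matters because a block is in def-before-use order: erasing from the tail removes users before their definitions, so most instructions already have empty use lists by the time they are erased. In skeleton form (an assumed container, not the real instruction list):

#include <vector>

// Remove everything except the last element (the "terminator"),
// back to front, so later elements ("uses") disappear before the
// earlier elements ("defs") they refer to.
void deleteBodyBackwards(std::vector<int> &Block) {
  while (Block.size() > 1)
    Block.erase(Block.end() - 2);  // element just before the terminator
}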
// runOnFunction() - Run the Sparse Conditional Constant Propagation algorithm,
// and return true if the function was modified.
//
bool SCCP::runOnFunction(Function &F) {
DEBUG(errs() << "SCCP on function '" << F.getName() << "'\n");
- SCCPSolver Solver;
- Solver.setContext(&F.getContext());
+ SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(F.begin());
// Mark all arguments to the function as being overdefined.
for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI)
- Solver.markOverdefined(AI);
+ Solver.markAnythingOverdefined(AI);
// Solve for constants.
bool ResolvedUndefs = true;
@@ -1565,57 +1633,45 @@ bool SCCP::runOnFunction(Function &F) {
// If we decided that there are basic blocks that are dead in this function,
// delete their contents now. Note that we cannot actually delete the blocks,
// as we cannot modify the CFG of the function.
- //
- SmallVector<Instruction*, 512> Insts;
- std::map<Value*, LatticeVal> &Values = Solver.getValueMapping();
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!Solver.isBlockExecutable(BB)) {
- DEBUG(errs() << " BasicBlock Dead:" << *BB);
- ++NumDeadBlocks;
-
- // Delete the instructions backwards, as it has a reduced likelihood of
- // having to update as many def-use and use-def chains.
- for (BasicBlock::iterator I = BB->begin(), E = BB->getTerminator();
- I != E; ++I)
- Insts.push_back(I);
- while (!Insts.empty()) {
- Instruction *I = Insts.back();
- Insts.pop_back();
- if (!I->use_empty())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- BB->getInstList().erase(I);
- MadeChanges = true;
- ++NumInstRemoved;
- }
- } else {
- // Iterate over all of the instructions in a function, replacing them with
- // constants if we have found them to be of constant values.
- //
- for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
- Instruction *Inst = BI++;
- if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
- continue;
-
- LatticeVal &IV = Values[Inst];
- if (!IV.isConstant() && !IV.isUndefined())
- continue;
-
- Constant *Const = IV.isConstant()
- ? IV.getConstant() : UndefValue::get(Inst->getType());
- DEBUG(errs() << " Constant: " << *Const << " = " << *Inst);
+ DeleteInstructionInBlock(BB);
+ MadeChanges = true;
+ continue;
+ }
+
+ // Iterate over all of the instructions in a function, replacing them with
+ // constants if we have found them to be of constant values.
+ //
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
+ continue;
+
+ // TODO: Reconstruct structs from their elements.
+ if (isa<StructType>(Inst->getType()))
+ continue;
+
+ LatticeVal IV = Solver.getLatticeValueFor(Inst);
+ if (IV.isOverdefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(errs() << " Constant: " << *Const << " = " << *Inst);
- // Replaces all of the uses of a variable with uses of the constant.
- Inst->replaceAllUsesWith(Const);
-
- // Delete the instruction.
- Inst->eraseFromParent();
-
- // Hey, we just changed something!
- MadeChanges = true;
- ++NumInstRemoved;
- }
+ // Replaces all of the uses of a variable with uses of the constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++NumInstRemoved;
}
+ }
return MadeChanges;
}
@@ -1637,7 +1693,7 @@ char IPSCCP::ID = 0;
static RegisterPass<IPSCCP>
Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation");
-// createIPSCCPPass - This is the public interface to this file...
+// createIPSCCPPass - This is the public interface to this file.
ModulePass *llvm::createIPSCCPPass() {
return new IPSCCP();
}
@@ -1654,12 +1710,14 @@ static bool AddressIsTaken(GlobalValue *GV) {
return true; // Storing addr of GV.
} else if (isa<InvokeInst>(*UI) || isa<CallInst>(*UI)) {
// Make sure we are calling the function, not passing the address.
- CallSite CS = CallSite::get(cast<Instruction>(*UI));
- if (CS.hasArgument(GV))
+ if (UI.getOperandNo() != 0)
return true;
} else if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
if (LI->isVolatile())
return true;
+ } else if (isa<BlockAddress>(*UI)) {
+ // blockaddress doesn't take the address of the function; it takes the
+ // address of a label within it.
} else {
return true;
}
@@ -1667,25 +1725,37 @@ static bool AddressIsTaken(GlobalValue *GV) {
}
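
The check is deliberately one-sided: it only needs to whitelist uses that certainly don't leak the address; everything else counts as an escape. Restated over a simplified use list (the enum is an assumed stand-in for real IR uses):

#include <vector>

enum class UseKind { DirectCallCallee, CallArgument, NonVolatileLoad,
                     VolatileLoad, StoreValueOperand, StorePointerOperand,
                     BlockAddress, Other };

// True if any use lets the address escape.  Mirrors the conservative
// checks above.
bool addressIsTaken(const std::vector<UseKind> &Uses) {
  for (UseKind U : Uses) {
    switch (U) {
    case UseKind::DirectCallCallee:      // calling it, not passing it
    case UseKind::NonVolatileLoad:       // reading through it is fine
    case UseKind::StorePointerOperand:   // storing *to* it, not storing it
    case UseKind::BlockAddress:          // takes a label, not the function
      break;
    default:
      return true;                       // anything else may escape
    }
  }
  return false;
}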
bool IPSCCP::runOnModule(Module &M) {
- LLVMContext *Context = &M.getContext();
-
- SCCPSolver Solver;
- Solver.setContext(Context);
+ SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
//
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
- if (!F->hasLocalLinkage() || AddressIsTaken(F)) {
- if (!F->isDeclaration())
- Solver.MarkBlockExecutable(F->begin());
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI)
- Solver.markOverdefined(AI);
- } else {
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration())
+ continue;
+
+ // If this is a strong or ODR definition of this function, then we can
+ // propagate information about its result into callsites of it.
+ if (!F->mayBeOverridden())
Solver.AddTrackedFunction(F);
+
+ // If this function only has direct calls that we can see, we can track its
+ // arguments and return value aggressively, and can assume it is not called
+ // unless we see evidence to the contrary.
+ if (F->hasLocalLinkage() && !AddressIsTaken(F)) {
+ Solver.AddArgumentTrackedFunction(F);
+ continue;
}
+ // Assume the function is called.
+ Solver.MarkBlockExecutable(F->begin());
+
+ // Assume nothing about the incoming arguments.
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI)
+ Solver.markAnythingOverdefined(AI);
+ }
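
The per-function setup above makes two independent decisions: return values can be tracked whenever the definition cannot be overridden, while argument tracking additionally requires that every call site is visible. As a stand-alone sketch (field and flag names assumed):

struct FnInfo {
  bool isDeclaration;
  bool mayBeOverridden;   // weak/linkonce-style linkage
  bool hasLocalLinkage;
  bool addressTaken;
};

enum class Entry { NotExecuted, Executed };

// Decide how aggressively interprocedural SCCP may treat a function.
void classify(const FnInfo &F, bool &TrackReturns, bool &TrackArgs,
              Entry &E) {
  TrackReturns = !F.isDeclaration && !F.mayBeOverridden;
  TrackArgs    = !F.isDeclaration && F.hasLocalLinkage && !F.addressTaken;
  // If we cannot see every caller, assume the function runs with
  // arbitrary arguments; otherwise assume nothing until a call is seen.
  E = (F.isDeclaration || TrackArgs) ? Entry::NotExecuted : Entry::Executed;
}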
+
// Loop over global variables. We inform the solver about any internal global
// variables that do not have their addresses taken; if they don't, we can
// propagate constants through them.
@@ -1710,48 +1780,37 @@ bool IPSCCP::runOnModule(Module &M) {
// Iterate over all of the instructions in the module, replacing them with
// constants if we have found them to be of constant values.
//
- SmallVector<Instruction*, 512> Insts;
SmallVector<BasicBlock*, 512> BlocksToErase;
- std::map<Value*, LatticeVal> &Values = Solver.getValueMapping();
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI)
- if (!AI->use_empty()) {
- LatticeVal &IV = Values[AI];
- if (IV.isConstant() || IV.isUndefined()) {
- Constant *CST = IV.isConstant() ?
- IV.getConstant() : UndefValue::get(AI->getType());
- DEBUG(errs() << "*** Arg " << *AI << " = " << *CST <<"\n");
-
- // Replaces all of the uses of a variable with uses of the
- // constant.
- AI->replaceAllUsesWith(CST);
- ++IPNumArgsElimed;
- }
+ if (Solver.isBlockExecutable(F->begin())) {
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ if (AI->use_empty() || isa<StructType>(AI->getType())) continue;
+
+ // TODO: Could use getStructLatticeValueFor to find out if the entire
+ // result is a constant and replace it entirely if so.
+
+ LatticeVal IV = Solver.getLatticeValueFor(AI);
+ if (IV.isOverdefined()) continue;
+
+ Constant *CST = IV.isConstant() ?
+ IV.getConstant() : UndefValue::get(AI->getType());
+ DEBUG(errs() << "*** Arg " << *AI << " = " << *CST <<"\n");
+
+ // Replaces all of the uses of a variable with uses of the
+ // constant.
+ AI->replaceAllUsesWith(CST);
+ ++IPNumArgsElimed;
}
+ }
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (!Solver.isBlockExecutable(BB)) {
- DEBUG(errs() << " BasicBlock Dead:" << *BB);
- ++IPNumDeadBlocks;
+ DeleteInstructionInBlock(BB);
+ MadeChanges = true;
- // Delete the instructions backwards, as it has a reduced likelihood of
- // having to update as many def-use and use-def chains.
TerminatorInst *TI = BB->getTerminator();
- for (BasicBlock::iterator I = BB->begin(), E = TI; I != E; ++I)
- Insts.push_back(I);
-
- while (!Insts.empty()) {
- Instruction *I = Insts.back();
- Insts.pop_back();
- if (!I->use_empty())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- BB->getInstList().erase(I);
- MadeChanges = true;
- ++IPNumInstRemoved;
- }
-
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
BasicBlock *Succ = TI->getSuccessor(i);
if (!Succ->empty() && isa<PHINode>(Succ->begin()))
@@ -1759,40 +1818,44 @@ bool IPSCCP::runOnModule(Module &M) {
}
if (!TI->use_empty())
TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
- BB->getInstList().erase(TI);
+ TI->eraseFromParent();
if (&*BB != &F->front())
BlocksToErase.push_back(BB);
else
new UnreachableInst(M.getContext(), BB);
+ continue;
+ }
+
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType()->isVoidTy() || isa<StructType>(Inst->getType()))
+ continue;
+
+ // TODO: Could use getStructLatticeValueFor to find out if the entire
+ // result is a constant and replace it entirely if so.
+
+ LatticeVal IV = Solver.getLatticeValueFor(Inst);
+ if (IV.isOverdefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(errs() << " Constant: " << *Const << " = " << *Inst);
- } else {
- for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
- Instruction *Inst = BI++;
- if (Inst->getType()->isVoidTy())
- continue;
-
- LatticeVal &IV = Values[Inst];
- if (!IV.isConstant() && !IV.isUndefined())
- continue;
-
- Constant *Const = IV.isConstant()
- ? IV.getConstant() : UndefValue::get(Inst->getType());
- DEBUG(errs() << " Constant: " << *Const << " = " << *Inst);
-
- // Replaces all of the uses of a variable with uses of the
- // constant.
- Inst->replaceAllUsesWith(Const);
-
- // Delete the instruction.
- if (!isa<CallInst>(Inst) && !isa<TerminatorInst>(Inst))
- Inst->eraseFromParent();
+ // Replaces all of the uses of a variable with uses of the
+ // constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ if (!isa<CallInst>(Inst) && !isa<TerminatorInst>(Inst))
+ Inst->eraseFromParent();
- // Hey, we just changed something!
- MadeChanges = true;
- ++IPNumInstRemoved;
- }
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++IPNumInstRemoved;
}
+ }
// Now that all instructions in the function are constant folded, erase dead
// blocks, because we can now use ConstantFoldTerminator to get rid of
@@ -1844,16 +1907,21 @@ bool IPSCCP::runOnModule(Module &M) {
// TODO: Process multiple value ret instructions also.
const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
- E = RV.end(); I != E; ++I)
- if (!I->second.isOverdefined() &&
- !I->first->getReturnType()->isVoidTy()) {
- Function *F = I->first;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
- if (!isa<UndefValue>(RI->getOperand(0)))
- RI->setOperand(0, UndefValue::get(F->getReturnType()));
- }
-
+ E = RV.end(); I != E; ++I) {
+ Function *F = I->first;
+ if (I->second.isOverdefined() || F->getReturnType()->isVoidTy())
+ continue;
+
+ // We can only do this if we know that nothing else can call the function.
+ if (!F->hasLocalLinkage() || AddressIsTaken(F))
+ continue;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (!isa<UndefValue>(RI->getOperand(0)))
+ RI->setOperand(0, UndefValue::get(F->getReturnType()));
+ }
+
// If we inferred constant or undef values for global variables, we can
// delete the global and any stores that remain to it.
const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
new file mode 100644
index 0000000..c047fca
--- /dev/null
+++ b/lib/Transforms/Scalar/SCCVN.cpp
@@ -0,0 +1,721 @@
+//===- SCCVN.cpp - Eliminate redundant values -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global value numbering to eliminate fully redundant
+// instructions. This is based on the paper "SCC-based Value Numbering"
+// by Cooper, et al.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sccvn"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <cstdio>
+using namespace llvm;
+
+STATISTIC(NumSCCVNInstr, "Number of instructions deleted by SCCVN");
+STATISTIC(NumSCCVNPhi, "Number of phis deleted by SCCVN");
+
+//===----------------------------------------------------------------------===//
+// ValueTable Class
+//===----------------------------------------------------------------------===//
+
+/// This class holds the mapping between values and value numbers. It is used
+/// as an efficient mechanism to determine the expression-wise equivalence of
+/// two values.
+namespace {
+ struct Expression {
+ enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
+ UDIV, SDIV, FDIV, UREM, SREM,
+ FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
+ ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
+ ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
+ FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
+ FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
+ FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
+ SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
+ FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
+ PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
+ INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
+
+ ExpressionOpcode opcode;
+ const Type* type;
+ SmallVector<uint32_t, 4> varargs;
+
+ Expression() { }
+ Expression(ExpressionOpcode o) : opcode(o) { }
+
+ bool operator==(const Expression &other) const {
+ if (opcode != other.opcode)
+ return false;
+ else if (opcode == EMPTY || opcode == TOMBSTONE)
+ return true;
+ else if (type != other.type)
+ return false;
+ else {
+ if (varargs.size() != other.varargs.size())
+ return false;
+
+ for (size_t i = 0; i < varargs.size(); ++i)
+ if (varargs[i] != other.varargs[i])
+ return false;
+
+ return true;
+ }
+ }
+
+ bool operator!=(const Expression &other) const {
+ return !(*this == other);
+ }
+ };
+
+ class ValueTable {
+ private:
+ DenseMap<Value*, uint32_t> valueNumbering;
+ DenseMap<Expression, uint32_t> expressionNumbering;
+ DenseMap<Value*, uint32_t> constantsNumbering;
+
+ uint32_t nextValueNumber;
+
+ Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
+ Expression::ExpressionOpcode getOpcode(CmpInst* C);
+ Expression::ExpressionOpcode getOpcode(CastInst* C);
+ Expression create_expression(BinaryOperator* BO);
+ Expression create_expression(CmpInst* C);
+ Expression create_expression(ShuffleVectorInst* V);
+ Expression create_expression(ExtractElementInst* C);
+ Expression create_expression(InsertElementInst* V);
+ Expression create_expression(SelectInst* V);
+ Expression create_expression(CastInst* C);
+ Expression create_expression(GetElementPtrInst* G);
+ Expression create_expression(CallInst* C);
+ Expression create_expression(Constant* C);
+ Expression create_expression(ExtractValueInst* C);
+ Expression create_expression(InsertValueInst* C);
+ public:
+ ValueTable() : nextValueNumber(1) { }
+ uint32_t computeNumber(Value *V);
+ uint32_t lookup(Value *V);
+ void add(Value *V, uint32_t num);
+ void clear();
+ void clearExpressions();
+ void erase(Value *v);
+ unsigned size();
+ void verifyRemoved(const Value *) const;
+ };
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<Expression> {
+ static inline Expression getEmptyKey() {
+ return Expression(Expression::EMPTY);
+ }
+
+ static inline Expression getTombstoneKey() {
+ return Expression(Expression::TOMBSTONE);
+ }
+
+ static unsigned getHashValue(const Expression e) {
+ unsigned hash = e.opcode;
+
+    // Mix the type bits into the opcode hash instead of discarding it.
+    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
+            (unsigned)((uintptr_t)e.type >> 9) ^ hash);
+
+ for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
+ E = e.varargs.end(); I != E; ++I)
+ hash = *I + hash * 37;
+
+ return hash;
+ }
+ static bool isEqual(const Expression &LHS, const Expression &RHS) {
+ return LHS == RHS;
+ }
+ static bool isPod() { return true; }
+};
+}
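
DenseMapInfo supplies what DenseMap needs from a key type: two reserved sentinel keys (empty and tombstone) plus hashing and equality. The hashing/equality half has a direct standard-library analog (a sketch; Expr is a cut-down stand-in for Expression):

#include <cstddef>
#include <cstdint>
#include <unordered_map>

struct Expr {
  uint32_t opcode, a, b;   // opcode plus two operand value numbers
  bool operator==(const Expr &O) const {
    return opcode == O.opcode && a == O.a && b == O.b;
  }
};

struct ExprHash {
  size_t operator()(const Expr &E) const {
    size_t h = E.opcode;     // start from the opcode...
    h = E.a + h * 37;        // ...and mix operands in, as above
    h = E.b + h * 37;
    return h;
  }
};

// DenseMap<Expression, uint32_t> plays this table's role in the pass.
using ExprTable = std::unordered_map<Expr, uint32_t, ExprHash>;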
+
+//===----------------------------------------------------------------------===//
+// ValueTable Internal Functions
+//===----------------------------------------------------------------------===//
+Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
+ switch(BO->getOpcode()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ llvm_unreachable("Binary operator with unknown opcode?");
+ case Instruction::Add: return Expression::ADD;
+ case Instruction::FAdd: return Expression::FADD;
+ case Instruction::Sub: return Expression::SUB;
+ case Instruction::FSub: return Expression::FSUB;
+ case Instruction::Mul: return Expression::MUL;
+ case Instruction::FMul: return Expression::FMUL;
+ case Instruction::UDiv: return Expression::UDIV;
+ case Instruction::SDiv: return Expression::SDIV;
+ case Instruction::FDiv: return Expression::FDIV;
+ case Instruction::URem: return Expression::UREM;
+ case Instruction::SRem: return Expression::SREM;
+ case Instruction::FRem: return Expression::FREM;
+ case Instruction::Shl: return Expression::SHL;
+ case Instruction::LShr: return Expression::LSHR;
+ case Instruction::AShr: return Expression::ASHR;
+ case Instruction::And: return Expression::AND;
+ case Instruction::Or: return Expression::OR;
+ case Instruction::Xor: return Expression::XOR;
+ }
+}
+
+Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
+ if (isa<ICmpInst>(C)) {
+ switch (C->getPredicate()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ llvm_unreachable("Comparison with unknown predicate?");
+ case ICmpInst::ICMP_EQ: return Expression::ICMPEQ;
+ case ICmpInst::ICMP_NE: return Expression::ICMPNE;
+ case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
+ case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
+ case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
+ case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
+ case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
+ case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
+ case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
+ case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
+ }
+ } else {
+ switch (C->getPredicate()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ llvm_unreachable("Comparison with unknown predicate?");
+ case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
+ case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
+ case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
+ case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
+ case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
+ case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
+ case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
+ case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
+ case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
+ case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
+ case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
+ case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
+ case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
+ case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
+ }
+ }
+}
+
+Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
+ switch(C->getOpcode()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ llvm_unreachable("Cast operator with unknown opcode?");
+ case Instruction::Trunc: return Expression::TRUNC;
+ case Instruction::ZExt: return Expression::ZEXT;
+ case Instruction::SExt: return Expression::SEXT;
+ case Instruction::FPToUI: return Expression::FPTOUI;
+ case Instruction::FPToSI: return Expression::FPTOSI;
+ case Instruction::UIToFP: return Expression::UITOFP;
+ case Instruction::SIToFP: return Expression::SITOFP;
+ case Instruction::FPTrunc: return Expression::FPTRUNC;
+ case Instruction::FPExt: return Expression::FPEXT;
+ case Instruction::PtrToInt: return Expression::PTRTOINT;
+ case Instruction::IntToPtr: return Expression::INTTOPTR;
+ case Instruction::BitCast: return Expression::BITCAST;
+ }
+}
+
+Expression ValueTable::create_expression(CallInst* C) {
+ Expression e;
+
+ e.type = C->getType();
+ e.opcode = Expression::CALL;
+
+ e.varargs.push_back(lookup(C->getCalledFunction()));
+ for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+ I != E; ++I)
+ e.varargs.push_back(lookup(*I));
+
+ return e;
+}
+
+Expression ValueTable::create_expression(BinaryOperator* BO) {
+ Expression e;
+ e.varargs.push_back(lookup(BO->getOperand(0)));
+ e.varargs.push_back(lookup(BO->getOperand(1)));
+ e.type = BO->getType();
+ e.opcode = getOpcode(BO);
+
+ return e;
+}
+
+Expression ValueTable::create_expression(CmpInst* C) {
+ Expression e;
+
+ e.varargs.push_back(lookup(C->getOperand(0)));
+ e.varargs.push_back(lookup(C->getOperand(1)));
+ e.type = C->getType();
+ e.opcode = getOpcode(C);
+
+ return e;
+}
+
+Expression ValueTable::create_expression(CastInst* C) {
+ Expression e;
+
+ e.varargs.push_back(lookup(C->getOperand(0)));
+ e.type = C->getType();
+ e.opcode = getOpcode(C);
+
+ return e;
+}
+
+Expression ValueTable::create_expression(ShuffleVectorInst* S) {
+ Expression e;
+
+ e.varargs.push_back(lookup(S->getOperand(0)));
+ e.varargs.push_back(lookup(S->getOperand(1)));
+ e.varargs.push_back(lookup(S->getOperand(2)));
+ e.type = S->getType();
+ e.opcode = Expression::SHUFFLE;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(ExtractElementInst* E) {
+ Expression e;
+
+ e.varargs.push_back(lookup(E->getOperand(0)));
+ e.varargs.push_back(lookup(E->getOperand(1)));
+ e.type = E->getType();
+ e.opcode = Expression::EXTRACT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(InsertElementInst* I) {
+ Expression e;
+
+ e.varargs.push_back(lookup(I->getOperand(0)));
+ e.varargs.push_back(lookup(I->getOperand(1)));
+ e.varargs.push_back(lookup(I->getOperand(2)));
+ e.type = I->getType();
+ e.opcode = Expression::INSERT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(SelectInst* I) {
+ Expression e;
+
+ e.varargs.push_back(lookup(I->getCondition()));
+ e.varargs.push_back(lookup(I->getTrueValue()));
+ e.varargs.push_back(lookup(I->getFalseValue()));
+ e.type = I->getType();
+ e.opcode = Expression::SELECT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(GetElementPtrInst* G) {
+ Expression e;
+
+ e.varargs.push_back(lookup(G->getPointerOperand()));
+ e.type = G->getType();
+ e.opcode = Expression::GEP;
+
+ for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
+ I != E; ++I)
+ e.varargs.push_back(lookup(*I));
+
+ return e;
+}
+
+Expression ValueTable::create_expression(ExtractValueInst* E) {
+ Expression e;
+
+ e.varargs.push_back(lookup(E->getAggregateOperand()));
+ for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ e.type = E->getType();
+ e.opcode = Expression::EXTRACTVALUE;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(InsertValueInst* E) {
+ Expression e;
+
+ e.varargs.push_back(lookup(E->getAggregateOperand()));
+ e.varargs.push_back(lookup(E->getInsertedValueOperand()));
+ for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ e.type = E->getType();
+ e.opcode = Expression::INSERTVALUE;
+
+ return e;
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable External Functions
+//===----------------------------------------------------------------------===//
+
+/// add - Insert a value into the table with a specified value number.
+void ValueTable::add(Value *V, uint32_t num) {
+ valueNumbering[V] = num;
+}
+
+/// computeNumber - Returns the value number for the specified value, assigning
+/// it a new number if it did not have one before.
+uint32_t ValueTable::computeNumber(Value *V) {
+ if (uint32_t v = valueNumbering[V])
+ return v;
+  else if (uint32_t v = constantsNumbering[V])
+ return v;
+
+ if (!isa<Instruction>(V)) {
+ constantsNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ Instruction* I = cast<Instruction>(V);
+ Expression exp;
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or :
+ case Instruction::Xor:
+ exp = create_expression(cast<BinaryOperator>(I));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ exp = create_expression(cast<CmpInst>(I));
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ exp = create_expression(cast<CastInst>(I));
+ break;
+ case Instruction::Select:
+ exp = create_expression(cast<SelectInst>(I));
+ break;
+ case Instruction::ExtractElement:
+ exp = create_expression(cast<ExtractElementInst>(I));
+ break;
+ case Instruction::InsertElement:
+ exp = create_expression(cast<InsertElementInst>(I));
+ break;
+ case Instruction::ShuffleVector:
+ exp = create_expression(cast<ShuffleVectorInst>(I));
+ break;
+ case Instruction::ExtractValue:
+ exp = create_expression(cast<ExtractValueInst>(I));
+ break;
+ case Instruction::InsertValue:
+ exp = create_expression(cast<InsertValueInst>(I));
+ break;
+ case Instruction::GetElementPtr:
+ exp = create_expression(cast<GetElementPtrInst>(I));
+ break;
+ default:
+ valueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[V] = e;
+
+ return e;
+}
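
The table lookup at the end is what makes this value numbering rather than plain hashing: two instructions receive the same number exactly when they reduce to the same Expression key, and the shared counter hands out a fresh number the first time a key is seen. The core of that table, reduced to strings (a toy stand-in for the Expression struct):

#include <cstdint>
#include <map>
#include <string>

struct ToyValueTable {
  std::map<std::string, uint32_t> exprNumbering;  // canonical expr -> VN
  uint32_t next = 1;

  // Keys like "add VN(x) VN(y)" stand in for the Expression struct.
  uint32_t numberFor(const std::string &CanonicalExpr) {
    uint32_t &N = exprNumbering[CanonicalExpr];
    if (!N) N = next++;          // first time we see this expression
    return N;                    // later occurrences reuse the number
  }
};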
+
+/// lookup - Returns the value number of the specified value.  Returns 0 if
+/// an instruction has not yet been numbered; non-instructions are numbered
+/// lazily on first lookup.
+uint32_t ValueTable::lookup(Value *V) {
+ if (!isa<Instruction>(V)) {
+ if (!constantsNumbering.count(V))
+ constantsNumbering[V] = nextValueNumber++;
+ return constantsNumbering[V];
+ }
+
+ return valueNumbering[V];
+}
+
+/// clear - Remove all entries from the ValueTable
+void ValueTable::clear() {
+ valueNumbering.clear();
+ expressionNumbering.clear();
+ constantsNumbering.clear();
+ nextValueNumber = 1;
+}
+
+void ValueTable::clearExpressions() {
+ expressionNumbering.clear();
+ constantsNumbering.clear();
+ nextValueNumber = 1;
+}
+
+/// erase - Remove a value from the value numbering
+void ValueTable::erase(Value *V) {
+ valueNumbering.erase(V);
+}
+
+/// verifyRemoved - Verify that the value is removed from all internal data
+/// structures.
+void ValueTable::verifyRemoved(const Value *V) const {
+  for (DenseMap<Value*, uint32_t>::const_iterator
+ I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
+ assert(I->first != V && "Inst still occurs in value numbering map!");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SCCVN Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+ struct ValueNumberScope {
+ ValueNumberScope* parent;
+ DenseMap<uint32_t, Value*> table;
+ SparseBitVector<128> availIn;
+ SparseBitVector<128> availOut;
+
+ ValueNumberScope(ValueNumberScope* p) : parent(p) { }
+ };
+
+ class SCCVN : public FunctionPass {
+ bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ SCCVN() : FunctionPass(&ID) { }
+
+ private:
+ ValueTable VT;
+ DenseMap<BasicBlock*, ValueNumberScope*> BBMap;
+
+    // This transformation requires dominator info.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+
+ AU.addPreserved<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+ };
+
+ char SCCVN::ID = 0;
+}
+
+// createSCCVNPass - This is the public interface to this file.
+FunctionPass *llvm::createSCCVNPass() { return new SCCVN(); }
+
+static RegisterPass<SCCVN> X("sccvn",
+ "SCC Value Numbering");
+
+static Value *lookupNumber(ValueNumberScope *Locals, uint32_t num) {
+ while (Locals) {
+ DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
+ if (I != Locals->table.end())
+ return I->second;
+ Locals = Locals->parent;
+ }
+
+ return 0;
+}
+
+bool SCCVN::runOnFunction(Function& F) {
+ // Implement the RPO version of the SCCVN algorithm. Conceptually,
+  // we optimistically assume that all instructions with the same opcode have
+  // the same VN. Then we deepen the comparison by one level, so that only
+  // instructions whose operands have the same VNs keep the same VN. We
+  // iterate this process until the partitioning stops changing, at which
+  // point we have computed a full numbering.
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ bool done = false;
+ while (!done) {
+ done = true;
+ VT.clearExpressions();
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
+ E = RPOT.end(); I != E; ++I) {
+ BasicBlock* BB = *I;
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ uint32_t origVN = VT.lookup(BI);
+ uint32_t newVN = VT.computeNumber(BI);
+ if (origVN != newVN)
+ done = false;
+ }
+ }
+ }
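
This is the optimistic direction of the algorithm: start from maximal assumed equivalence and iterate reverse post-order renumbering until no value's number changes, i.e. a fixpoint. The driver shape, detached from LLVM (placeholder transfer function):

#include <vector>

struct Numbering {
  // Renumber one item; returns true if its number changed.
  bool renumber(int /*item*/) { return false; }  // placeholder
  void clearExpressions() {}
};

// Iterate a renumbering pass over a fixed RPO order until stable.
void solveToFixpoint(Numbering &N, const std::vector<int> &RPO) {
  bool changed = true;
  while (changed) {
    changed = false;
    N.clearExpressions();        // expression table is rebuilt each round
    for (int Item : RPO)
      changed |= N.renumber(Item);
  }
}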
+
+ // Now, do a dominator walk, eliminating simple, dominated redundancies as we
+ // go. Also, build the ValueNumberScope structure that will be used for
+ // computing full availability.
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+ bool changed = false;
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
+ DE = df_end(DT.getRootNode()); DI != DE; ++DI) {
+ BasicBlock* BB = DI->getBlock();
+ if (DI->getIDom())
+ BBMap[BB] = new ValueNumberScope(BBMap[DI->getIDom()->getBlock()]);
+ else
+ BBMap[BB] = new ValueNumberScope(0);
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ uint32_t num = VT.lookup(I);
+ Value* repl = lookupNumber(BBMap[BB], num);
+
+ if (repl) {
+ if (isa<PHINode>(I))
+ ++NumSCCVNPhi;
+ else
+ ++NumSCCVNInstr;
+ I->replaceAllUsesWith(repl);
+ Instruction* OldInst = I;
+ ++I;
+ BBMap[BB]->table[num] = repl;
+ OldInst->eraseFromParent();
+ changed = true;
+ } else {
+ BBMap[BB]->table[num] = I;
+ BBMap[BB]->availOut.set(num);
+
+ ++I;
+ }
+ }
+ }
+
+  // FIXME: This code is commented out for now, because it can lead to
+  // SSAUpdater inserting a lot of redundant PHIs.
+#if 0
+ // Perform a forward data-flow to compute availability at all points on
+ // the CFG.
+ do {
+ changed = false;
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
+ E = RPOT.end(); I != E; ++I) {
+ BasicBlock* BB = *I;
+ ValueNumberScope *VNS = BBMap[BB];
+
+ SparseBitVector<128> preds;
+ bool first = true;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+ if (first) {
+ preds = BBMap[*PI]->availOut;
+ first = false;
+ } else {
+ preds &= BBMap[*PI]->availOut;
+ }
+ }
+
+ changed |= (VNS->availIn |= preds);
+ changed |= (VNS->availOut |= preds);
+ }
+ } while (changed);
+
+ // Use full availability information to perform non-dominated replacements.
+ SSAUpdater SSU;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ if (!BBMap.count(FI)) continue;
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ) {
+ uint32_t num = VT.lookup(BI);
+ if (!BBMap[FI]->availIn.test(num)) {
+ ++BI;
+ continue;
+ }
+
+ SSU.Initialize(BI);
+
+ SmallPtrSet<BasicBlock*, 8> visited;
+ SmallVector<BasicBlock*, 8> stack;
+ visited.insert(FI);
+ for (pred_iterator PI = pred_begin(FI), PE = pred_end(FI);
+ PI != PE; ++PI)
+ if (!visited.count(*PI))
+ stack.push_back(*PI);
+
+ while (!stack.empty()) {
+ BasicBlock* CurrBB = stack.back();
+ stack.pop_back();
+ visited.insert(CurrBB);
+
+ ValueNumberScope* S = BBMap[CurrBB];
+ if (S->table.count(num)) {
+ SSU.AddAvailableValue(CurrBB, S->table[num]);
+ } else {
+ for (pred_iterator PI = pred_begin(CurrBB), PE = pred_end(CurrBB);
+ PI != PE; ++PI)
+ if (!visited.count(*PI))
+ stack.push_back(*PI);
+ }
+ }
+
+ Value* repl = SSU.GetValueInMiddleOfBlock(FI);
+ BI->replaceAllUsesWith(repl);
+ Instruction* CurInst = BI;
+ ++BI;
+ BBMap[FI]->table[num] = repl;
+ if (isa<PHINode>(CurInst))
+ ++NumSCCVNPhi;
+ else
+ ++NumSCCVNInstr;
+
+ CurInst->eraseFromParent();
+ }
+ }
+#endif
+
+ VT.clear();
+ for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+ I = BBMap.begin(), E = BBMap.end(); I != E; ++I)
+ delete I->second;
+ BBMap.clear();
+
+ return changed;
+}
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 610d874..2e3b694 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -100,32 +100,32 @@ namespace {
void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
- int isSafeAllocaToScalarRepl(AllocationInst *AI);
+ int isSafeAllocaToScalarRepl(AllocaInst *AI);
- void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
+ void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
AllocaInfo &Info);
- void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
+ void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
AllocaInfo &Info);
- void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
+ void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
unsigned OpNo, AllocaInfo &Info);
- void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI,
+ void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI,
AllocaInfo &Info);
- void DoScalarReplacement(AllocationInst *AI,
- std::vector<AllocationInst*> &WorkList);
+ void DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList);
void CleanupGEP(GetElementPtrInst *GEP);
- void CleanupAllocaUsers(AllocationInst *AI);
- AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base);
+ void CleanupAllocaUsers(AllocaInst *AI);
+ AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
- void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
+ void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
- AllocationInst *AI,
+ AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
- void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
+ void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
- void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
+ void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
@@ -135,7 +135,7 @@ namespace {
uint64_t Offset, IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
uint64_t Offset, IRBuilder<> &Builder);
- static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI);
+ static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
};
}
@@ -213,18 +213,18 @@ static uint64_t getNumSAElements(const Type *T) {
// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
- std::vector<AllocationInst*> WorkList;
+ std::vector<AllocaInst*> WorkList;
// Scan the entry basic block, adding any allocas to the worklist
BasicBlock &BB = F.getEntryBlock();
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
- if (AllocationInst *A = dyn_cast<AllocationInst>(I))
+ if (AllocaInst *A = dyn_cast<AllocaInst>(I))
WorkList.push_back(A);
// Process the worklist
bool Changed = false;
while (!WorkList.empty()) {
- AllocationInst *AI = WorkList.back();
+ AllocaInst *AI = WorkList.back();
WorkList.pop_back();
// Handle dead allocas trivially. These can be formed by SROA'ing arrays
@@ -335,8 +335,8 @@ bool SROA::performScalarRepl(Function &F) {
/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate, do SROA now.
-void SROA::DoScalarReplacement(AllocationInst *AI,
- std::vector<AllocationInst*> &WorkList) {
+void SROA::DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList) {
DEBUG(errs() << "Found inst to SROA: " << *AI << '\n');
SmallVector<AllocaInst*, 32> ElementAllocas;
if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
@@ -455,7 +455,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI,
/// getelementptr instruction of an array aggregate allocation. isFirstElt
/// indicates whether Ptr is known to the start of the aggregate.
///
-void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
+void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
AllocaInfo &Info) {
for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
I != E; ++I) {
@@ -520,7 +520,7 @@ static bool AllUsersAreLoads(Value *Ptr) {
/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an
/// aggregate allocation.
///
-void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
+void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
AllocaInfo &Info) {
if (BitCastInst *C = dyn_cast<BitCastInst>(User))
return isSafeUseOfBitCastedAllocation(C, AI, Info);
@@ -605,7 +605,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory
/// intrinsic can be promoted by SROA. At this point, we know that the operand
/// of the memintrinsic is a pointer to the beginning of the allocation.
-void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
+void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
unsigned OpNo, AllocaInfo &Info) {
// If not constant length, give up.
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
@@ -632,7 +632,7 @@ void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast
/// are
-void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI,
+void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI,
AllocaInfo &Info) {
for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
UI != E; ++UI) {
@@ -690,7 +690,7 @@ void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI,
/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
/// to its first element. Transform users of the cast to use the new values
/// instead.
-void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
+void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
while (UI != UE) {
@@ -729,7 +729,7 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
- AllocationInst *AI,
+ AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
// If this is a memcpy/memmove, construct the other pointer as the
@@ -905,8 +905,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
/// overwrites the entire allocation. Extract out the pieces of the stored
/// integer and store them individually.
-void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
- AllocationInst *AI,
+void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts){
// Extract each element out of the integer according to its structure offset
// and store the element value to the individual alloca.
@@ -1029,7 +1028,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer. Load the individual pieces to form the aggregate value.
-void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
+void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
@@ -1162,7 +1161,7 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) {
/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe,
/// or 1 if safe after canonicalization has been performed.
///
-int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
+int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// Loop over the use list of the alloca. We can only transform it if all of
// the users are safe to transform.
AllocaInfo Info;
@@ -1245,7 +1244,7 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
/// CleanupAllocaUsers - If SROA reported that it can promote the specified
/// allocation, but only if cleaned up, perform the cleanups required.
-void SROA::CleanupAllocaUsers(AllocationInst *AI) {
+void SROA::CleanupAllocaUsers(AllocaInst *AI) {
// At this point, we know that the end result will be SROA'd and promoted, so
// we can insert ugly code if required so long as sroa+mem2reg will clean it
// up.
@@ -1853,7 +1852,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global. If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
-Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) {
+Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
Instruction *TheCopy = 0;
if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
return TheCopy;
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 29712b3..6a81480 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -126,6 +126,9 @@ static bool MarkAliveBlocks(BasicBlock *BB,
}
}
+    // A store to undef or to null is undefined behavior; passes that can't
+    // modify the CFG emit such stores to signal that the code should be
+    // changed to unreachable.
if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
Value *Ptr = SI->getOperand(1);
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index e1866015..575c93b 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -509,6 +509,27 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
}
//===----------------------------------------------------------------------===//
+// Miscellaneous LibCall/Intrinsic Optimizations
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct SizeOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // TODO: We can do more with this, but handling it here instead of
+    // earlier should not change behavior.
+ ConstantInt *Const = dyn_cast<ConstantInt>(CI->getOperand(2));
+
+ if (!Const) return 0;
+
+ if (Const->getZExtValue() < 2)
+ return Constant::getAllOnesValue(Const->getType());
+ else
+ return ConstantInt::get(Const->getType(), 0);
+ }
+};
+}
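
This assumes the early llvm.objectsize contract: the trailing integer operand selects whether the intrinsic reports a maximum object size (unknown folds to all ones) or a minimum (unknown folds to zero). The folding rule in isolation (assumed semantics):

#include <cstdint>

// Assumed semantics of the flag operand checked above: values 0 and 1
// request a maximum (unknown size folds to -1, i.e. all ones), values
// 2 and 3 a minimum (unknown size folds to 0).
int64_t objectSizeUnknownResult(uint64_t Flag) {
  return Flag < 2 ? -1 : 0;
}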
+
+//===----------------------------------------------------------------------===//
// String and Memory LibCall Optimizations
//===----------------------------------------------------------------------===//
@@ -1548,6 +1569,7 @@ namespace {
// Formatting and IO Optimizations
SPrintFOpt SPrintF; PrintFOpt PrintF;
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+ SizeOpt ObjectSize;
bool Modified; // This is only used by doInitialization.
public:
@@ -1653,6 +1675,9 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["fwrite"] = &FWrite;
Optimizations["fputs"] = &FPuts;
Optimizations["fprintf"] = &FPrintF;
+
+ // Miscellaneous
+ Optimizations["llvm.objectsize"] = &ObjectSize;
}
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 68689d6..4864e23 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -129,7 +129,7 @@ bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
// Also alloca and malloc.
- if (isa<AllocationInst>(I)) return false;
+ if (isa<AllocaInst>(I)) return false;
// Some vector instructions can expand into a number of instructions.
if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) ||
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 35907fd..c728c0b 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -65,9 +65,6 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
/// when all entries to the PHI nodes in a block are guaranteed equal, such as
/// when the block has exactly one predecessor.
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) {
- if (!isa<PHINode>(BB->begin()))
- return;
-
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
if (PN->getIncomingValue(0) != PN)
PN->replaceAllUsesWith(PN->getIncomingValue(0));
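// [Editor's note, not part of the patch] The guard deleted above was
// redundant: dyn_cast<> yields null when the cast fails, so the while
// condition already does nothing for a block whose first instruction
// is not a PHI. A tiny standalone model of that idiom (hypothetical
// types, not LLVM's):
struct Inst { int Kind; };                   // Kind == 1 marks a "PHI"
static Inst *dyn_cast_phi(Inst *I) {
  return I->Kind == 1 ? I : 0;               // null on mismatch
}
// while (Inst *PN = dyn_cast_phi(First)) ...  // no PHIs => no iterations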
@@ -97,10 +94,14 @@ void llvm::DeleteDeadPHIs(BasicBlock *BB) {
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
/// if possible. The return value indicates success or failure.
-bool llvm::MergeBlockIntoPredecessor(BasicBlock* BB, Pass* P) {
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
- // Can't merge the entry block.
- if (pred_begin(BB) == pred_end(BB)) return false;
+ // Can't merge the entry block. Don't merge away blocks that have their
+ // address taken: this is a bug if the predecessor block is the entry node
+ // (because we'd end up taking the address of the entry) and undesirable in
+ // any case.
+ if (pred_begin(BB) == pred_end(BB) ||
+ BB->hasAddressTaken()) return false;
BasicBlock *PredBB = *PI++;
for (; PI != PE; ++PI) // Search all predecessors, see if they are all same
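// [Editor's sketch, not part of the patch] Why the new bail-out
// matters: a blockaddress constant such as
//   @tbl = global i8* blockaddress(@f, %bb)
// must keep %bb alive, and merging %bb into the entry block would
// amount to taking the entry block's address. The combined guard as a
// predicate, using only calls that appear in this patch:
#include "llvm/BasicBlock.h"
#include "llvm/Support/CFG.h"
static bool canMerge(llvm::BasicBlock *BB) {
  return llvm::pred_begin(BB) != llvm::pred_end(BB) && // not the entry
         !BB->hasAddressTaken();                       // no blockaddress
}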
@@ -274,6 +275,8 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
/// SplitEdge - Split the edge connecting specified block. Pass P must
/// not be NULL.
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+ assert(!isa<IndirectBrInst>(BB->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
TerminatorInst *LatchTerm = BB->getTerminator();
unsigned SuccNum = 0;
#ifndef NDEBUG
@@ -675,7 +678,7 @@ void llvm::CopyPrecedingStopPoint(Instruction *I,
if (I != I->getParent()->begin()) {
BasicBlock::iterator BBI = I; --BBI;
if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BBI)) {
- CallInst *newDSPI = DSPI->clone();
+ CallInst *newDSPI = cast<CallInst>(DSPI->clone());
newDSPI->insertBefore(InsertPos);
}
}
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
index 4b720b1..b5ffe06 100644
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -34,7 +34,7 @@ namespace llvm {
/// BasicInlinerImpl - BasicInliner implementation class. This hides
/// container info, used by basic inliner, from public interface.
- struct VISIBILITY_HIDDEN BasicInlinerImpl {
+ struct BasicInlinerImpl {
BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
void operator=(const BasicInlinerImpl&); // DO NOT IMPLEMENT
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 849b2b5..ccd97c8 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -26,7 +26,6 @@
#include "llvm/Instructions.h"
#include "llvm/Type.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -35,7 +34,7 @@ using namespace llvm;
STATISTIC(NumBroken, "Number of blocks inserted");
namespace {
- struct VISIBILITY_HIDDEN BreakCriticalEdges : public FunctionPass {
+ struct BreakCriticalEdges : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
BreakCriticalEdges() : FunctionPass(&ID) {}
@@ -70,7 +69,7 @@ bool BreakCriticalEdges::runOnFunction(Function &F) {
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
TerminatorInst *TI = I->getTerminator();
- if (TI->getNumSuccessors() > 1)
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (SplitCriticalEdge(TI, i, this)) {
++NumBroken;
@@ -151,14 +150,29 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
/// split the critical edge. This will update DominatorTree and
-/// DominatorFrontier information if it is available, thus calling this pass
-/// will not invalidate any of them. This returns true if the edge was split,
-/// false otherwise. This ensures that all edges to that dest go to one block
-/// instead of each going to a different block.
-//
+/// DominatorFrontier information if it is available, thus calling this pass
+/// will not invalidate either of them. This returns the new block if the edge
+/// was split, null otherwise.
+///
+/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
+/// specified successor will be merged into the same critical edge block.
+/// This is most commonly interesting with switch instructions, which may
+/// have many edges to any one destination. This ensures that all edges to that
+/// dest go to one block instead of each going to a different block, but isn't
+/// the standard definition of a "critical edge".
+///
+/// It is invalid to call this function on a critical edge that starts at an
+/// IndirectBrInst. Splitting these edges will almost always create an invalid
+/// program because the address of the new block won't be the one that is jumped
+/// to.
+///
BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
Pass *P, bool MergeIdenticalEdges) {
if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
+
+ assert(!isa<IndirectBrInst>(TI) &&
+ "Cannot split critical edge from IndirectBrInst");
+
BasicBlock *TIBB = TI->getParent();
BasicBlock *DestBB = TI->getSuccessor(SuccNum);
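// [Editor's note, not part of the patch] The definition in play above:
// an edge Pred->Succ is critical iff Pred has several successors and
// Succ has several predecessors; splitting it inserts a fresh block on
// the edge so code can be placed there safely. Standalone sketch on an
// adjacency-list CFG (hypothetical representation):
#include <vector>
static bool isCriticalEdge(const std::vector<std::vector<int> > &Succs,
                           const std::vector<int> &NumPreds,
                           int Pred, int Succ) {
  return Succs[Pred].size() > 1 && NumPreds[Succ] > 1;
}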
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index f4394ea..93577b4 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -13,7 +13,7 @@ add_llvm_library(LLVMTransformUtils
LCSSA.cpp
Local.cpp
LoopSimplify.cpp
- LowerAllocations.cpp
+ LoopUnroll.cpp
LowerInvoke.cpp
LowerSwitch.cpp
Mem2Reg.cpp
@@ -22,7 +22,6 @@ add_llvm_library(LLVMTransformUtils
SSI.cpp
SimplifyCFG.cpp
UnifyFunctionExitNodes.cpp
- UnrollLoop.cpp
ValueMapper.cpp
)
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 30130fa..fd8862c 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -20,9 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
@@ -176,7 +174,7 @@ Function *llvm::CloneFunction(const Function *F,
namespace {
/// PruningFunctionCloner - This class is a private class used to implement
/// the CloneAndPruneFunctionInto method.
- struct VISIBILITY_HIDDEN PruningFunctionCloner {
+ struct PruningFunctionCloner {
Function *NewFunc;
const Function *OldFunc;
DenseMap<const Value*, Value*> &ValueMap;
@@ -329,8 +327,7 @@ ConstantFoldMappedInstruction(const Instruction *I) {
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- ValueMap,
- Context)))
+ ValueMap)))
Ops.push_back(Op);
else
return 0; // All operands not constant!
@@ -366,7 +363,6 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ClonedCodeInfo *CodeInfo,
const TargetData *TD) {
assert(NameSuffix && "NameSuffix cannot be null!");
- LLVMContext &Context = OldFunc->getContext();
#ifndef NDEBUG
for (Function::const_arg_iterator II = OldFunc->arg_begin(),
@@ -437,7 +433,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
if (BasicBlock *MappedBlock =
cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
- ValueMap, Context);
+ ValueMap);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 0285f8c..a163f89 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -89,8 +89,7 @@ Module *llvm::CloneModule(const Module *M,
GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);
if (I->hasInitializer())
GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- ValueMap,
- M->getContext())));
+ ValueMap)));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -121,7 +120,7 @@ Module *llvm::CloneModule(const Module *M,
GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);
GA->setLinkage(I->getLinkage());
if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, ValueMap, M->getContext())));
+ GA->setAliasee(cast<Constant>(MapValue(C, ValueMap)));
}
return New;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index c39ccf7..f966681 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -26,7 +26,6 @@
#include "llvm/Analysis/Verifier.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -44,7 +43,7 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
cl::desc("Aggregate arguments to code-extracted functions"));
namespace {
- class VISIBILITY_HIDDEN CodeExtractor {
+ class CodeExtractor {
typedef std::vector<Value*> Values;
std::set<BasicBlock*> BlocksToExtract;
DominatorTree* DT;
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 619c939..20f5a4a 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -619,8 +619,17 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
"Ret value not consistent in function!");
PHI->addIncoming(RI->getReturnValue(), RI->getParent());
}
+
+ // Now that we inserted the PHI, check to see if it has a single value
+ // (e.g. all the entries are the same or undef). If so, remove the PHI so
+ // it doesn't block other optimizations.
+ if (Value *V = PHI->hasConstantValue()) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
}
+
// Add a branch to the merge points and remove return instructions.
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
ReturnInst *RI = Returns[i];
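// [Editor's sketch, not part of the patch] The cleanup added above, in
// miniature: a return-value PHI whose incoming entries all agree has a
// single value and can be erased after replaceAllUsesWith. Ignoring
// the undef handling of the real hasConstantValue():
#include <cstddef>
#include <vector>
static const int *phiSingleValue(const std::vector<const int *> &In) {
  const int *V = In.empty() ? 0 : In[0];
  for (std::size_t i = 1; i < In.size(); ++i)
    if (In[i] != V) return 0;   // entries disagree: keep the PHI
  return V;                     // one value: RAUW it and erase the PHI
}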
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index b622611..543ddf1 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -59,9 +59,8 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
// If we see a free or a call which may write to memory (i.e. which might do
// a free) the pointer could be marked invalid.
- if (isa<FreeInst>(BBI) ||
- (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
- !isa<DbgInfoIntrinsic>(BBI)))
+ if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI))
return false;
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
@@ -110,7 +109,9 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
// unconditional branch.
BI->setUnconditionalDest(Destination);
return true;
- } else if (Dest2 == Dest1) { // Conditional branch to same location?
+ }
+
+ if (Dest2 == Dest1) { // Conditional branch to same location?
// This branch matches something like this:
// br bool %cond, label %Dest, label %Dest
// and changes it into: br label %Dest
@@ -123,7 +124,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
BI->setUnconditionalDest(Dest1);
return true;
}
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ return false;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
// If we are switching on a constant, we can convert the switch into a
// single branch instruction!
ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
@@ -132,7 +136,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
assert(TheOnlyDest == SI->getDefaultDest() &&
"Default destination is not successor #0?");
- // Figure out which case it goes to...
+ // Figure out which case it goes to.
for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
// Found case matching a constant operand?
if (SI->getSuccessorValue(i) == CI) {
@@ -143,7 +147,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
// Check to see if this branch is going to the same place as the default
// dest. If so, eliminate it as an explicit compare.
if (SI->getSuccessor(i) == DefaultDest) {
- // Remove this entry...
+ // Remove this entry.
DefaultDest->removePredecessor(SI->getParent());
SI->removeCase(i);
--i; --e; // Don't skip an entry...
@@ -165,7 +169,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
// If we found a single destination that we can fold the switch into, do so
// now.
if (TheOnlyDest) {
- // Insert the new branch..
+ // Insert the new branch.
BranchInst::Create(TheOnlyDest, SI);
BasicBlock *BB = SI->getParent();
@@ -179,22 +183,54 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
Succ->removePredecessor(BB);
}
- // Delete the old switch...
+ // Delete the old switch.
BB->getInstList().erase(SI);
return true;
- } else if (SI->getNumSuccessors() == 2) {
+ }
+
+ if (SI->getNumSuccessors() == 2) {
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
Value *Cond = new ICmpInst(SI, ICmpInst::ICMP_EQ, SI->getCondition(),
SI->getSuccessorValue(1), "cond");
- // Insert the new branch...
+ // Insert the new branch.
BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI);
- // Delete the old switch...
+ // Delete the old switch.
SI->eraseFromParent();
return true;
}
+ return false;
}
+
+ if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) {
+ // indirectbr blockaddress(@F, @BB) -> br label @BB
+ if (BlockAddress *BA =
+ dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
+ BasicBlock *TheOnlyDest = BA->getBasicBlock();
+ // Insert the new branch.
+ BranchInst::Create(TheOnlyDest, IBI);
+
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ if (IBI->getDestination(i) == TheOnlyDest)
+ TheOnlyDest = 0;
+ else
+ IBI->getDestination(i)->removePredecessor(IBI->getParent());
+ }
+ IBI->eraseFromParent();
+
+ // If we didn't find our destination in the IBI successor list, then we
+ // have undefined behavior. Replace the unconditional branch with an
+ // 'unreachable' instruction.
+ if (TheOnlyDest) {
+ BB->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ }
+
+ return true;
+ }
+ }
+
return false;
}
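// [Editor's note, not part of the patch] The subtle branch of the new
// indirectbr fold: if the blockaddress target is absent from the
// destination list the jump is undefined, so the fold must emit
// 'unreachable' rather than a branch, e.g. (hypothetical IR)
//   indirectbr i8* blockaddress(@f, %a), [label %b]  ==>  unreachable
// The membership test, standalone:
#include <cstddef>
#include <vector>
static bool targetIsListed(int Target, const std::vector<int> &Dests) {
  for (std::size_t i = 0; i < Dests.size(); ++i)
    if (Dests[i] == Target) return true;  // fold to 'br label Target'
  return false;                           // fold to 'unreachable'
}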
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index c22708a..cd8d952 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -46,7 +46,6 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
@@ -57,7 +56,7 @@ STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
STATISTIC(NumNested , "Number of nested loops split out");
namespace {
- struct VISIBILITY_HIDDEN LoopSimplify : public LoopPass {
+ struct LoopSimplify : public LoopPass {
static char ID; // Pass identification, replacement for typeid
LoopSimplify() : LoopPass(&ID) {}
diff --git a/lib/Transforms/Utils/UnrollLoop.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 4d838b5..d68427a 100644
--- a/lib/Transforms/Utils/UnrollLoop.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -44,8 +44,8 @@ static inline void RemapInstruction(Instruction *I,
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
- if (It != ValueMap.end()) Op = It->second;
- I->setOperand(op, Op);
+ if (It != ValueMap.end())
+ I->setOperand(op, It->second);
}
}
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 9a3de26..6e6e8d2 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -47,7 +47,6 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetLowering.h"
#include <csetjmp>
#include <set>
@@ -61,7 +60,7 @@ static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
namespace {
- class VISIBILITY_HIDDEN LowerInvoke : public FunctionPass {
+ class LowerInvoke : public FunctionPass {
// Used for both models.
Constant *WriteFn;
Constant *AbortFn;
@@ -87,7 +86,6 @@ namespace {
// This is a cluster of orthogonal Transforms
AU.addPreservedID(PromoteMemoryToRegisterID);
AU.addPreservedID(LowerSwitchID);
- AU.addPreservedID(LowerAllocationsID);
}
private:
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 764f098..8c18b59 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -21,8 +21,8 @@
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -31,7 +31,7 @@ namespace {
/// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
/// instructions. Note that this cannot be a BasicBlock pass because it
/// modifies the CFG!
- class VISIBILITY_HIDDEN LowerSwitch : public FunctionPass {
+ class LowerSwitch : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
LowerSwitch() : FunctionPass(&ID) {}
@@ -43,7 +43,6 @@ namespace {
AU.addPreserved<UnifyFunctionExitNodes>();
AU.addPreservedID(PromoteMemoryToRegisterID);
AU.addPreservedID(LowerInvokePassID);
- AU.addPreservedID(LowerAllocationsID);
}
struct CaseRange {
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 5df0832..9416604 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -20,13 +20,12 @@
#include "llvm/Instructions.h"
#include "llvm/Function.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
using namespace llvm;
STATISTIC(NumPromoted, "Number of alloca's promoted");
namespace {
- struct VISIBILITY_HIDDEN PromotePass : public FunctionPass {
+ struct PromotePass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
PromotePass() : FunctionPass(&ID) {}
@@ -45,7 +44,6 @@ namespace {
AU.addPreserved<UnifyFunctionExitNodes>();
AU.addPreservedID(LowerSwitchID);
AU.addPreservedID(LowerInvokePassID);
- AU.addPreservedID(LowerAllocationsID);
}
};
} // end of anonymous namespace
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 9ca06bd..de6ad1d 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -32,7 +32,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
#include <algorithm>
using namespace llvm;
@@ -100,7 +99,7 @@ namespace {
struct AllocaInfo;
// Data package used by RenamePass()
- class VISIBILITY_HIDDEN RenamePassData {
+ class RenamePassData {
public:
typedef std::vector<Value *> ValVector;
@@ -123,7 +122,7 @@ namespace {
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
- class VISIBILITY_HIDDEN LargeBlockInfo {
+ class LargeBlockInfo {
/// InstNumbers - For each instruction that we track, keep the index of the
/// instruction. The index starts out as the number of the instruction from
/// the start of the block.
@@ -170,7 +169,7 @@ namespace {
}
};
- struct VISIBILITY_HIDDEN PromoteMem2Reg {
+ struct PromoteMem2Reg {
/// Allocas - The alloca instructions being promoted.
///
std::vector<AllocaInst*> Allocas;
@@ -750,7 +749,12 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
}
// Otherwise, we *can* safely rewrite this load.
- LI->replaceAllUsesWith(OnlyStore->getOperand(0));
+ Value *ReplVal = OnlyStore->getOperand(0);
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(ReplVal);
if (AST && isa<PointerType>(LI->getType()))
AST->deleteValue(LI);
LI->eraseFromParent();
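// [Editor's sketch, not part of the patch] The degenerate case the new
// guard handles only arises in unreachable code, where the lone store
// can store the load's own result:
//   dead:                       ; no predecessors
//     %x = load i32* %a
//     store i32 %x, i32* %a     ; OnlyStore's value *is* the load
// Replacing %x with itself would be circular; undef is as good as
// anything in unreachable code. The guard as a helper:
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
static llvm::Value *safeReplacement(llvm::Value *Stored,
                                    llvm::LoadInst *LI) {
  if (Stored == LI)             // only possible in unreachable code
    return llvm::UndefValue::get(LI->getType());
  return Stored;
}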
diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp
index 3bb2e8e..1c4afff 100644
--- a/lib/Transforms/Utils/SSI.cpp
+++ b/lib/Transforms/Utils/SSI.cpp
@@ -396,7 +396,7 @@ static RegisterPass<SSI> X("ssi", "Static Single Information Construction");
/// SSIEverything - A pass that runs createSSI on every non-void variable,
/// intended for debugging.
namespace {
- struct VISIBILITY_HIDDEN SSIEverything : public FunctionPass {
+ struct SSIEverything : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
SSIEverything() : FunctionPass(&ID) {}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 6fd7d7b..8e1fb98 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -1748,6 +1749,68 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
return true;
}
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+static bool EliminateDuplicatePHINodes(BasicBlock *BB) {
+ bool Changed = false;
+
+ // This implementation doesn't currently consider undef operands
+ // specially. Theroetically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ // Map from PHI hash values to PHI nodes. If multiple PHIs have
+ // the same hash value, the element is the first PHI in the
+ // linked list in CollisionMap.
+ DenseMap<uintptr_t, PHINode *> HashMap;
+
+ // Maintain linked lists of PHI nodes with common hash values.
+ DenseMap<PHINode *, PHINode *> CollisionMap;
+
+ // Examine each PHI.
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I++); ) {
+ // Compute a hash value on the operands. Instcombine will likely have sorted
+ // them, which helps expose duplicates, but we have to check all the
+ // operands to be safe in case instcombine hasn't run.
+ uintptr_t Hash = 0;
+ for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
+ // This hash algorithm is quite weak as hash functions go, but it seems
+ // to do a good enough job for this particular purpose, and is very quick.
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ // If we've never seen this hash value before, it's a unique PHI.
+ std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
+ HashMap.insert(std::make_pair(Hash, PN));
+ if (Pair.second) continue;
+ // Otherwise it's either a duplicate or a hash collision.
+ for (PHINode *OtherPN = Pair.first->second; ; ) {
+ if (OtherPN->isIdenticalTo(PN)) {
+ // A duplicate. Replace this PHI with its duplicate.
+ PN->replaceAllUsesWith(OtherPN);
+ PN->eraseFromParent();
+ Changed = true;
+ break;
+ }
+ // A non-duplicate hash collision.
+ DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
+ if (I == CollisionMap.end()) {
+ // Set this PHI to be the head of the linked list of colliding PHIs.
+ PHINode *Old = Pair.first->second;
+ Pair.first->second = PN;
+ CollisionMap[PN] = Old;
+ break;
+ }
+ // Proceed to the next PHI in the list.
+ OtherPN = I->second;
+ }
+ }
+
+ return Changed;
+}
/// SimplifyCFG - This function is used to do simplification of a CFG. For
/// example, it adjusts branches to branches to eliminate the extra hop, it
@@ -1777,6 +1840,9 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
// away...
Changed |= ConstantFoldTerminator(BB);
+ // Check for and eliminate duplicate PHI nodes in this block.
+ Changed |= EliminateDuplicatePHINodes(BB);
+
// If there is a trivial two-entry PHI node in this basic block, and we can
// eliminate it, do so now.
if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
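// [Editor's note, not part of the patch] The hash used by
// EliminateDuplicatePHINodes above, isolated: XOR in each operand
// pointer, then rotate left by 7 so operand order still influences
// the result. Weak, but cheap, and collisions fall back to the linked
// list in CollisionMap. Compilable on its own:
#include <climits>
#include <stdint.h>
static uintptr_t hashOperands(void *const *Ops, unsigned N) {
  uintptr_t Hash = 0;
  for (unsigned i = 0; i != N; ++i) {
    Hash ^= reinterpret_cast<uintptr_t>(Ops[i]);
    Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
  }
  return Hash;
}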
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 2d8332f..39331d7 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,18 +13,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/BasicBlock.h"
#include "llvm/DerivedTypes.h" // For getNullValue(Type::Int32Ty)
#include "llvm/Constants.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instruction.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/Function.h"
#include "llvm/Metadata.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-Value *llvm::MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context) {
+Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
Value *&VMSlot = VM[V];
if (VMSlot) return VMSlot; // Does it exist in the map yet?
@@ -36,80 +33,91 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context) {
if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MetadataBase>(V))
return VMSlot = const_cast<Value*>(V);
- if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
- if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
- isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
- isa<UndefValue>(C) || isa<MDString>(C))
- return VMSlot = C; // Primitive constants map directly
- else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
- for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, Context);
- if (MV != *i) {
- // This array must contain a reference to a global, make a new array
- // and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CA->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
- return VM[V] = ConstantArray::get(CA->getType(), Values);
- }
+ Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
+ if (C == 0) return 0;
+
+ if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
+ isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
+ isa<UndefValue>(C) || isa<MDString>(C))
+ return VMSlot = C; // Primitive constants map directly
+
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+ for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
+ i != e; ++i) {
+ Value *MV = MapValue(*i, VM);
+ if (MV != *i) {
+ // This array must contain a reference to a global, make a new array
+ // and return it.
+ //
+ std::vector<Constant*> Values;
+ Values.reserve(CA->getNumOperands());
+ for (User::op_iterator j = b; j != i; ++j)
+ Values.push_back(cast<Constant>(*j));
+ Values.push_back(cast<Constant>(MV));
+ for (++i; i != e; ++i)
+ Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ return VM[V] = ConstantArray::get(CA->getType(), Values);
}
- return VM[V] = C;
-
- } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
- for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, Context);
- if (MV != *i) {
- // This struct must contain a reference to a global, make a new struct
- // and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CS->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
- return VM[V] = ConstantStruct::get(CS->getType(), Values);
- }
+ }
+ return VM[V] = C;
+ }
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
+ i != e; ++i) {
+ Value *MV = MapValue(*i, VM);
+ if (MV != *i) {
+ // This struct must contain a reference to a global, make a new struct
+ // and return it.
+ //
+ std::vector<Constant*> Values;
+ Values.reserve(CS->getNumOperands());
+ for (User::op_iterator j = b; j != i; ++j)
+ Values.push_back(cast<Constant>(*j));
+ Values.push_back(cast<Constant>(MV));
+ for (++i; i != e; ++i)
+ Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ return VM[V] = ConstantStruct::get(CS->getType(), Values);
}
- return VM[V] = C;
-
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- std::vector<Constant*> Ops;
- for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
- Ops.push_back(cast<Constant>(MapValue(*i, VM, Context)));
- return VM[V] = CE->getWithOperands(Ops);
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
- for (User::op_iterator b = CP->op_begin(), i = b, e = CP->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, Context);
- if (MV != *i) {
- // This vector value must contain a reference to a global, make a new
- // vector constant and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CP->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
- return VM[V] = ConstantVector::get(Values);
- }
+ }
+ return VM[V] = C;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ std::vector<Constant*> Ops;
+ for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
+ Ops.push_back(cast<Constant>(MapValue(*i, VM)));
+ return VM[V] = CE->getWithOperands(Ops);
+ }
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end();
+ i != e; ++i) {
+ Value *MV = MapValue(*i, VM);
+ if (MV != *i) {
+ // This vector value must contain a reference to a global, make a new
+ // vector constant and return it.
+ //
+ std::vector<Constant*> Values;
+ Values.reserve(CV->getNumOperands());
+ for (User::op_iterator j = b; j != i; ++j)
+ Values.push_back(cast<Constant>(*j));
+ Values.push_back(cast<Constant>(MV));
+ for (++i; i != e; ++i)
+ Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ return VM[V] = ConstantVector::get(Values);
}
- return VM[V] = C;
-
- } else {
- llvm_unreachable("Unknown type of constant!");
}
+ return VM[V] = C;
}
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Function *F = cast<Function>(MapValue(BA->getFunction(), VM));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM));
+ return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
+ }
+
+ llvm_unreachable("Unknown type of constant!");
return 0;
}
@@ -118,7 +126,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context) {
///
void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) {
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, ValueMap, I->getParent()->getContext());
+ Value *V = MapValue(*op, ValueMap);
assert(V && "Referenced value not in value map!");
*op = V;
}
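// [Editor's sketch, not part of the patch] The idiom running through
// MapValue above: 'return VM[V] = Mapped' memoizes the result in the
// map slot that was just looked up, so every constant is rewritten at
// most once however many times it is referenced. Standalone analogue
// with stand-in types and a placeholder mapping:
#include <map>
static int remap(int V, std::map<int, int> &VM) {
  std::map<int, int>::iterator It = VM.find(V);
  if (It != VM.end()) return It->second;   // already mapped
  int Mapped = -V;                         // stand-in for the real work
  return VM[V] = Mapped;                   // memoize, then return
}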
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index d8a708d..9a803a1 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -23,6 +23,7 @@
#include "llvm/InlineAsm.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Metadata.h"
#include "llvm/Module.h"
@@ -1059,6 +1060,15 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
Out << "zeroinitializer";
return;
}
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "blockaddress(";
+ WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine);
+ Out << ", ";
+ WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine);
+ Out << ")";
+ return;
+ }
if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
// As a special case, print the array as a string if it is an array of
@@ -1831,7 +1841,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(BI.getSuccessor(1), true);
} else if (isa<SwitchInst>(I)) {
- // Special case switch statement to get formatting nice and correct...
+ // Special case switch instruction to get formatting nice and correct.
Out << ' ';
writeOperand(Operand , true);
Out << ", ";
@@ -1845,6 +1855,18 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(I.getOperand(op+1), true);
}
Out << "\n ]";
+ } else if (isa<IndirectBrInst>(I)) {
+ // Special case indirectbr instruction to get formatting nice and correct.
+ Out << ' ';
+ writeOperand(Operand, true);
+ Out << ", [";
+
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+ if (i != 1)
+ Out << ", ";
+ writeOperand(I.getOperand(i), true);
+ }
+ Out << ']';
} else if (isa<PHINode>(I)) {
Out << ' ';
TypePrinter.print(I.getType(), Out);
@@ -1966,7 +1988,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << " unwind ";
writeOperand(II->getUnwindDest(), true);
- } else if (const AllocationInst *AI = dyn_cast<AllocationInst>(&I)) {
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
Out << ' ';
TypePrinter.print(AI->getType()->getElementType(), Out);
if (!AI->getArraySize() || AI->isArrayAllocation()) {
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index 50cf84c..23d0557 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -58,6 +58,24 @@ BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
BasicBlock::~BasicBlock() {
+ // If the address of the block is taken and it is being deleted (e.g. because
+ // it is dead), this means that there is either a dangling constant expr
+ // hanging off the block, or an undefined use of the block (source code
+ // expecting the address of a label to keep the block alive even though there
+ // is no indirect branch). Handle these cases by zapping the BlockAddress
+ // nodes. There are no other possible uses at this point.
+ if (hasAddressTaken()) {
+ assert(!use_empty() && "There should be at least one blockaddress!");
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(getContext()), 1);
+ while (!use_empty()) {
+ BlockAddress *BA = cast<BlockAddress>(use_back());
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+ }
+
assert(getParent() == 0 && "BasicBlock still linked into the program!");
dropAllReferences();
InstList.clear();
@@ -277,3 +295,4 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
}
return New;
}
+
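// [Editor's note, not part of the patch] The replacement constant the
// destructor above builds for each dangling blockaddress, extracted
// into a helper: a harmless non-null pointer, e.g.
//   i8* inttoptr (i32 1 to i8*)
#include "llvm/Constants.h"
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
static llvm::Constant *blockAddressTombstone(llvm::LLVMContext &C,
                                             const llvm::Type *PtrTy) {
  llvm::Constant *One =
      llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), 1);
  return llvm::ConstantExpr::getIntToPtr(One, PtrTy);
}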
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 2c0a67f..7f713d1 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -215,7 +215,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
switch (CE->getOpcode()) {
default: return 0;
case Instruction::Or: {
- Constant *RHS = ExtractConstantBytes(C->getOperand(1), ByteStart, ByteSize);
+ Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
if (RHS == 0)
return 0;
@@ -224,13 +224,13 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
if (RHSC->isAllOnesValue())
return RHSC;
- Constant *LHS = ExtractConstantBytes(C->getOperand(0), ByteStart, ByteSize);
+ Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
if (LHS == 0)
return 0;
return ConstantExpr::getOr(LHS, RHS);
}
case Instruction::And: {
- Constant *RHS = ExtractConstantBytes(C->getOperand(1), ByteStart, ByteSize);
+ Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
if (RHS == 0)
return 0;
@@ -238,7 +238,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
if (RHS->isNullValue())
return RHS;
- Constant *LHS = ExtractConstantBytes(C->getOperand(0), ByteStart, ByteSize);
+ Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
if (LHS == 0)
return 0;
return ConstantExpr::getAnd(LHS, RHS);
@@ -259,7 +259,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
ByteSize*8));
// If the extract is known to be fully in the input, extract it.
if (ByteStart+ByteSize+ShAmt <= CSize)
- return ExtractConstantBytes(C->getOperand(0), ByteStart+ShAmt, ByteSize);
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart+ShAmt, ByteSize);
// TODO: Handle the 'partially zero' case.
return 0;
@@ -281,7 +281,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
ByteSize*8));
// If the extract is known to be fully in the input, extract it.
if (ByteStart >= ShAmt)
- return ExtractConstantBytes(C->getOperand(0), ByteStart-ShAmt, ByteSize);
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart-ShAmt, ByteSize);
// TODO: Handle the 'partially zero' case.
return 0;
@@ -289,7 +289,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
case Instruction::ZExt: {
unsigned SrcBitSize =
- cast<IntegerType>(C->getOperand(0)->getType())->getBitWidth();
+ cast<IntegerType>(CE->getOperand(0)->getType())->getBitWidth();
// If extracting something that is completely zero, return 0.
if (ByteStart*8 >= SrcBitSize)
@@ -298,18 +298,18 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
// If exactly extracting the input, return it.
if (ByteStart == 0 && ByteSize*8 == SrcBitSize)
- return C->getOperand(0);
+ return CE->getOperand(0);
// If extracting something completely in the input, and the input is a
// multiple of 8 bits, recurse.
if ((SrcBitSize&7) == 0 && (ByteStart+ByteSize)*8 <= SrcBitSize)
- return ExtractConstantBytes(C->getOperand(0), ByteStart, ByteSize);
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart, ByteSize);
// Otherwise, if extracting a subset of the input that is not a multiple of
// 8 bits, do a shift and trunc to get the bits.
if ((ByteStart+ByteSize)*8 < SrcBitSize) {
assert((SrcBitSize&7) && "Shouldn't get byte sized case here");
- Constant *Res = C->getOperand(0);
+ Constant *Res = CE->getOperand(0);
if (ByteStart)
Res = ConstantExpr::getLShr(Res,
ConstantInt::get(Res->getType(), ByteStart*8));
@@ -634,7 +634,15 @@ Constant *llvm::ConstantFoldExtractValueInstruction(LLVMContext &Context,
Idxs + NumIdx));
// Otherwise recurse.
- return ConstantFoldExtractValueInstruction(Context, Agg->getOperand(*Idxs),
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg))
+ return ConstantFoldExtractValueInstruction(Context, CS->getOperand(*Idxs),
+ Idxs+1, NumIdx-1);
+
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg))
+ return ConstantFoldExtractValueInstruction(Context, CA->getOperand(*Idxs),
+ Idxs+1, NumIdx-1);
+ ConstantVector *CV = cast<ConstantVector>(Agg);
+ return ConstantFoldExtractValueInstruction(Context, CV->getOperand(*Idxs),
Idxs+1, NumIdx-1);
}
@@ -714,11 +722,10 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context,
// Insertion of constant into aggregate constant.
std::vector<Constant*> Ops(Agg->getNumOperands());
for (unsigned i = 0; i < Agg->getNumOperands(); ++i) {
- Constant *Op =
- (*Idxs == i) ?
- ConstantFoldInsertValueInstruction(Context, Agg->getOperand(i),
- Val, Idxs+1, NumIdx-1) :
- Agg->getOperand(i);
+ Constant *Op = cast<Constant>(Agg->getOperand(i));
+ if (*Idxs == i)
+ Op = ConstantFoldInsertValueInstruction(Context, Op,
+ Val, Idxs+1, NumIdx-1);
Ops[i] = Op;
}
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 02c3352..000a063 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the Constant* classes...
+// This file implements the Constant* classes.
//
//===----------------------------------------------------------------------===//
@@ -29,9 +29,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/RWMutex.h"
-#include "llvm/System/Threading.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <algorithm>
@@ -44,7 +41,7 @@ using namespace llvm;
// Constructor to create a '0' constant of arbitrary type...
static const uint64_t zero[2] = {0, 0};
-Constant* Constant::getNullValue(const Type* Ty) {
+Constant *Constant::getNullValue(const Type *Ty) {
switch (Ty->getTypeID()) {
case Type::IntegerTyID:
return ConstantInt::get(Ty, 0);
@@ -72,7 +69,7 @@ Constant* Constant::getNullValue(const Type* Ty) {
}
}
-Constant* Constant::getIntegerValue(const Type* Ty, const APInt &V) {
+Constant* Constant::getIntegerValue(const Type *Ty, const APInt &V) {
const Type *ScalarTy = Ty->getScalarType();
// Create the base integer constant.
@@ -89,13 +86,13 @@ Constant* Constant::getIntegerValue(const Type* Ty, const APInt &V) {
return C;
}
-Constant* Constant::getAllOnesValue(const Type* Ty) {
- if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty))
+Constant* Constant::getAllOnesValue(const Type *Ty) {
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty))
return ConstantInt::get(Ty->getContext(),
APInt::getAllOnesValue(ITy->getBitWidth()));
std::vector<Constant*> Elts;
- const VectorType* VTy = cast<VectorType>(Ty);
+ const VectorType *VTy = cast<VectorType>(Ty);
Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType()));
assert(Elts[0] && "Not a vector integer type!");
return cast<ConstantVector>(ConstantVector::get(Elts));
@@ -140,7 +137,7 @@ bool Constant::canTrap() const {
// ConstantExpr traps if any operands can trap.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (getOperand(i)->canTrap())
+ if (CE->getOperand(i)->canTrap())
return true;
// Otherwise, only specific operations can trap.
@@ -154,12 +151,27 @@ bool Constant::canTrap() const {
case Instruction::SRem:
case Instruction::FRem:
// Div and rem can trap if the RHS is not known to be non-zero.
- if (!isa<ConstantInt>(getOperand(1)) || getOperand(1)->isNullValue())
+ if (!isa<ConstantInt>(CE->getOperand(1)) ||CE->getOperand(1)->isNullValue())
return true;
return false;
}
}
+/// isConstantUsed - Return true if the constant has users other than constant
+/// exprs and other dangling things.
+bool Constant::isConstantUsed() const {
+ for (use_const_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ const Constant *UC = dyn_cast<Constant>(*UI);
+ if (UC == 0 || isa<GlobalValue>(UC))
+ return true;
+
+ if (UC->isConstantUsed())
+ return true;
+ }
+ return false;
+}
+
+
/// getRelocationInfo - This method classifies the entry according to
/// whether or not it may generate a relocation entry. This must be
@@ -182,9 +194,13 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
return GlobalRelocations; // Global reference.
}
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(this))
+ return BA->getFunction()->getRelocationInfo();
+
PossibleRelocationsTy Result = NoRelocation;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- Result = std::max(Result, getOperand(i)->getRelocationInfo());
+ Result = std::max(Result,
+ cast<Constant>(getOperand(i))->getRelocationInfo());
return Result;
}
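// [Editor's sketch, not part of the patch] isConstantUsed above walks
// uses transitively: a constant user is not a "real" use by itself, so
// the search recurses through it looking for an instruction or global
// underneath. Simplified standalone analogue (no GlobalValue special
// case, hypothetical node type):
#include <cstddef>
#include <vector>
struct Node { bool IsConstant; std::vector<Node *> Users; };
static bool isUsed(const Node *N) {
  for (std::size_t i = 0; i < N->Users.size(); ++i)
    if (!N->Users[i]->IsConstant || isUsed(N->Users[i]))
      return true;  // a non-constant user, or one somewhere below it
  return false;
}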
@@ -987,7 +1003,7 @@ Constant *ConstantVector::getSplatValue() {
return Elt;
}
-//---- ConstantPointerNull::get() implementation...
+//---- ConstantPointerNull::get() implementation.
//
ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
@@ -1004,23 +1020,95 @@ void ConstantPointerNull::destroyConstant() {
}
-//---- UndefValue::get() implementation...
+//---- UndefValue::get() implementation.
//
UndefValue *UndefValue::get(const Type *Ty) {
- // Implicitly locked.
return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0);
}
// destroyConstant - Remove the constant from the constant table.
//
void UndefValue::destroyConstant() {
- // Implicitly locked.
getType()->getContext().pImpl->UndefValueConstants.remove(this);
destroyConstantImpl();
}
-//---- ConstantExpr::get() implementations...
+//---- BlockAddress::get() implementation.
+//
+
+BlockAddress *BlockAddress::get(BasicBlock *BB) {
+ assert(BB->getParent() != 0 && "Block must have a parent");
+ return get(BB->getParent(), BB);
+}
+
+BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) {
+ BlockAddress *&BA =
+ F->getContext().pImpl->BlockAddresses[std::make_pair(F, BB)];
+ if (BA == 0)
+ BA = new BlockAddress(F, BB);
+
+ assert(BA->getFunction() == F && "Basic block moved between functions");
+ return BA;
+}
+
+BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
+: Constant(Type::getInt8PtrTy(F->getContext()), Value::BlockAddressVal,
+ &Op<0>(), 2) {
+ setOperand(0, F);
+ setOperand(1, BB);
+ BB->AdjustBlockAddressRefCount(1);
+}
+
+
+// destroyConstant - Remove the constant from the constant table.
+//
+void BlockAddress::destroyConstant() {
+ getFunction()->getType()->getContext().pImpl
+ ->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
+ destroyConstantImpl();
+}
+
+void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+ // This could be replacing either the Basic Block or the Function. In either
+ // case, we have to remove the map entry.
+ Function *NewF = getFunction();
+ BasicBlock *NewBB = getBasicBlock();
+
+ if (U == &Op<0>())
+ NewF = cast<Function>(To);
+ else
+ NewBB = cast<BasicBlock>(To);
+
+ // See if the 'new' entry already exists; if not, just update this in place
+ // and return early.
+ BlockAddress *&NewBA =
+ getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)];
+ if (NewBA == 0) {
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
+
+ // Remove the old entry, this can't cause the map to rehash (just a
+ // tombstone will get added).
+ getContext().pImpl->BlockAddresses.erase(std::make_pair(getFunction(),
+ getBasicBlock()));
+ NewBA = this;
+ setOperand(0, NewF);
+ setOperand(1, NewBB);
+ getBasicBlock()->AdjustBlockAddressRefCount(1);
+ return;
+ }
+
+ // Otherwise, I do need to replace this with an existing value.
+ assert(NewBA != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ uncheckedReplaceAllUsesWith(NewBA);
+
+ destroyConstant();
+}
+
+//---- ConstantExpr::get() implementations.
//
/// This is a utility function to handle folding of casts and lookup of the
@@ -1838,7 +1926,7 @@ const char *ConstantExpr::getOpcodeName() const {
/// single invocation handles all 1000 uses. Handling them one at a time would
/// work, but would be really slow because it would have to unique each updated
/// array instance.
-
+///
void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
Use *U) {
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
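// [Editor's note, not part of the patch] The uniquing pattern behind
// BlockAddress::get above: one canonical object per (Function,
// BasicBlock) key, created lazily into the map slot the lookup itself
// produced. Standalone analogue with stand-in key and value types:
#include <map>
#include <utility>
struct BA { int F, BB; BA(int f, int b) : F(f), BB(b) {} };
static BA *getUnique(std::map<std::pair<int, int>, BA *> &Table,
                     int F, int BB) {
  BA *&Slot = Table[std::make_pair(F, BB)]; // null on first lookup
  if (Slot == 0)
    Slot = new BA(F, BB);                   // create exactly once
  return Slot;
}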
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 526b4b1..268a660 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -20,8 +20,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/RWMutex.h"
#include <map>
namespace llvm {
@@ -332,7 +330,7 @@ struct ExprMapKeyType {
// The number of operands for each ConstantCreator::create method is
// determined by the ConstantTraits template.
// ConstantCreator - A class that is used to create constants by
-// ValueMap*. This class should be partially specialized if there is
+// ConstantUniqueMap*. This class should be partially specialized if there is
// something strange that needs to be done to interface to the ctor for the
// constant.
//
@@ -506,7 +504,7 @@ struct ConstantKeyData<UndefValue> {
template<class ValType, class TypeClass, class ConstantClass,
bool HasLargeKey = false /*true for arrays and structs*/ >
-class ValueMap : public AbstractTypeUser {
+class ConstantUniqueMap : public AbstractTypeUser {
public:
typedef std::pair<const TypeClass*, ValType> MapKey;
typedef std::map<MapKey, ConstantClass *> MapTy;
@@ -529,12 +527,7 @@ private:
///
AbstractTypeMapTy AbstractTypeMap;
- /// ValueMapLock - Mutex for this map.
- sys::SmartMutex<true> ValueMapLock;
-
public:
- // NOTE: This function is not locked. It is the caller's responsibility
- // to enforce proper synchronization.
typename MapTy::iterator map_begin() { return Map.begin(); }
typename MapTy::iterator map_end() { return Map.end(); }
@@ -551,8 +544,6 @@ public:
/// entry and Exists=true. If not, the iterator points to the newly
/// inserted entry and returns Exists=false. Newly inserted entries have
/// I->second == 0, and should be filled in.
- /// NOTE: This function is not locked. It is the caller's responsibility
- // to enforce proper synchronization.
typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, ConstantClass *>
&InsertVal,
bool &Exists) {
@@ -619,7 +610,6 @@ public:
/// getOrCreate - Return the specified constant from the map, creating it if
/// necessary.
ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
- sys::SmartScopedLock<true> Lock(ValueMapLock);
MapKey Lookup(Ty, V);
ConstantClass* Result = 0;
@@ -674,7 +664,6 @@ public:
}
void remove(ConstantClass *CP) {
- sys::SmartScopedLock<true> Lock(ValueMapLock);
typename MapTy::iterator I = FindExistingElement(CP);
assert(I != Map.end() && "Constant not found in constant table!");
assert(I->second == CP && "Didn't find correct element?");
@@ -694,8 +683,6 @@ public:
/// MoveConstantToNewSlot - If we are about to change C to be the element
/// specified by I, update our internal data structures to reflect this
/// fact.
- /// NOTE: This function is not locked. It is the responsibility of the
- /// caller to enforce proper synchronization if using this method.
void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
// First, remove the old location of the specified constant in the map.
typename MapTy::iterator OldI = FindExistingElement(C);
@@ -725,7 +712,6 @@ public:
}
void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
- sys::SmartScopedLock<true> Lock(ValueMapLock);
typename AbstractTypeMapTy::iterator I = AbstractTypeMap.find(OldTy);
assert(I != AbstractTypeMap.end() &&
@@ -778,7 +764,7 @@ public:
}
void dump() const {
- DEBUG(errs() << "Constant.cpp: ValueMap\n");
+ DEBUG(errs() << "Constant.cpp: ConstantUniqueMap\n");
}
};
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index a28037d..9a49d42 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1724,7 +1724,8 @@ LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
}
LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
- return wrap(unwrap(B)->CreateFree(unwrap(PointerVal)));
+ return wrap(unwrap(B)->Insert(
+ CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock())));
}
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index b49faf8..26c02e0 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -322,7 +322,7 @@ DominanceFrontier::calculate(const DominatorTree &DT,
void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
for (const_iterator I = begin(), E = end(); I != E; ++I) {
- OS << " DomFrontier for BB";
+ OS << " DomFrontier for BB ";
if (I->first)
WriteAsOperand(OS, I->first, false);
else
@@ -332,11 +332,13 @@ void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
const std::set<BasicBlock*> &BBs = I->second;
for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
- I != E; ++I)
+ I != E; ++I) {
+ OS << ' ';
if (*I)
WriteAsOperand(OS, *I, false);
else
- OS << " <<exit node>>";
+ OS << "<<exit node>>";
+ }
OS << "\n";
}
}
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 8ad885c..6cf2c81 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -217,7 +217,20 @@ void Function::setParent(Module *parent) {
void Function::dropAllReferences() {
for (iterator I = begin(), E = end(); I != E; ++I)
I->dropAllReferences();
- BasicBlocks.clear(); // Delete all basic blocks...
+
+ // Delete all basic blocks.
+ while (!BasicBlocks.empty()) {
+ // If there is still a reference to the block, it must be a 'blockaddress'
+ // constant pointing to it. Just replace the BlockAddress with undef.
+ BasicBlock *BB = BasicBlocks.begin();
+ if (!BB->use_empty()) {
+ BlockAddress *BA = cast<BlockAddress>(BB->use_back());
+ BA->replaceAllUsesWith(UndefValue::get(BA->getType()));
+ BA->destroyConstant();
+ }
+
+ BB->eraseFromParent();
+ }
}
void Function::addAttribute(unsigned i, Attributes attr) {
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index d18a201..03ceecb 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -75,6 +75,7 @@ void GlobalValue::removeDeadConstantUsers() const {
}
}
+
/// Override destroyConstant to make sure it doesn't get called on
/// GlobalValue's because they shouldn't be treated like other constants.
void GlobalValue::destroyConstant() {
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index dd8a543..ce253d6 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -103,6 +103,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case Ret: return "ret";
case Br: return "br";
case Switch: return "switch";
+ case IndirectBr: return "indirectbr";
case Invoke: return "invoke";
case Unwind: return "unwind";
case Unreachable: return "unreachable";
@@ -127,7 +128,6 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case Xor: return "xor";
// Memory instructions...
- case Free: return "free";
case Alloca: return "alloca";
case Load: return "load";
case Store: return "store";
@@ -308,7 +308,6 @@ bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
bool Instruction::mayReadFromMemory() const {
switch (getOpcode()) {
default: return false;
- case Instruction::Free:
case Instruction::VAArg:
case Instruction::Load:
return true;
@@ -326,7 +325,6 @@ bool Instruction::mayReadFromMemory() const {
bool Instruction::mayWriteToMemory() const {
switch (getOpcode()) {
default: return false;
- case Instruction::Free:
case Instruction::Store:
case Instruction::VAArg:
return true;
@@ -380,7 +378,7 @@ bool Instruction::isCommutative(unsigned op) {
}
}
-// Code here matches isMalloc from MallocHelper, which is not in VMCore.
+// Code here matches isMalloc from MemoryBuiltins, which is not in VMCore.
static bool isMalloc(const Value* I) {
const CallInst *CI = dyn_cast<CallInst>(I);
if (!CI) {
@@ -390,15 +388,25 @@ static bool isMalloc(const Value* I) {
CI = dyn_cast<CallInst>(BCI->getOperand(0));
}
- if (!CI) return false;
-
- const Module* M = CI->getParent()->getParent()->getParent();
- Constant *MallocFunc = M->getFunction("malloc");
+ if (!CI)
+ return false;
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "malloc")
+ return false;
- if (CI->getOperand(0) != MallocFunc)
+ // Check malloc prototype.
+ // FIXME: workaround for PR5130; this will be obsolete once a nobuiltin
+ // attribute exists.
+ const FunctionType *FTy = Callee->getFunctionType();
+ if (FTy->getNumParams() != 1)
return false;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
+ if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
+ return false;
+ return true;
+ }
- return true;
+ return false;
}
bool Instruction::isSafeToSpeculativelyExecute() const {
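The callee is now checked structurally instead of being pointer-compared against
M->getFunction("malloc"). A hedged reading of what the prototype test accepts,
as illustrative IR declarations (note the return type is deliberately left
unchecked here):

  // declare i8* @malloc(i64)           ; accepted: declaration with one i64 param
  // declare i8* @malloc(i32)           ; accepted: covers 32-bit size_t targets
  // declare i8* @malloc(i64, i32)      ; rejected: two parameters
  // define  i8* @malloc(i64) { ... }   ; rejected: has a body, not a declaration
  // declare i8* @wrapped_malloc(i64)   ; rejected: name is not "malloc"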
@@ -426,7 +434,7 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
case Load: {
if (cast<LoadInst>(this)->isVolatile())
return false;
- if (isa<AllocationInst>(getOperand(0)) || isMalloc(getOperand(0)))
+ if (isa<AllocaInst>(getOperand(0)) || isMalloc(getOperand(0)))
return true;
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(getOperand(0)))
return !GV->hasExternalWeakLinkage();
@@ -444,7 +452,6 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
case Invoke:
case PHI:
case Store:
- case Free:
case Ret:
case Br:
case Switch:
@@ -453,3 +460,11 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
return false; // Misc instructions which have effects
}
}
+
+Instruction *Instruction::clone() const {
+ Instruction *New = clone_impl();
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (hasMetadata())
+ getContext().pImpl->TheMetadata.ValueIsCloned(this, New);
+ return New;
+}
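clone() thus becomes a template method: each subclass supplies a bare clone_impl()
(see the Instructions.cpp changes below), and this single wrapper re-applies the
optional flags and metadata that every per-class clone() used to copy by hand.
A short usage sketch (X, Y and InsertPt are placeholder values):

  void cloneExample(Value *X, Value *Y, Instruction *InsertPt) {
    BinaryOperator *Add = BinaryOperator::CreateNSWAdd(X, Y, "sum", InsertPt);
    Instruction *Copy = Add->clone(); // SubclassOptionalData keeps the nsw bit
    Copy->insertBefore(InsertPt);     // clones start out with no parent
  }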
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index e212d5c..52d8735 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -559,6 +559,51 @@ Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
ArraySize, MallocF, Name);
}
+static Instruction* createFree(Value* Source, Instruction *InsertBefore,
+ BasicBlock *InsertAtEnd) {
+ assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
+ "createFree needs either InsertBefore or InsertAtEnd");
+ assert(isa<PointerType>(Source->getType()) &&
+ "Can not free something of nonpointer type!");
+
+ BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
+ Module* M = BB->getParent()->getParent();
+
+ const Type *VoidTy = Type::getVoidTy(M->getContext());
+ const Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
+ // prototype free as "void free(void*)"
+ Constant *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
+
+ CallInst* Result = NULL;
+ Value *PtrCast = Source;
+ if (InsertBefore) {
+ if (Source->getType() != IntPtrTy)
+ PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertBefore);
+ Result = CallInst::Create(FreeFunc, PtrCast, "", InsertBefore);
+ } else {
+ if (Source->getType() != IntPtrTy)
+ PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertAtEnd);
+ Result = CallInst::Create(FreeFunc, PtrCast, "");
+ }
+ Result->setTailCall();
+
+ return Result;
+}
+
+/// CreateFree - Generate the IR for a call to the builtin free function.
+void CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
+ createFree(Source, InsertBefore, NULL);
+}
+
+/// CreateFree - Generate the IR for a call to the builtin free function.
+/// Note: This function does not add the call to the basic block; that is the
+/// responsibility of the caller.
+Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
+ Instruction* FreeCall = createFree(Source, NULL, InsertAtEnd);
+ assert(FreeCall && "CreateFree did not create a CallInst");
+ return FreeCall;
+}
+
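Code that used to construct a FreeInst now goes through these helpers, which lower
straight to a tail call of "void free(i8*)" with a bitcast inserted when the pointer
type differs. A usage sketch under the same caveat as the comment above: the
InsertAtEnd form only creates the call (P, InsertPt and BB are placeholders):

  void freeExamples(Value *P, Instruction *InsertPt, BasicBlock *BB) {
    // Form 1: build free(P) and insert it before an existing instruction.
    CallInst::CreateFree(P, InsertPt);
    // Form 2: build the call for block BB, then attach it ourselves.
    Instruction *FreeCall = CallInst::CreateFree(P, BB);
    BB->getInstList().push_back(FreeCall);
  }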
//===----------------------------------------------------------------------===//
// InvokeInst Implementation
//===----------------------------------------------------------------------===//
@@ -838,7 +883,7 @@ void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
//===----------------------------------------------------------------------===//
-// AllocationInst Implementation
+// AllocaInst Implementation
//===----------------------------------------------------------------------===//
static Value *getAISize(LLVMContext &Context, Value *Amt) {
@@ -853,20 +898,54 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
return Amt;
}
-AllocationInst::AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy,
- unsigned Align, const Twine &Name,
- Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), iTy,
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore) {
+ setAlignment(0);
+ assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
+ setAlignment(0);
+ assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), 0), InsertBefore) {
+ setAlignment(0);
+ assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), 0), InsertAtEnd) {
+ setAlignment(0);
+ assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertBefore) {
setAlignment(Align);
assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
setName(Name);
}
-AllocationInst::AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy,
- unsigned Align, const Twine &Name,
- BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), iTy,
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
setAlignment(Align);
assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
@@ -874,22 +953,22 @@ AllocationInst::AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy,
}
// Out of line virtual method, so the vtable, etc has a home.
-AllocationInst::~AllocationInst() {
+AllocaInst::~AllocaInst() {
}
-void AllocationInst::setAlignment(unsigned Align) {
+void AllocaInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
SubclassData = Log2_32(Align) + 1;
assert(getAlignment() == Align && "Alignment representation error!");
}
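The rename leaves the encoding untouched: SubclassData holds Log2_32(Align) + 1,
so zero still means "no alignment specified" and only powers of two round-trip.
A small worked example (Ctx is a placeholder LLVMContext; this relies on the
header's defaulted insert-point argument):

  void allocaAlignExample(LLVMContext &Ctx) {
    AllocaInst *AI =
        new AllocaInst(Type::getInt32Ty(Ctx), 0, /*Align=*/16, "buf");
    // 16 == 1 << 4, stored as 4 + 1 == 5; getAlignment() decodes it back.
    assert(AI->getAlignment() == 16);
    delete AI;  // never inserted into a block, so we own it
  }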
-bool AllocationInst::isArrayAllocation() const {
+bool AllocaInst::isArrayAllocation() const {
if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(0)))
return CI->getZExtValue() != 1;
return true;
}
-const Type *AllocationInst::getAllocatedType() const {
+const Type *AllocaInst::getAllocatedType() const {
return getType()->getElementType();
}
@@ -906,28 +985,6 @@ bool AllocaInst::isStaticAlloca() const {
}
//===----------------------------------------------------------------------===//
-// FreeInst Implementation
-//===----------------------------------------------------------------------===//
-
-void FreeInst::AssertOK() {
- assert(isa<PointerType>(getOperand(0)->getType()) &&
- "Can not free something of nonpointer type!");
-}
-
-FreeInst::FreeInst(Value *Ptr, Instruction *InsertBefore)
- : UnaryInstruction(Type::getVoidTy(Ptr->getContext()),
- Free, Ptr, InsertBefore) {
- AssertOK();
-}
-
-FreeInst::FreeInst(Value *Ptr, BasicBlock *InsertAtEnd)
- : UnaryInstruction(Type::getVoidTy(Ptr->getContext()),
- Free, Ptr, InsertAtEnd) {
- AssertOK();
-}
-
-
-//===----------------------------------------------------------------------===//
// LoadInst Implementation
//===----------------------------------------------------------------------===//
@@ -2780,17 +2837,6 @@ ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
}
}
-bool ICmpInst::isSignedPredicate(Predicate pred) {
- switch (pred) {
- default: assert(! "Unknown icmp predicate!");
- case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
- return true;
- case ICMP_EQ: case ICMP_NE: case ICMP_UGT: case ICMP_ULT:
- case ICMP_UGE: case ICMP_ULE:
- return false;
- }
-}
-
/// Initialize a set of values that all satisfy the condition with C.
///
ConstantRange
@@ -2864,7 +2910,7 @@ bool CmpInst::isUnsigned(unsigned short predicate) {
}
}
-bool CmpInst::isSigned(unsigned short predicate){
+bool CmpInst::isSigned(unsigned short predicate) {
switch (predicate) {
default: return false;
case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
@@ -2890,6 +2936,23 @@ bool CmpInst::isUnordered(unsigned short predicate) {
}
}
+bool CmpInst::isTrueWhenEqual(unsigned short predicate) {
+ switch(predicate) {
+ default: return false;
+ case ICMP_EQ: case ICMP_UGE: case ICMP_ULE: case ICMP_SGE: case ICMP_SLE:
+ case FCMP_TRUE: case FCMP_UEQ: case FCMP_UGE: case FCMP_ULE: return true;
+ }
+}
+
+bool CmpInst::isFalseWhenEqual(unsigned short predicate) {
+ switch(predicate) {
+ case ICMP_NE: case ICMP_UGT: case ICMP_ULT: case ICMP_SGT: case ICMP_SLT:
+ case FCMP_FALSE: case FCMP_ONE: case FCMP_OGT: case FCMP_OLT: return true;
+ default: return false;
+ }
+}
+
+
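These helpers answer "what does pred(x, x) evaluate to?"; the unordered FP
predicates sit on the true-when-equal side because identical NaN operands still
compare unordered. Quick sanity checks:

  assert( CmpInst::isTrueWhenEqual(CmpInst::ICMP_ULE));  // x <=u x holds
  assert( CmpInst::isFalseWhenEqual(CmpInst::ICMP_SGT)); // x >s x never holds
  // FCMP_OEQ is on neither list: oeq(x, x) is false when x is NaN.
  assert(!CmpInst::isTrueWhenEqual(CmpInst::FCMP_OEQ));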
//===----------------------------------------------------------------------===//
// SwitchInst Implementation
//===----------------------------------------------------------------------===//
@@ -3023,364 +3086,272 @@ void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
setSuccessor(idx, B);
}
+//===----------------------------------------------------------------------===//
+// IndirectBrInst Implementation
+//===----------------------------------------------------------------------===//
+
+void IndirectBrInst::init(Value *Address, unsigned NumDests) {
+ assert(Address && isa<PointerType>(Address->getType()) &&
+ "Address of indirectbr must be a pointer");
+ ReservedSpace = 1+NumDests;
+ NumOperands = 1;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ OperandList[0] = Address;
+}
+
+
+/// resizeOperands - This adjusts the length of the operands list according
+/// to the following behavior:
+/// 1. If NumOps == 0, grow the operand list in response to a push_back-style
+/// operation. This doubles the number of operands.
+/// 2. If NumOps > NumOperands, reserve space for NumOps operands.
+/// 3. If NumOps == NumOperands, trim the reserved space.
+///
+void IndirectBrInst::resizeOperands(unsigned NumOps) {
+ unsigned e = getNumOperands();
+ if (NumOps == 0) {
+ NumOps = e*2;
+ } else if (NumOps*2 > NumOperands) {
+ // No resize needed.
+ if (ReservedSpace >= NumOps) return;
+ } else if (NumOps == NumOperands) {
+ if (ReservedSpace == NumOps) return;
+ } else {
+ return;
+ }
+
+ ReservedSpace = NumOps;
+ Use *NewOps = allocHungoffUses(NumOps);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i)
+ NewOps[i] = OldOps[i];
+ OperandList = NewOps;
+ if (OldOps) Use::zap(OldOps, OldOps + e, true);
+}
+
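The hung-off operand list follows the same growth policy as SwitchInst. A worked
trace for an indirectbr created for two destinations with both already added
(so NumOperands == 3, counting the address operand, and ReservedSpace == 3):

  // addDestination() on a full list: NumOperands(3) + 1 > ReservedSpace(3),
  // so resizeOperands(0) takes the push_back path: NumOps = 3 * 2 = 6,
  // six Use slots are allocated, the three live operands are copied over,
  // and the old hung-off array is zapped.
  IBI->addDestination(ExtraDest);  // IBI and ExtraDest are placeholders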
+IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
+ Instruction *InsertBefore)
+: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
+ 0, 0, InsertBefore) {
+ init(Address, NumCases);
+}
+
+IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
+ BasicBlock *InsertAtEnd)
+: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
+ 0, 0, InsertAtEnd) {
+ init(Address, NumCases);
+}
+
+IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
+ : TerminatorInst(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
+ allocHungoffUses(IBI.getNumOperands()),
+ IBI.getNumOperands()) {
+ Use *OL = OperandList, *InOL = IBI.OperandList;
+ for (unsigned i = 0, E = IBI.getNumOperands(); i != E; ++i)
+ OL[i] = InOL[i];
+ SubclassOptionalData = IBI.SubclassOptionalData;
+}
+
+IndirectBrInst::~IndirectBrInst() {
+ dropHungoffUses(OperandList);
+}
+
+/// addDestination - Add a destination.
+///
+void IndirectBrInst::addDestination(BasicBlock *DestBB) {
+ unsigned OpNo = NumOperands;
+ if (OpNo+1 > ReservedSpace)
+ resizeOperands(0); // Get more space!
+ // Initialize some new operands.
+ assert(OpNo < ReservedSpace && "Growing didn't work!");
+ NumOperands = OpNo+1;
+ OperandList[OpNo] = DestBB;
+}
+
+/// removeDestination - This method removes the specified successor from the
+/// indirectbr instruction.
+void IndirectBrInst::removeDestination(unsigned idx) {
+ assert(idx < getNumOperands()-1 && "Successor index out of range!");
+
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+
+ // Replace this value with the last one.
+ OL[idx+1] = OL[NumOps-1];
+
+ // Nuke the last value.
+ OL[NumOps-1].set(0);
+ NumOperands = NumOps-1;
+}
+
+BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned IndirectBrInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
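A minimal end-to-end sketch, assuming the usual Create wrappers in the header
(Pred and BB1..BB3 are placeholder blocks). Note that removeDestination() fills
the vacated slot with the last successor, so successor order is not stable:

  void buildIBr(BasicBlock *Pred, BasicBlock *BB1, BasicBlock *BB2,
                BasicBlock *BB3) {
    IndirectBrInst *IBI =
        IndirectBrInst::Create(BlockAddress::get(BB1), 3, /*InsertAtEnd=*/Pred);
    IBI->addDestination(BB1);
    IBI->addDestination(BB2);
    IBI->addDestination(BB3);
    IBI->removeDestination(0);  // BB3 is swapped into slot 0
    assert(IBI->getNumSuccessors() == 2 && IBI->getSuccessor(0) == BB3);
  }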
+//===----------------------------------------------------------------------===//
+// clone_impl() implementations
+//===----------------------------------------------------------------------===//
+
// Define these methods here so vtables don't get emitted into every translation
// unit that uses these classes.
-GetElementPtrInst *GetElementPtrInst::clone() const {
- GetElementPtrInst *New = new(getNumOperands()) GetElementPtrInst(*this);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+GetElementPtrInst *GetElementPtrInst::clone_impl() const {
+ return new (getNumOperands()) GetElementPtrInst(*this);
}
-BinaryOperator *BinaryOperator::clone() const {
- BinaryOperator *New = Create(getOpcode(), Op<0>(), Op<1>());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+BinaryOperator *BinaryOperator::clone_impl() const {
+ return Create(getOpcode(), Op<0>(), Op<1>());
}
-FCmpInst* FCmpInst::clone() const {
- FCmpInst *New = new FCmpInst(getPredicate(), Op<0>(), Op<1>());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
-}
-ICmpInst* ICmpInst::clone() const {
- ICmpInst *New = new ICmpInst(getPredicate(), Op<0>(), Op<1>());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+FCmpInst* FCmpInst::clone_impl() const {
+ return new FCmpInst(getPredicate(), Op<0>(), Op<1>());
}
-ExtractValueInst *ExtractValueInst::clone() const {
- ExtractValueInst *New = new ExtractValueInst(*this);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
-}
-InsertValueInst *InsertValueInst::clone() const {
- InsertValueInst *New = new InsertValueInst(*this);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+ICmpInst* ICmpInst::clone_impl() const {
+ return new ICmpInst(getPredicate(), Op<0>(), Op<1>());
}
-AllocaInst *AllocaInst::clone() const {
- AllocaInst *New = new AllocaInst(getAllocatedType(),
- (Value*)getOperand(0),
- getAlignment());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+ExtractValueInst *ExtractValueInst::clone_impl() const {
+ return new ExtractValueInst(*this);
}
-FreeInst *FreeInst::clone() const {
- FreeInst *New = new FreeInst(getOperand(0));
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+InsertValueInst *InsertValueInst::clone_impl() const {
+ return new InsertValueInst(*this);
}
-LoadInst *LoadInst::clone() const {
- LoadInst *New = new LoadInst(getOperand(0),
- Twine(), isVolatile(),
- getAlignment());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+AllocaInst *AllocaInst::clone_impl() const {
+ return new AllocaInst(getAllocatedType(),
+ (Value*)getOperand(0),
+ getAlignment());
}
-StoreInst *StoreInst::clone() const {
- StoreInst *New = new StoreInst(getOperand(0), getOperand(1),
- isVolatile(), getAlignment());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+LoadInst *LoadInst::clone_impl() const {
+ return new LoadInst(getOperand(0),
+ Twine(), isVolatile(),
+ getAlignment());
}
-TruncInst *TruncInst::clone() const {
- TruncInst *New = new TruncInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+StoreInst *StoreInst::clone_impl() const {
+ return new StoreInst(getOperand(0), getOperand(1),
+ isVolatile(), getAlignment());
}
-ZExtInst *ZExtInst::clone() const {
- ZExtInst *New = new ZExtInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+TruncInst *TruncInst::clone_impl() const {
+ return new TruncInst(getOperand(0), getType());
}
-SExtInst *SExtInst::clone() const {
- SExtInst *New = new SExtInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+ZExtInst *ZExtInst::clone_impl() const {
+ return new ZExtInst(getOperand(0), getType());
}
-FPTruncInst *FPTruncInst::clone() const {
- FPTruncInst *New = new FPTruncInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+SExtInst *SExtInst::clone_impl() const {
+ return new SExtInst(getOperand(0), getType());
}
-FPExtInst *FPExtInst::clone() const {
- FPExtInst *New = new FPExtInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+FPTruncInst *FPTruncInst::clone_impl() const {
+ return new FPTruncInst(getOperand(0), getType());
}
-UIToFPInst *UIToFPInst::clone() const {
- UIToFPInst *New = new UIToFPInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+FPExtInst *FPExtInst::clone_impl() const {
+ return new FPExtInst(getOperand(0), getType());
}
-SIToFPInst *SIToFPInst::clone() const {
- SIToFPInst *New = new SIToFPInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+UIToFPInst *UIToFPInst::clone_impl() const {
+ return new UIToFPInst(getOperand(0), getType());
}
-FPToUIInst *FPToUIInst::clone() const {
- FPToUIInst *New = new FPToUIInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+SIToFPInst *SIToFPInst::clone_impl() const {
+ return new SIToFPInst(getOperand(0), getType());
}
-FPToSIInst *FPToSIInst::clone() const {
- FPToSIInst *New = new FPToSIInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+FPToUIInst *FPToUIInst::clone_impl() const {
+ return new FPToUIInst(getOperand(0), getType());
}
-PtrToIntInst *PtrToIntInst::clone() const {
- PtrToIntInst *New = new PtrToIntInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+FPToSIInst *FPToSIInst::clone_impl() const {
+ return new FPToSIInst(getOperand(0), getType());
}
-IntToPtrInst *IntToPtrInst::clone() const {
- IntToPtrInst *New = new IntToPtrInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+PtrToIntInst *PtrToIntInst::clone_impl() const {
+ return new PtrToIntInst(getOperand(0), getType());
}
-BitCastInst *BitCastInst::clone() const {
- BitCastInst *New = new BitCastInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+IntToPtrInst *IntToPtrInst::clone_impl() const {
+ return new IntToPtrInst(getOperand(0), getType());
}
-CallInst *CallInst::clone() const {
- CallInst *New = new(getNumOperands()) CallInst(*this);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+BitCastInst *BitCastInst::clone_impl() const {
+ return new BitCastInst(getOperand(0), getType());
}
-SelectInst *SelectInst::clone() const {
- SelectInst *New = SelectInst::Create(getOperand(0),
- getOperand(1),
- getOperand(2));
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+CallInst *CallInst::clone_impl() const {
+ return new(getNumOperands()) CallInst(*this);
}
-VAArgInst *VAArgInst::clone() const {
- VAArgInst *New = new VAArgInst(getOperand(0), getType());
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+SelectInst *SelectInst::clone_impl() const {
+ return SelectInst::Create(getOperand(0), getOperand(1), getOperand(2));
}
-ExtractElementInst *ExtractElementInst::clone() const {
- ExtractElementInst *New = ExtractElementInst::Create(getOperand(0),
- getOperand(1));
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+VAArgInst *VAArgInst::clone_impl() const {
+ return new VAArgInst(getOperand(0), getType());
}
-InsertElementInst *InsertElementInst::clone() const {
- InsertElementInst *New = InsertElementInst::Create(getOperand(0),
- getOperand(1),
- getOperand(2));
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+ExtractElementInst *ExtractElementInst::clone_impl() const {
+ return ExtractElementInst::Create(getOperand(0), getOperand(1));
}
-ShuffleVectorInst *ShuffleVectorInst::clone() const {
- ShuffleVectorInst *New = new ShuffleVectorInst(getOperand(0),
- getOperand(1),
- getOperand(2));
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+InsertElementInst *InsertElementInst::clone_impl() const {
+ return InsertElementInst::Create(getOperand(0),
+ getOperand(1),
+ getOperand(2));
}
-PHINode *PHINode::clone() const {
- PHINode *New = new PHINode(*this);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+ShuffleVectorInst *ShuffleVectorInst::clone_impl() const {
+ return new ShuffleVectorInst(getOperand(0),
+ getOperand(1),
+ getOperand(2));
}
-ReturnInst *ReturnInst::clone() const {
- ReturnInst *New = new(getNumOperands()) ReturnInst(*this);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+PHINode *PHINode::clone_impl() const {
+ return new PHINode(*this);
}
-BranchInst *BranchInst::clone() const {
+ReturnInst *ReturnInst::clone_impl() const {
+ return new(getNumOperands()) ReturnInst(*this);
+}
+
+BranchInst *BranchInst::clone_impl() const {
unsigned Ops(getNumOperands());
- BranchInst *New = new(Ops, Ops == 1) BranchInst(*this);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+ return new(Ops, Ops == 1) BranchInst(*this);
}
-SwitchInst *SwitchInst::clone() const {
- SwitchInst *New = new SwitchInst(*this);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+SwitchInst *SwitchInst::clone_impl() const {
+ return new SwitchInst(*this);
}
-InvokeInst *InvokeInst::clone() const {
- InvokeInst *New = new(getNumOperands()) InvokeInst(*this);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata()) {
- LLVMContext &Context = getContext();
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- }
- return New;
+IndirectBrInst *IndirectBrInst::clone_impl() const {
+ return new IndirectBrInst(*this);
+}
+
+
+InvokeInst *InvokeInst::clone_impl() const {
+ return new(getNumOperands()) InvokeInst(*this);
}
-UnwindInst *UnwindInst::clone() const {
+UnwindInst *UnwindInst::clone_impl() const {
LLVMContext &Context = getContext();
- UnwindInst *New = new UnwindInst(Context);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata())
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- return New;
+ return new UnwindInst(Context);
}
-UnreachableInst *UnreachableInst::clone() const {
+UnreachableInst *UnreachableInst::clone_impl() const {
LLVMContext &Context = getContext();
- UnreachableInst *New = new UnreachableInst(Context);
- New->SubclassOptionalData = SubclassOptionalData;
- if (hasMetadata())
- Context.pImpl->TheMetadata.ValueIsCloned(this, New);
- return New;
+ return new UnreachableInst(Context);
}
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 84902d5..1c3244b 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -22,8 +22,6 @@
#include "llvm/Metadata.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/RWMutex.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -108,41 +106,32 @@ public:
FoldingSet<MDNode> MDNodeSet;
- ValueMap<char, Type, ConstantAggregateZero> AggZeroConstants;
+ ConstantUniqueMap<char, Type, ConstantAggregateZero> AggZeroConstants;
- typedef ValueMap<std::vector<Constant*>, ArrayType,
+ typedef ConstantUniqueMap<std::vector<Constant*>, ArrayType,
ConstantArray, true /*largekey*/> ArrayConstantsTy;
ArrayConstantsTy ArrayConstants;
- typedef ValueMap<std::vector<Constant*>, StructType,
- ConstantStruct, true /*largekey*/> StructConstantsTy;
+ typedef ConstantUniqueMap<std::vector<Constant*>, StructType,
+ ConstantStruct, true /*largekey*/> StructConstantsTy;
StructConstantsTy StructConstants;
- typedef ValueMap<std::vector<Constant*>, VectorType,
- ConstantVector> VectorConstantsTy;
+ typedef ConstantUniqueMap<std::vector<Constant*>, VectorType,
+ ConstantVector> VectorConstantsTy;
VectorConstantsTy VectorConstants;
- ValueMap<char, PointerType, ConstantPointerNull> NullPtrConstants;
+ ConstantUniqueMap<char, PointerType, ConstantPointerNull> NullPtrConstants;
- ValueMap<char, Type, UndefValue> UndefValueConstants;
+ ConstantUniqueMap<char, Type, UndefValue> UndefValueConstants;
- ValueMap<ExprMapKeyType, Type, ConstantExpr> ExprConstants;
+ DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
+ ConstantUniqueMap<ExprMapKeyType, Type, ConstantExpr> ExprConstants;
ConstantInt *TheTrueVal;
ConstantInt *TheFalseVal;
- // Lock used for guarding access to the leak detector
- sys::SmartMutex<true> LLVMObjectsLock;
LeakDetectorImpl<Value> LLVMObjects;
- // Lock used for guarding access to the type maps.
- sys::SmartMutex<true> TypeMapLock;
-
- // Recursive lock used for guarding access to AbstractTypeUsers.
- // NOTE: The true template parameter means this will no-op when we're not in
- // multithreaded mode.
- sys::SmartMutex<true> AbstractTypeUsersLock;
-
// Basic type instances.
const Type VoidTy;
const Type LabelTy;
diff --git a/lib/VMCore/LeakDetector.cpp b/lib/VMCore/LeakDetector.cpp
index 5ebd4f5..a44f61d 100644
--- a/lib/VMCore/LeakDetector.cpp
+++ b/lib/VMCore/LeakDetector.cpp
@@ -36,7 +36,6 @@ void LeakDetector::addGarbageObjectImpl(void *Object) {
void LeakDetector::addGarbageObjectImpl(const Value *Object) {
LLVMContextImpl *pImpl = Object->getContext().pImpl;
- sys::SmartScopedLock<true> Lock(pImpl->LLVMObjectsLock);
pImpl->LLVMObjects.addGarbage(Object);
}
@@ -47,7 +46,6 @@ void LeakDetector::removeGarbageObjectImpl(void *Object) {
void LeakDetector::removeGarbageObjectImpl(const Value *Object) {
LLVMContextImpl *pImpl = Object->getContext().pImpl;
- sys::SmartScopedLock<true> Lock(pImpl->LLVMObjectsLock);
pImpl->LLVMObjects.removeGarbage(Object);
}
@@ -55,7 +53,6 @@ void LeakDetector::checkForGarbageImpl(LLVMContext &Context,
const std::string &Message) {
LLVMContextImpl *pImpl = Context.pImpl;
sys::SmartScopedLock<true> Lock(*ObjectsLock);
- sys::SmartScopedLock<true> CLock(pImpl->LLVMObjectsLock);
Objects->setName("GENERIC");
pImpl->LLVMObjects.setName("LLVM");
diff --git a/lib/VMCore/LeaksContext.h b/lib/VMCore/LeaksContext.h
index b0c3a14..bd10a47 100644
--- a/lib/VMCore/LeaksContext.h
+++ b/lib/VMCore/LeaksContext.h
@@ -14,7 +14,6 @@
#include "llvm/Value.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
template <class T>
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index ad9653f..4fadfed 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -344,20 +344,22 @@ getMDs(const Instruction *Inst, SmallVectorImpl<MDPairTy> &MDs) const {
MDStoreTy::iterator I = MetadataStore.find(Inst);
if (I == MetadataStore.end())
return;
+ MDs.resize(I->second.size());
for (MDMapTy::iterator MI = I->second.begin(), ME = I->second.end();
MI != ME; ++MI)
- MDs.push_back(std::make_pair(MI->first, MI->second));
- std::sort(MDs.begin(), MDs.end());
+ // MD kinds are numbered from 1.
+ MDs[MI->first - 1] = std::make_pair(MI->first, MI->second);
}
/// getHandlerNames - Populate the client-supplied smallvector with custom
/// metadata names and IDs.
void MetadataContextImpl::
getHandlerNames(SmallVectorImpl<std::pair<unsigned, StringRef> >&Names) const {
+ Names.resize(MDHandlerNames.size());
for (StringMap<unsigned>::const_iterator I = MDHandlerNames.begin(),
E = MDHandlerNames.end(); I != E; ++I)
- Names.push_back(std::make_pair(I->second, I->first()));
- std::sort(Names.begin(), Names.end());
+ // MD Handlers are numbered from 1.
+ Names[I->second - 1] = std::make_pair(I->second, I->first());
}
/// ValueIsCloned - This handler is used to update metadata store
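Both getters now rely on kind and handler IDs being dense and 1-based: entry K is
written straight into slot K - 1, so the output is ordered by construction and the
old push_back-plus-std::sort pass disappears. A tiny illustration with hypothetical
IDs:

  SmallVector<std::pair<unsigned, StringRef>, 4> Names;
  Names.resize(2);                                     // IDs 1 and 2 registered
  Names[2 - 1] = std::make_pair(2u, StringRef("dbg"));
  Names[1 - 1] = std::make_pair(1u, StringRef("range"));
  // Names is already sorted by ID; no sorting pass needed.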
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index f10bc6f..eb097ed 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -105,8 +105,7 @@ namespace {
/// BBPassManager manages BasicBlockPasses. It batches all the
/// passes together and sequences them to process one basic block before
/// processing the next basic block.
-class VISIBILITY_HIDDEN BBPassManager : public PMDataManager,
- public FunctionPass {
+class BBPassManager : public PMDataManager, public FunctionPass {
public:
static char ID;
@@ -367,7 +366,7 @@ namespace {
static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
-class VISIBILITY_HIDDEN TimingInfo {
+class TimingInfo {
std::map<Pass*, Timer> TimingData;
TimerGroup TG;
diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp
index 0a7f449..3d4f19d 100644
--- a/lib/VMCore/PrintModulePass.cpp
+++ b/lib/VMCore/PrintModulePass.cpp
@@ -16,13 +16,12 @@
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
- class VISIBILITY_HIDDEN PrintModulePass : public ModulePass {
+ class PrintModulePass : public ModulePass {
raw_ostream *Out; // raw_ostream to print on
bool DeleteStream; // Delete the ostream in our dtor?
public:
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 7afbc68..739c463 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -27,8 +27,6 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/RWMutex.h"
#include "llvm/System/Threading.h"
#include <algorithm>
#include <cstdarg>
@@ -768,7 +766,6 @@ const IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
// First, see if the type is already in the table, for which
// a reader lock suffices.
- sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
ITy = pImpl->IntegerTypes.get(IVT);
if (!ITy) {
@@ -810,7 +807,6 @@ FunctionType *FunctionType::get(const Type *ReturnType,
LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
- sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
FT = pImpl->FunctionTypes.get(VT);
if (!FT) {
@@ -835,7 +831,6 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
- sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
AT = pImpl->ArrayTypes.get(AVT);
if (!AT) {
@@ -861,7 +856,6 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
- sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
PT = pImpl->VectorTypes.get(PVT);
if (!PT) {
@@ -890,7 +884,6 @@ StructType *StructType::get(LLVMContext &Context,
LLVMContextImpl *pImpl = Context.pImpl;
- sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
ST = pImpl->StructTypes.get(STV);
if (!ST) {
@@ -938,7 +931,6 @@ PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
LLVMContextImpl *pImpl = ValueType->getContext().pImpl;
- sys::SmartScopedLock<true> L(pImpl->TypeMapLock);
PT = pImpl->PointerTypes.get(PVT);
if (!PT) {
@@ -970,10 +962,7 @@ bool PointerType::isValidElementType(const Type *ElemTy) {
// it. This function is called primarily by the PATypeHandle class.
void Type::addAbstractTypeUser(AbstractTypeUser *U) const {
assert(isAbstract() && "addAbstractTypeUser: Current type not abstract!");
- LLVMContextImpl *pImpl = getContext().pImpl;
- pImpl->AbstractTypeUsersLock.acquire();
AbstractTypeUsers.push_back(U);
- pImpl->AbstractTypeUsersLock.release();
}
@@ -983,8 +972,6 @@ void Type::addAbstractTypeUser(AbstractTypeUser *U) const {
// is annihilated, because there is no way to get a reference to it ever again.
//
void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
- LLVMContextImpl *pImpl = getContext().pImpl;
- pImpl->AbstractTypeUsersLock.acquire();
// Search from back to front because we will notify users from back to
// front. Also, it is likely that there will be a stack like behavior to
@@ -1013,7 +1000,6 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
this->destroy();
}
- pImpl->AbstractTypeUsersLock.release();
}
// unlockedRefineAbstractTypeTo - This function is used when it is discovered
@@ -1065,7 +1051,6 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
// will not cause users to drop off of the use list. If we resolve to ourself
// we succeed!
//
- pImpl->AbstractTypeUsersLock.acquire();
while (!AbstractTypeUsers.empty() && NewTy != this) {
AbstractTypeUser *User = AbstractTypeUsers.back();
@@ -1081,7 +1066,6 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
assert(AbstractTypeUsers.size() != OldSize &&
"AbsTyUser did not remove self from user list!");
}
- pImpl->AbstractTypeUsersLock.release();
// If we were successful removing all users from the type, 'this' will be
// deleted when the last PATypeHolder is destroyed or updated from this type.
@@ -1095,7 +1079,6 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
void DerivedType::refineAbstractTypeTo(const Type *NewType) {
// All recursive calls will go through unlockedRefineAbstractTypeTo,
// to avoid deadlock problems.
- sys::SmartScopedLock<true> L(NewType->getContext().pImpl->TypeMapLock);
unlockedRefineAbstractTypeTo(NewType);
}
@@ -1107,9 +1090,6 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
DEBUG(errs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n");
#endif
- LLVMContextImpl *pImpl = getContext().pImpl;
-
- pImpl->AbstractTypeUsersLock.acquire();
unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
while (!AbstractTypeUsers.empty()) {
AbstractTypeUser *ATU = AbstractTypeUsers.back();
@@ -1118,7 +1098,6 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
assert(AbstractTypeUsers.size() < OldSize-- &&
"AbstractTypeUser did not remove itself from the use list!");
}
- pImpl->AbstractTypeUsersLock.release();
}
// refineAbstractType - Called when a contained type is found to be more
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index f31ea66..3440a77 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -17,16 +17,12 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/RWMutex.h"
-#include "llvm/System/Threading.h"
#include <algorithm>
using namespace llvm;
#define DEBUG_SYMBOL_TABLE 0
#define DEBUG_ABSTYPE 0
-static ManagedStatic<sys::SmartRWMutex<true> > TypeSymbolTableLock;
-
TypeSymbolTable::~TypeSymbolTable() {
// Drop all abstract type references in the type plane...
for (iterator TI = tmap.begin(), TE = tmap.end(); TI != TE; ++TI) {
@@ -38,8 +34,6 @@ TypeSymbolTable::~TypeSymbolTable() {
std::string TypeSymbolTable::getUniqueName(const StringRef &BaseName) const {
std::string TryName = BaseName;
- sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
-
const_iterator End = tmap.end();
// See if the name exists
@@ -50,8 +44,6 @@ std::string TypeSymbolTable::getUniqueName(const StringRef &BaseName) const {
// lookup a type by name - returns null on failure
Type* TypeSymbolTable::lookup(const StringRef &Name) const {
- sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
-
const_iterator TI = tmap.find(Name);
Type* result = 0;
if (TI != tmap.end())
@@ -59,21 +51,9 @@ Type* TypeSymbolTable::lookup(const StringRef &Name) const {
return result;
}
-TypeSymbolTable::iterator TypeSymbolTable::find(const StringRef &Name) {
- sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
- return tmap.find(Name);
-}
-
-TypeSymbolTable::const_iterator
-TypeSymbolTable::find(const StringRef &Name) const {
- sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
- return tmap.find(Name);
-}
// remove - Remove a type from the symbol table...
Type* TypeSymbolTable::remove(iterator Entry) {
- TypeSymbolTableLock->writer_acquire();
-
assert(Entry != tmap.end() && "Invalid entry to remove!");
const Type* Result = Entry->second;
@@ -84,8 +64,6 @@ Type* TypeSymbolTable::remove(iterator Entry) {
tmap.erase(Entry);
- TypeSymbolTableLock->writer_release();
-
// If we are removing an abstract type, remove the symbol table from it's use
// list...
if (Result->isAbstract()) {
@@ -105,8 +83,6 @@ Type* TypeSymbolTable::remove(iterator Entry) {
void TypeSymbolTable::insert(const StringRef &Name, const Type* T) {
assert(T && "Can't insert null type into symbol table!");
- TypeSymbolTableLock->writer_acquire();
-
if (tmap.insert(std::make_pair(Name, T)).second) {
// Type inserted fine with no conflict.
@@ -132,8 +108,6 @@ void TypeSymbolTable::insert(const StringRef &Name, const Type* T) {
tmap.insert(make_pair(UniqueName, T));
}
- TypeSymbolTableLock->writer_release();
-
// If we are adding an abstract type, add the symbol table to it's use list.
if (T->isAbstract()) {
cast<DerivedType>(T)->addAbstractTypeUser(this);
@@ -146,8 +120,6 @@ void TypeSymbolTable::insert(const StringRef &Name, const Type* T) {
// This function is called when one of the types in the type plane are refined
void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
- sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
-
// Loop over all of the types in the symbol table, replacing any references
// to OldType with references to NewType. Note that there may be multiple
// occurrences, and although we only need to remove one at a time, it's
@@ -177,7 +149,6 @@ void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
// Loop over all of the types in the symbol table, dropping any abstract
// type user entries for AbsTy which occur because there are names for the
// type.
- sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
for (iterator TI = begin(), TE = end(); TI != TE; ++TI)
if (TI->second == const_cast<Type*>(static_cast<const Type*>(AbsTy)))
AbsTy->removeAbstractTypeUser(this);
@@ -191,8 +162,6 @@ static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
void TypeSymbolTable::dump() const {
errs() << "TypeSymbolPlane: ";
- sys::SmartScopedReader<true> Reader(*TypeSymbolTableLock);
for_each(tmap.begin(), tmap.end(), DumpTypes);
}
-// vim: sw=2 ai
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 35ec9be..826e8a1 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -27,8 +27,6 @@
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/ValueHandle.h"
-#include "llvm/System/RWMutex.h"
-#include "llvm/System/Threading.h"
#include "llvm/ADT/DenseMap.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 3bfd47c..5990e48 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -62,7 +62,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -70,7 +69,7 @@
using namespace llvm;
namespace { // Anonymous namespace for class
- struct VISIBILITY_HIDDEN PreVerifier : public FunctionPass {
+ struct PreVerifier : public FunctionPass {
static char ID; // Pass ID, replacement for typeid
PreVerifier() : FunctionPass(&ID) { }
@@ -321,7 +320,7 @@ namespace {
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
- void visitAllocationInst(AllocationInst &AI);
+ void visitAllocaInst(AllocaInst &AI);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
@@ -659,6 +658,12 @@ void Verifier::visitFunction(Function &F) {
BasicBlock *Entry = &F.getEntryBlock();
Assert1(pred_begin(Entry) == pred_end(Entry),
"Entry block to function must not have predecessors!", Entry);
+
+ // The address of the entry block cannot be taken, unless it is dead.
+ if (Entry->hasAddressTaken()) {
+ Assert1(!BlockAddress::get(Entry)->isConstantUsed(),
+ "blockaddress may not be used with the entry block!", Entry);
+ }
}
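A hedged sketch of what the new assertion rejects (F is a placeholder; a
blockaddress of the entry block that is itself unused, i.e. dead, still passes):

  bool entryAddrCheck(Function *F) {
    Constant *BA = BlockAddress::get(&F->getEntryBlock());
    (void)BA;
    // Once BA is actually used (stored to a global, fed to an indirectbr),
    // isConstantUsed() becomes true and verification reports:
    //   "blockaddress may not be used with the entry block!"
    return verifyFunction(*F, PrintMessageAction);  // true if broken
  }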
// If this function is actually an intrinsic, verify that it is only used in
@@ -1282,7 +1287,7 @@ void Verifier::visitStoreInst(StoreInst &SI) {
visitInstruction(SI);
}
-void Verifier::visitAllocationInst(AllocationInst &AI) {
+void Verifier::visitAllocaInst(AllocaInst &AI) {
const PointerType *PTy = AI.getType();
Assert1(PTy->getAddressSpace() == 0,
"Allocation instruction pointer not in the generic address space!",
diff --git a/projects/sample/lib/sample/sample.c b/projects/sample/lib/sample/sample.c
index a5ae280..8ebb5ec 100644
--- a/projects/sample/lib/sample/sample.c
+++ b/projects/sample/lib/sample/sample.c
@@ -11,7 +11,7 @@
#include <stdlib.h>
/* LLVM Header File
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
*/
/* Header file global to this project */
diff --git a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
index 5d08312..49327ac 100644
--- a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
+++ b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -aa-eval -disable-output |& grep {2 no alias respon}
; TEST that A[1][0] may alias A[0][i].
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define void @test(i32 %N) {
entry:
diff --git a/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll b/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
index 5ea26e7..6475471 100644
--- a/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
+++ b/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
@@ -1,4 +1,5 @@
; RUN: opt -gvn -instcombine -S < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
declare i8 @llvm.atomic.load.add.i8.p0i8(i8*, i8)
diff --git a/test/Analysis/BasicAA/featuretest.ll b/test/Analysis/BasicAA/featuretest.ll
index 737ee45..50dc886 100644
--- a/test/Analysis/BasicAA/featuretest.ll
+++ b/test/Analysis/BasicAA/featuretest.ll
@@ -2,6 +2,7 @@
; determine, as noted in the comments.
; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | not grep REMOVE
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@Global = external global { i32 }
diff --git a/test/Analysis/BasicAA/global-size.ll b/test/Analysis/BasicAA/global-size.ll
index 0a643d4..b9cbbcc 100644
--- a/test/Analysis/BasicAA/global-size.ll
+++ b/test/Analysis/BasicAA/global-size.ll
@@ -2,6 +2,7 @@
; the global.
; RUN: opt < %s -basicaa -gvn -instcombine -S | not grep load
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@B = global i16 8 ; <i16*> [#uses=2]
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index 69b60d7..02db861 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -basicaa -gvn -dse -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
declare void @llvm.memset.i32(i8*, i8, i32, i32)
declare void @llvm.memset.i8(i8*, i8, i8, i32)
@@ -88,4 +89,4 @@ define void @test3a(i8* %P, i8 %X) {
call void @llvm.lifetime.end(i64 10, i8* %P)
ret void
; CHECK: ret void
-}
\ No newline at end of file
+}
diff --git a/test/Analysis/BasicAA/phi-and-select.ll b/test/Analysis/BasicAA/phi-and-select.ll
new file mode 100644
index 0000000..c69e824
--- /dev/null
+++ b/test/Analysis/BasicAA/phi-and-select.ll
@@ -0,0 +1,73 @@
+; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output \
+; RUN: |& grep {NoAlias: double\\* \[%\]a, double\\* \[%\]b\$} | count 4
+
+; BasicAA should detect NoAliases in PHIs and Selects.
+
+; Two PHIs in the same block.
+define void @foo(i1 %m, double* noalias %x, double* noalias %y) {
+entry:
+ br i1 %m, label %true, label %false
+
+true:
+ br label %exit
+
+false:
+ br label %exit
+
+exit:
+ %a = phi double* [ %x, %true ], [ %y, %false ]
+ %b = phi double* [ %x, %false ], [ %y, %true ]
+ volatile store double 0.0, double* %a
+ volatile store double 1.0, double* %b
+ ret void
+}
+
+; Two selects with the same condition.
+define void @bar(i1 %m, double* noalias %x, double* noalias %y) {
+entry:
+ %a = select i1 %m, double* %x, double* %y
+ %b = select i1 %m, double* %y, double* %x
+ volatile store double 0.000000e+00, double* %a
+ volatile store double 1.000000e+00, double* %b
+ ret void
+}
+
+; Two PHIs with disjoint sets of inputs.
+define void @qux(i1 %m, double* noalias %x, double* noalias %y,
+ i1 %n, double* noalias %v, double* noalias %w) {
+entry:
+ br i1 %m, label %true, label %false
+
+true:
+ br label %exit
+
+false:
+ br label %exit
+
+exit:
+ %a = phi double* [ %x, %true ], [ %y, %false ]
+ br i1 %n, label %ntrue, label %nfalse
+
+ntrue:
+ br label %nexit
+
+nfalse:
+ br label %nexit
+
+nexit:
+ %b = phi double* [ %v, %ntrue ], [ %w, %nfalse ]
+ volatile store double 0.0, double* %a
+ volatile store double 1.0, double* %b
+ ret void
+}
+
+; Two selects with disjoint sets of arms.
+define void @fin(i1 %m, double* noalias %x, double* noalias %y,
+ i1 %n, double* noalias %v, double* noalias %w) {
+entry:
+ %a = select i1 %m, double* %x, double* %y
+ %b = select i1 %n, double* %v, double* %w
+ volatile store double 0.000000e+00, double* %a
+ volatile store double 1.000000e+00, double* %b
+ ret void
+}
diff --git a/test/Analysis/BasicAA/store-promote.ll b/test/Analysis/BasicAA/store-promote.ll
index d8e7c75..33d0f3a 100644
--- a/test/Analysis/BasicAA/store-promote.ll
+++ b/test/Analysis/BasicAA/store-promote.ll
@@ -3,6 +3,7 @@
; two pointers, then the load should be hoisted, and the store sunk.
; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@A = global i32 7 ; <i32*> [#uses=3]
@B = global i32 8 ; <i32*> [#uses=2]
diff --git a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
index 617c23f..9355aee 100644
--- a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
+++ b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
@@ -2,7 +2,7 @@
; not a child of the loopentry.6 loop.
;
; RUN: opt < %s -analyze -loops | \
-; RUN: grep {^ Loop at depth 4 containing: %loopentry.7<header><latch><exit>}
+; RUN: grep {^ Loop at depth 4 containing: %loopentry.7<header><latch><exiting>}
define void @getAndMoveToFrontDecode() {
br label %endif.2
diff --git a/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll b/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll
index 2b3c982..27fe714 100644
--- a/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll
+++ b/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep -e {--> %b}
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | FileCheck %s
; PR1810
define void @fun() {
@@ -16,3 +16,6 @@ body:
exit:
ret void
}
+
+; CHECK: --> %b
+
diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll b/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll
index 97d0640..37b5b94 100644
--- a/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll
@@ -1,6 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output \
-; RUN: -scalar-evolution-max-iterations=0 | \
-; RUN: grep -F "backedge-taken count is (-1 + (-1 * %j))"
+; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s
; PR2607
define i32 @_Z1aj(i32 %j) nounwind {
@@ -25,3 +24,5 @@ return: ; preds = %return.loopexit, %entry
ret i32 %i.0.lcssa
}
+; CHECK: backedge-taken count is (-1 + (-1 * %j))
+
diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
index 7f4de91..d54b3b4 100644
--- a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
@@ -1,6 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output \
-; RUN: -scalar-evolution-max-iterations=0 | \
-; RUN: grep -F "backedge-taken count is (-2147483632 + ((-1 + (-1 * %x)) smax (-1 + (-1 * %y))))"
+; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s
; PR2607
define i32 @b(i32 %x, i32 %y) nounwind {
@@ -22,3 +21,6 @@ afterfor: ; preds = %forinc, %entry
%j.0.lcssa = phi i32 [ -2147483632, %entry ], [ %dec, %forinc ]
ret i32 %j.0.lcssa
}
+
+; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %x)) smax (-1 + (-1 * %y))))
+
diff --git a/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll b/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll
index fa09895..06200ae 100644
--- a/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll
+++ b/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output \
-; RUN: -scalar-evolution-max-iterations=0 | grep -F "Exits: 20028"
+; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s
; PR2621
define i32 @a() nounwind {
@@ -23,3 +23,5 @@ bb2:
ret i32 %4
}
+; CHECK: Exits: 20028
+
diff --git a/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll b/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll
index 5a28117..f3c703a 100644
--- a/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll
+++ b/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output \
-; RUN: -scalar-evolution-max-iterations=0 | grep -F "Exits: -19168"
+; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s
; PR2621
define i32 @a() nounwind {
@@ -54,3 +54,5 @@ bb2: ; preds = %bb1
ret i32 %19
}
+; CHECK: Exits: -19168
+
diff --git a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
index 6ed26148..e81530e 100644
--- a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
+++ b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep {count is 2}
; PR3171
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%struct.Foo = type { i32 }
%struct.NonPod = type { [2 x %struct.Foo] }
diff --git a/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll b/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
new file mode 100644
index 0000000..465368b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 -enable-unsafe-fp-math < %s
+; PR5367
+
+define arm_aapcs_vfpcc void @_Z27Benchmark_SceDualQuaternionPvm(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+ br i1 undef, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %0 = load float* undef, align 4 ; <float> [#uses=1]
+ %1 = load float* null, align 4 ; <float> [#uses=1]
+ %2 = insertelement <4 x float> undef, float undef, i32 1 ; <<4 x float>> [#uses=1]
+ %3 = insertelement <4 x float> %2, float %1, i32 2 ; <<4 x float>> [#uses=2]
+ %4 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1]
+ %5 = insertelement <4 x float> %4, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=4]
+ %6 = fsub <4 x float> zeroinitializer, %3 ; <<4 x float>> [#uses=1]
+ %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=2]
+ %8 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %9 = shufflevector <2 x float> %8, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=2]
+ %10 = fmul <4 x float> %7, %9 ; <<4 x float>> [#uses=1]
+ %11 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %12 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=2]
+ %13 = shufflevector <2 x float> %12, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %14 = fmul <4 x float> %11, %13 ; <<4 x float>> [#uses=1]
+ %15 = fadd <4 x float> %10, %14 ; <<4 x float>> [#uses=1]
+ %16 = shufflevector <2 x float> %12, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %15, zeroinitializer ; <<4 x float>> [#uses=1]
+ %18 = shufflevector <4 x float> %17, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
+ %19 = fmul <4 x float> %7, %16 ; <<4 x float>> [#uses=1]
+ %20 = fadd <4 x float> %19, zeroinitializer ; <<4 x float>> [#uses=1]
+ %21 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
+ %22 = shufflevector <4 x float> %21, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %23 = fmul <4 x float> %22, %9 ; <<4 x float>> [#uses=1]
+ %24 = fadd <4 x float> %20, %23 ; <<4 x float>> [#uses=1]
+ %25 = shufflevector <4 x float> %18, <4 x float> %24, <4 x i32> <i32 0, i32 1, i32 6, i32 undef> ; <<4 x float>> [#uses=1]
+ %26 = shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; <<4 x float>> [#uses=1]
+ %27 = fmul <4 x float> %26, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+ %28 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %5 ; <<4 x float>> [#uses=1]
+ %29 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
+ %30 = fmul <4 x float> zeroinitializer, %29 ; <<4 x float>> [#uses=1]
+ %31 = fmul <4 x float> %30, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
+ %32 = shufflevector <4 x float> %27, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %33 = shufflevector <4 x float> %28, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %34 = shufflevector <2 x float> %33, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %35 = fmul <4 x float> %32, %34 ; <<4 x float>> [#uses=1]
+ %36 = fadd <4 x float> %35, zeroinitializer ; <<4 x float>> [#uses=1]
+ %37 = shufflevector <4 x float> %5, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
+ %38 = shufflevector <4 x float> %37, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %39 = fmul <4 x float> zeroinitializer, %38 ; <<4 x float>> [#uses=1]
+ %40 = fadd <4 x float> %36, %39 ; <<4 x float>> [#uses=1]
+ %41 = fadd <4 x float> %40, zeroinitializer ; <<4 x float>> [#uses=1]
+ %42 = shufflevector <4 x float> undef, <4 x float> %41, <4 x i32> <i32 0, i32 1, i32 6, i32 3> ; <<4 x float>> [#uses=1]
+ %43 = fmul <4 x float> %42, %31 ; <<4 x float>> [#uses=1]
+ store float undef, float* undef, align 4
+ store float 0.000000e+00, float* null, align 4
+ %44 = extractelement <4 x float> %43, i32 1 ; <float> [#uses=1]
+ store float %44, float* undef, align 4
+ br i1 undef, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll
new file mode 100644
index 0000000..a4e7685
--- /dev/null
+++ b/test/CodeGen/ARM/2009-10-27-double-align.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+
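+; AAPCS requires 8-byte stack alignment for a double: the i32 argument is
+; stored at [sp] and the double at [sp, #+8]/[sp, #+12], leaving a 4-byte
+; hole at [sp, #+4].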
+@.str = private constant [1 x i8] zeroinitializer, align 1
+
+define arm_aapcscc void @g() {
+entry:
+;CHECK: [sp, #+8]
+;CHECK: [sp, #+12]
+;CHECK: [sp]
+ tail call arm_aapcscc void (i8*, ...)* @f(i8* getelementptr ([1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00)
+ ret void
+}
+
+declare arm_aapcscc void @f(i8*, ...)
diff --git a/test/CodeGen/ARM/2009-10-30.ll b/test/CodeGen/ARM/2009-10-30.ll
new file mode 100644
index 0000000..8256386
--- /dev/null
+++ b/test/CodeGen/ARM/2009-10-30.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+; This test checks that the address of the vararg arguments is correctly
+; computed when there are 5 or more regular arguments.
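+; Under AAPCS the first four i32 arguments are passed in r0-r3, so %a5 is the
+; first stack argument; after the 4-byte sub that makes room for %ap, the
+; varargs therefore start at [sp, #8], which is the address va_start records.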
+
+define void @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) {
+entry:
+;CHECK: sub sp, sp, #4
+;CHECK: add r0, sp, #8
+;CHECK: str r0, [sp], #+4
+;CHECK: bx lr
+ %ap = alloca i8*, align 4
+ %ap1 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap1)
+ ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
new file mode 100644
index 0000000..c260b97
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mcpu=cortex-a8 < %s | grep vmov | count 1
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+%foo = type { <4 x float> }
+
+define arm_aapcs_vfpcc void @bar(%foo* noalias sret %agg.result, <4 x float> %quat.0) nounwind {
+entry:
+ %quat_addr = alloca %foo, align 16 ; <%foo*> [#uses=2]
+ %0 = getelementptr inbounds %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
+ store <4 x float> %quat.0, <4 x float>* %0
+ %1 = call arm_aapcs_vfpcc <4 x float> @quux(%foo* %quat_addr) nounwind ; <<4 x float>> [#uses=3]
+ %2 = fmul <4 x float> %1, %1 ; <<4 x float>> [#uses=2]
+ %3 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %4 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %5 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %3, <2 x float> %4) nounwind ; <<2 x float>> [#uses=2]
+ %6 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %5, <2 x float> %5) nounwind ; <<2 x float>> [#uses=2]
+ %7 = shufflevector <2 x float> %6, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=2]
+ %8 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %7) nounwind ; <<4 x float>> [#uses=3]
+ %9 = fmul <4 x float> %8, %8 ; <<4 x float>> [#uses=1]
+ %10 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %9, <4 x float> %7) nounwind ; <<4 x float>> [#uses=1]
+ %11 = fmul <4 x float> %10, %8 ; <<4 x float>> [#uses=1]
+ %12 = fmul <4 x float> %11, %1 ; <<4 x float>> [#uses=1]
+ %13 = call arm_aapcs_vfpcc %foo* @baz(%foo* %agg.result, <4 x float> %12) nounwind ; <%foo*> [#uses=0]
+ ret void
+}
+
+declare arm_aapcs_vfpcc %foo* @baz(%foo*, <4 x float>) nounwind
+
+declare arm_aapcs_vfpcc <4 x float> @quux(%foo* nocapture) nounwind readonly
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
new file mode 100644
index 0000000..f2288c3
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mcpu=cortex-a8 < %s | grep vdup.32
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+ br i1 undef, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %0 = load float* undef, align 4 ; <float> [#uses=1]
+ %1 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1]
+ %2 = insertelement <4 x float> %1, float undef, i32 3 ; <<4 x float>> [#uses=1]
+ %3 = fmul <4 x float> undef, %2 ; <<4 x float>> [#uses=1]
+ %4 = extractelement <4 x float> %3, i32 1 ; <float> [#uses=1]
+ store float %4, float* undef, align 4
+ br i1 undef, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/alloca.ll b/test/CodeGen/ARM/alloca.ll
index 15cf677..82a8c98 100644
--- a/test/CodeGen/ARM/alloca.ll
+++ b/test/CodeGen/ARM/alloca.ll
@@ -1,13 +1,14 @@
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | \
-; RUN: grep {mov r11, sp}
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | \
-; RUN: grep {mov sp, r11}
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s
define void @f(i32 %a) {
entry:
+; CHECK: mov r11, sp
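+; A variable-sized alloca forces a frame pointer: sp is saved in r11 on entry
+; and restored from it before the return.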
%tmp = alloca i8, i32 %a ; <i8*> [#uses=1]
call void @g( i8* %tmp, i32 %a, i32 1, i32 2, i32 3 )
ret void
+; CHECK: mov sp, r11
}
declare void @g(i8*, i32, i32, i32, i32)
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index ad5b2d6..cc71839 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi | \
-; RUN: grep {mov r0, r2} | count 1
-; RUN: llc < %s -mtriple=arm-apple-darwin | \
-; RUN: grep {mov r0, r1} | count 1
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
define i32 @f(i32 %a, i64 %b) {
+; ELF: mov r0, r2
+; DARWIN: mov r0, r1
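+; ELF follows AAPCS, which aligns the i64 %b to the even/odd pair r2:r3;
+; Darwin's APCS packs it into r1:r2, so the low word comes from r2 vs r1.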
%tmp = call i32 @g(i64 %b)
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll
index 690f488..d8019a0 100644
--- a/test/CodeGen/ARM/arguments_f64_backfill.ll
+++ b/test/CodeGen/ARM/arguments_f64_backfill.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1}
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | FileCheck %s
define float @f(float %z, double %a, float %b) {
+; CHECK: fcpys s0, s1
%tmp = call float @g(float %b)
ret float %tmp
}
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
index c4b4ec6..72ec8ef 100644
--- a/test/CodeGen/ARM/arm-negative-stride.ll
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=arm | grep {str r1, \\\[r.*, -r.*, lsl #2\}
+; RUN: llc < %s -march=arm | FileCheck %s
define void @test(i32* %P, i32 %A, i32 %i) nounwind {
entry:
+; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
br i1 %0, label %return, label %bb
diff --git a/test/CodeGen/ARM/bfc.ll b/test/CodeGen/ARM/bfc.ll
index 53392de..c4a44b4 100644
--- a/test/CodeGen/ARM/bfc.ll
+++ b/test/CodeGen/ARM/bfc.ll
@@ -1,19 +1,25 @@
-; RUN: llc < %s -march=arm -mattr=+v6t2 | grep "bfc " | count 3
+; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
; 4278190095 = 0xff00000f
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: bfc
%tmp = and i32 %a, 4278190095
ret i32 %tmp
}
; 4286578688 = 0xff800000
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: bfc
%tmp = and i32 %a, 4286578688
ret i32 %tmp
}
; 4095 = 0x00000fff
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: bfc
%tmp = and i32 %a, 4095
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index 52246c3..3dd66ae 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -1,13 +1,18 @@
-; RUN: llc < %s -march=arm | grep {mov lr, pc}
-; RUN: llc < %s -march=arm -mattr=+v5t | grep blx
+; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECKV4
+; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
-; RUN: -relocation-model=pic | grep {PLT}
+; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
declare void @g(i32, i32, i32, i32)
define void @f() {
+; CHECKV4: mov lr, pc
+; CHECKV5: blx
+; CHECKELF: PLT
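+; Pre-v5t ARM has no blx, so an indirect call saves the return address with
+; "mov lr, pc" before branching; v5t uses blx, and ELF PIC calls go via the PLT.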
call void @g( i32 1, i32 2, i32 3, i32 4 )
ret void
}
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
index 294de5f..a6a7ed6 100644
--- a/test/CodeGen/ARM/carry.ll
+++ b/test/CodeGen/ARM/carry.ll
@@ -1,14 +1,21 @@
-; RUN: llc < %s -march=arm | grep "subs r" | count 2
-; RUN: llc < %s -march=arm | grep "adc r"
-; RUN: llc < %s -march=arm | grep "sbc r" | count 2
+; RUN: llc < %s -march=arm | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: subs r
+; CHECK: sbc r
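+; The i64 subtract is split in two: subs computes the low word and sets the
+; carry flag, which the high-word sbc consumes.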
entry:
%tmp = sub i64 %a, %b
ret i64 %tmp
}
define i64 @f2(i64 %a, i64 %b) {
+; CHECK: f2:
+; CHECK: adc r
+; CHECK: subs r
+; CHECK: sbc r
entry:
%tmp1 = shl i64 %a, 1
%tmp2 = sub i64 %tmp1, %b
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index e2d8ddc..ce91936 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -1,39 +1,46 @@
-; RUN: llc < %s -march=arm | \
-; RUN: grep {mov r0, #0} | count 1
-; RUN: llc < %s -march=arm | \
-; RUN: grep {mov r0, #255$} | count 1
-; RUN: llc < %s -march=arm -asm-verbose | \
-; RUN: grep {mov r0.*256} | count 1
-; RUN: llc < %s -march=arm -asm-verbose | grep {orr.*256} | count 1
-; RUN: llc < %s -march=arm -asm-verbose | grep {mov r0, .*-1073741761} | count 1
-; RUN: llc < %s -march=arm -asm-verbose | grep {mov r0, .*1008} | count 1
-; RUN: llc < %s -march=arm | grep {cmp r0, #1, 16} | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
define i32 @f1() {
+; CHECK: f1
+; CHECK: mov r0, #0
ret i32 0
}
define i32 @f2() {
+; CHECK: f2
+; CHECK: mov r0, #255
ret i32 255
}
define i32 @f3() {
+; CHECK: f3
+; CHECK: mov r0{{.*}}256
ret i32 256
}
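+; 257 = 0x101 spans more than 8 bits, so it is not encodable as a single
+; rotated 8-bit ARM immediate and has to be built with a mov plus an orr.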
define i32 @f4() {
+; CHECK: f4
+; CHECK: orr{{.*}}256
ret i32 257
}
define i32 @f5() {
+; CHECK: f5
+; CHECK: mov r0, {{.*}}-1073741761
ret i32 -1073741761
}
define i32 @f6() {
+; CHECK: f6
+; CHECK: mov r0, {{.*}}1008
ret i32 1008
}
define void @f7(i32 %a) {
+; CHECK: f7
+; CHECK: cmp r0, #1, 16
%b = icmp ugt i32 %a, 65536 ; <i1> [#uses=1]
br i1 %b, label %r, label %r
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index 1a1cd07..5c31ea6 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
define float @test(float %acc, float %a, float %b) {
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
index e57bbbb..8fc13e7 100644
--- a/test/CodeGen/ARM/fnmacs.ll
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -1,11 +1,20 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NEONFP
define float @test(float %acc, float %a, float %b) {
entry:
+; VFP2: fnmacs
+; NEON: fnmacs
+
+; NEONFP-NOT: vmls
+; NEONFP-NOT: fcpys
+; NEONFP: vmul.f32
+; NEONFP: vsub.f32
+; NEONFP: fmrs
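+; With NEON fp enabled the multiply-subtract is not fused into vmls; it is
+; expanded to vmul.f32 + vsub.f32, with fmrs moving the result back out.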
+
%0 = fmul float %a, %b
%1 = fsub float %acc, %0
ret float %1
diff --git a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll
new file mode 100644
index 0000000..4de18bc
--- /dev/null
+++ b/test/CodeGen/ARM/fpconsts.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=arm -mattr=+vfp3 | FileCheck %s
+
+define arm_apcscc float @t1(float %x) nounwind readnone optsize {
+entry:
+; CHECK: t1:
+; CHECK: fconsts s1, #16
+ %0 = fadd float %x, 4.000000e+00
+ ret float %0
+}
+
+define arm_apcscc double @t2(double %x) nounwind readnone optsize {
+entry:
+; CHECK: t2:
+; CHECK: fconstd d1, #8
+ %0 = fadd double %x, 3.000000e+00
+ ret double %0
+}
+
+define arm_apcscc double @t3(double %x) nounwind readnone optsize {
+entry:
+; CHECK: t3:
+; CHECK: fconstd d1, #170
+ %0 = fmul double %x, -1.300000e+01
+ ret double %0
+}
+
+define arm_apcscc float @t4(float %x) nounwind readnone optsize {
+entry:
+; CHECK: t4:
+; CHECK: fconsts s1, #184
+ %0 = fmul float %x, -2.400000e+01
+ ret float %0
+}
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index fa897bf..0822fbf 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -1,21 +1,22 @@
-; RUN: llc < %s -march=arm | \
-; RUN: grep {mov r0, #0} | count 1
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep {flds.*\\\[} | count 1
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep {fsts.*\\\[} | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define float @f1(float %a) {
+; CHECK: f1:
+; CHECK: mov r0, #0
ret float 0.000000e+00
}
define float @f2(float* %v, float %u) {
+; CHECK: f2:
+; CHECK: flds{{.*}}[
%tmp = load float* %v ; <float> [#uses=1]
%tmp1 = fadd float %tmp, %u ; <float> [#uses=1]
ret float %tmp1
}
define void @f3(float %a, float %b, float* %v) {
+; CHECK: f3:
+; CHECK: fsts{{.*}}[
%tmp = fadd float %a, %b ; <float> [#uses=1]
store float %tmp, float* %v
ret void
diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll
index 5116ac8..245ed51 100644
--- a/test/CodeGen/ARM/ispositive.ll
+++ b/test/CodeGen/ARM/ispositive.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -march=arm | grep {mov r0, r0, lsr #31}
+; RUN: llc < %s -march=arm | FileCheck %s
define i32 @test1(i32 %X) {
+; CHECK: mov r0, r0, lsr #31
entry:
icmp slt i32 %X, 0 ; <i1>:0 [#uses=1]
zext i1 %0 to i32 ; <i32>:1 [#uses=1]
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index 774b3c0..1a016a0 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -1,13 +1,10 @@
-; RUN: llc < %s -march=arm | \
-; RUN: grep ldmia | count 2
-; RUN: llc < %s -march=arm | \
-; RUN: grep ldmib | count 1
-; RUN: llc < %s -mtriple=arm-apple-darwin | \
-; RUN: grep {ldmfd sp\!} | count 3
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
define i32 @t1() {
+; CHECK: t1:
+; CHECK: ldmia
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
@@ -15,6 +12,8 @@ define i32 @t1() {
}
define i32 @t2() {
+; CHECK: t2:
+; CHECK: ldmia
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
@@ -23,6 +22,9 @@ define i32 @t2() {
}
define i32 @t3() {
+; CHECK: t3:
+; CHECK: ldmib
+; CHECK: ldmfd sp!
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/ldr.ll b/test/CodeGen/ARM/ldr.ll
index 954fb5b..011e61c 100644
--- a/test/CodeGen/ARM/ldr.ll
+++ b/test/CodeGen/ARM/ldr.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -march=arm | grep {ldr r0} | count 7
-; RUN: llc < %s -march=arm | grep mov | grep 1
-; RUN: llc < %s -march=arm | not grep mvn
-; RUN: llc < %s -march=arm | grep ldr | grep lsl
-; RUN: llc < %s -march=arm | grep ldr | grep lsr
+; RUN: llc < %s -march=arm | FileCheck %s
define i32 @f1(i32* %v) {
+; CHECK: f1:
+; CHECK: ldr r0
entry:
%tmp = load i32* %v
ret i32 %tmp
}
define i32 @f2(i32* %v) {
+; CHECK: f2:
+; CHECK: ldr r0
entry:
%tmp2 = getelementptr i32* %v, i32 1023
%tmp = load i32* %tmp2
@@ -18,6 +18,9 @@ entry:
}
define i32 @f3(i32* %v) {
+; CHECK: f3:
+; CHECK: mov
+; CHECK: ldr r0
entry:
%tmp2 = getelementptr i32* %v, i32 1024
%tmp = load i32* %tmp2
@@ -25,6 +28,9 @@ entry:
}
define i32 @f4(i32 %base) {
+; CHECK: f4:
+; CHECK-NOT: mvn
+; CHECK: ldr r0
entry:
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i32*
@@ -33,6 +39,8 @@ entry:
}
define i32 @f5(i32 %base, i32 %offset) {
+; CHECK: f5:
+; CHECK: ldr r0
entry:
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i32*
@@ -41,6 +49,8 @@ entry:
}
define i32 @f6(i32 %base, i32 %offset) {
+; CHECK: f6:
+; CHECK: ldr r0{{.*}}lsl{{.*}}
entry:
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
@@ -50,6 +60,8 @@ entry:
}
define i32 @f7(i32 %base, i32 %offset) {
+; CHECK: f7:
+; CHECK: ldr r0{{.*}}lsr{{.*}}
entry:
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index 2fcaac0..16ef7cc 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -1,47 +1,50 @@
-; RUN: llc < %s -march=arm -asm-verbose | \
-; RUN: grep -- {-2147483648} | count 3
-; RUN: llc < %s -march=arm | grep mvn | count 3
-; RUN: llc < %s -march=arm | grep adds | count 1
-; RUN: llc < %s -march=arm | grep adc | count 1
-; RUN: llc < %s -march=arm | grep {subs } | count 1
-; RUN: llc < %s -march=arm | grep sbc | count 1
-; RUN: llc < %s -march=arm | \
-; RUN: grep smull | count 1
-; RUN: llc < %s -march=arm | \
-; RUN: grep umull | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
define i64 @f1() {
+; CHECK: f1:
entry:
ret i64 0
}
define i64 @f2() {
+; CHECK: f2:
entry:
ret i64 1
}
define i64 @f3() {
+; CHECK: f3:
+; CHECK: mvn{{.*}}-2147483648
entry:
ret i64 2147483647
}
define i64 @f4() {
+; CHECK: f4:
+; CHECK: -2147483648
entry:
ret i64 2147483648
}
define i64 @f5() {
+; CHECK: f5:
+; CHECK: mvn
+; CHECK: mvn{{.*}}-2147483648
entry:
ret i64 9223372036854775807
}
define i64 @f6(i64 %x, i64 %y) {
+; CHECK: f6:
+; CHECK: adds
+; CHECK: adc
entry:
%tmp1 = add i64 %y, 1 ; <i64> [#uses=1]
ret i64 %tmp1
}
define void @f7() {
+; CHECK: f7:
entry:
%tmp = call i64 @f8( ) ; <i64> [#uses=0]
ret void
@@ -50,12 +53,17 @@ entry:
declare i64 @f8()
define i64 @f9(i64 %a, i64 %b) {
+; CHECK: f9:
+; CHECK: subs r
+; CHECK: sbc
entry:
%tmp = sub i64 %a, %b ; <i64> [#uses=1]
ret i64 %tmp
}
define i64 @f(i32 %a, i32 %b) {
+; CHECK: f:
+; CHECK: smull
entry:
%tmp = sext i32 %a to i64 ; <i64> [#uses=1]
%tmp1 = sext i32 %b to i64 ; <i64> [#uses=1]
@@ -64,6 +72,8 @@ entry:
}
define i64 @g(i32 %a, i32 %b) {
+; CHECK: g:
+; CHECK: umull
entry:
%tmp = zext i32 %a to i64 ; <i64> [#uses=1]
%tmp1 = zext i32 %b to i64 ; <i64> [#uses=1]
@@ -72,9 +82,9 @@ entry:
}
define i64 @f10() {
+; CHECK: f10:
entry:
%a = alloca i64, align 8 ; <i64*> [#uses=1]
%retval = load i64* %a ; <i64> [#uses=1]
ret i64 %retval
}
-
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 057b5f0..688b7bc 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=arm > %t
-; RUN: grep rrx %t | count 1
-; RUN: grep __ashldi3 %t
-; RUN: grep __ashrdi3 %t
-; RUN: grep __lshrdi3 %t
+; RUN: llc < %s -march=arm | FileCheck %s
define i64 @f0(i64 %A, i64 %B) {
+; CHECK: f0
+; CHECK: movs r3, r3, lsr #1
+; CHECK-NEXT: mov r2, r2, rrx
+; CHECK-NEXT: subs r0, r0, r2
+; CHECK-NEXT: sbc r1, r1, r3
%tmp = bitcast i64 %A to i64
%tmp2 = lshr i64 %B, 1
%tmp3 = sub i64 %tmp, %tmp2
@@ -12,18 +13,36 @@ define i64 @f0(i64 %A, i64 %B) {
}
define i32 @f1(i64 %x, i64 %y) {
+; CHECK: f1
+; CHECK: mov r0, r0, lsl r2
%a = shl i64 %x, %y
%b = trunc i64 %a to i32
ret i32 %b
}
define i32 @f2(i64 %x, i64 %y) {
+; CHECK: f2
+; CHECK: mov r0, r0, lsr r2
+; CHECK-NEXT: rsb r3, r2, #32
+; CHECK-NEXT: sub r2, r2, #32
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: orr r0, r0, r1, lsl r3
+; CHECK-NEXT: movge r0, r1, asr r2
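+; The low word of a variable 64-bit shift is (lo >> c) | (hi << (32 - c));
+; when c >= 32 the movge instead selects hi >> (c - 32).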
%a = ashr i64 %x, %y
%b = trunc i64 %a to i32
ret i32 %b
}
define i32 @f3(i64 %x, i64 %y) {
+; CHECK: f3
+; CHECK: mov r0, r0, lsr r2
+; CHECK-NEXT: rsb r3, r2, #32
+; CHECK-NEXT: sub r2, r2, #32
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: orr r0, r0, r1, lsl r3
+; CHECK-NEXT: movge r0, r1, lsr r2
%a = lshr i64 %x, %y
%b = trunc i64 %a to i32
ret i32 %b
diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll
index ba9699e..50da997 100644
--- a/test/CodeGen/ARM/remat.ll
+++ b/test/CodeGen/ARM/remat.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=arm-apple-darwin
-; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 4
+; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 5
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
%struct.LOCBOX = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/str_post.ll b/test/CodeGen/ARM/str_post.ll
index 801b9ce..97916f1 100644
--- a/test/CodeGen/ARM/str_post.ll
+++ b/test/CodeGen/ARM/str_post.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=arm | \
-; RUN: grep {strh .*\\\[.*\], #-4} | count 1
-; RUN: llc < %s -march=arm | \
-; RUN: grep {str .*\\\[.*\],} | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
define i16 @test1(i32* %X, i16* %A) {
+; CHECK: test1:
+; CHECK: strh {{.*}}[{{.*}}], #-4
%Y = load i32* %X ; <i32> [#uses=1]
%tmp1 = trunc i32 %Y to i16 ; <i16> [#uses=1]
store i16 %tmp1, i16* %A
@@ -13,6 +12,8 @@ define i16 @test1(i32* %X, i16* %A) {
}
define i32 @test2(i32* %X, i32* %A) {
+; CHECK: test2:
+; CHECK: str {{.*}}[{{.*}}],
%Y = load i32* %X ; <i32> [#uses=1]
store i32 %Y, i32* %A
%tmp1 = ptrtoint i32* %A to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll
index 32847208..d932f90 100644
--- a/test/CodeGen/ARM/tls2.ll
+++ b/test/CodeGen/ARM/tls2.ll
@@ -1,19 +1,27 @@
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {i(gottpoff)}
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {ldr r., \[pc, r.\]}
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
-; RUN: -relocation-model=pic | grep {__tls_get_addr}
+; RUN: | FileCheck %s -check-prefix=CHECK-NONPIC
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC
@i = external thread_local global i32 ; <i32*> [#uses=2]
define i32 @f() {
+; CHECK-NONPIC: f:
+; CHECK-NONPIC: ldr {{r.}}, [pc, +{{r.}}]
+; CHECK-NONPIC: i(gottpoff)
+; CHECK-PIC: f:
+; CHECK-PIC: __tls_get_addr
entry:
%tmp1 = load i32* @i ; <i32> [#uses=1]
ret i32 %tmp1
}
define i32* @g() {
+; CHECK-NONPIC: g:
+; CHECK-NONPIC: ldr {{r.}}, [pc, +{{r.}}]
+; CHECK-NONPIC: i(gottpoff)
+; CHECK-PIC: g:
+; CHECK-PIC: __tls_get_addr
entry:
ret i32* @i
}
diff --git a/test/CodeGen/CPP/llvm2cpp.ll b/test/CodeGen/CPP/llvm2cpp.ll
index 447f332..d0ba0cf 100644
--- a/test/CodeGen/CPP/llvm2cpp.ll
+++ b/test/CodeGen/CPP/llvm2cpp.ll
@@ -273,7 +273,7 @@ define i32 @foozball(i32) {
@A = global i32* @B ; <i32**> [#uses=0]
@B = global i32 7 ; <i32*> [#uses=1]
-define void @X() {
+define void @test12312() {
ret void
}
; ModuleID = 'global_section.ll'
diff --git a/test/CodeGen/Generic/intrinsics.ll b/test/CodeGen/Generic/intrinsics.ll
index 9a42c3e..29bc499 100644
--- a/test/CodeGen/Generic/intrinsics.ll
+++ b/test/CodeGen/Generic/intrinsics.ll
@@ -14,9 +14,9 @@ define double @test_sqrt(float %F) {
; SIN
-declare float @sinf(float)
+declare float @sinf(float) readonly
-declare double @sin(double)
+declare double @sin(double) readonly
define double @test_sin(float %F) {
%G = call float @sinf( float %F ) ; <float> [#uses=1]
@@ -27,9 +27,9 @@ define double @test_sin(float %F) {
; COS
-declare float @cosf(float)
+declare float @cosf(float) readonly
-declare double @cos(double)
+declare double @cos(double) readonly
define double @test_cos(float %F) {
%G = call float @cosf( float %F ) ; <float> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
index 3cbb212..7647474 100644
--- a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp | grep fcpys | count 1
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp | not grep fcpys
; rdar://7117307
%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
diff --git a/test/CodeGen/Thumb2/2009-11-01-CopyReg2RegBug.ll b/test/CodeGen/Thumb2/2009-11-01-CopyReg2RegBug.ll
new file mode 100644
index 0000000..216f3e3
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-11-01-CopyReg2RegBug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8
+
+define arm_apcscc void @get_initial_mb16x16_cost() nounwind {
+entry:
+ br i1 undef, label %bb4, label %bb1
+
+bb1: ; preds = %entry
+ br label %bb7
+
+bb4: ; preds = %entry
+ br i1 undef, label %bb7.thread, label %bb5
+
+bb5: ; preds = %bb4
+ br label %bb7
+
+bb7.thread: ; preds = %bb4
+ br label %bb8
+
+bb7: ; preds = %bb5, %bb1
+ br i1 undef, label %bb8, label %bb10
+
+bb8: ; preds = %bb7, %bb7.thread
+ %0 = phi double [ 5.120000e+02, %bb7.thread ], [ undef, %bb7 ] ; <double> [#uses=1]
+ %1 = fdiv double %0, undef ; <double> [#uses=0]
+ unreachable
+
+bb10: ; preds = %bb7
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-1.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-1.ll
new file mode 100644
index 0000000..572f1e8
--- /dev/null
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-1.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8
+
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sFILEX = type opaque
+%struct.__sbuf = type { i8*, i32 }
+
+declare arm_apcscc i32 @fgetc(%struct.FILE* nocapture) nounwind
+
+define arm_apcscc i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+ br i1 undef, label %bb, label %bb1
+
+bb: ; preds = %entry
+ unreachable
+
+bb1: ; preds = %entry
+ br i1 undef, label %bb.i1, label %bb1.i2
+
+bb.i1: ; preds = %bb1
+ unreachable
+
+bb1.i2: ; preds = %bb1
+ %0 = call arm_apcscc i32 @fgetc(%struct.FILE* undef) nounwind ; <i32> [#uses=0]
+ br i1 undef, label %bb2.i3, label %bb3.i4
+
+bb2.i3: ; preds = %bb1.i2
+ br i1 undef, label %bb4.i, label %bb3.i4
+
+bb3.i4: ; preds = %bb2.i3, %bb1.i2
+ unreachable
+
+bb4.i: ; preds = %bb2.i3
+ br i1 undef, label %bb5.i, label %get_image.exit
+
+bb5.i: ; preds = %bb4.i
+ unreachable
+
+get_image.exit: ; preds = %bb4.i
+ br i1 undef, label %bb28, label %bb27
+
+bb27: ; preds = %get_image.exit
+ br label %bb.i
+
+bb.i: ; preds = %bb.i, %bb27
+ %1 = fptrunc double undef to float ; <float> [#uses=1]
+ %2 = fptoui float %1 to i8 ; <i8> [#uses=1]
+ store i8 %2, i8* undef, align 1
+ br label %bb.i
+
+bb28: ; preds = %get_image.exit
+ unreachable
+}
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
new file mode 100644
index 0000000..4320328
--- /dev/null
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 4
+
+define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
+entry:
+ br label %bb5
+
+bb5: ; preds = %bb5, %entry
+ br i1 undef, label %bb5, label %bb.nph
+
+bb.nph: ; preds = %bb5
+ br label %bb7
+
+bb7: ; preds = %bb9, %bb.nph
+ %s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; <float> [#uses=3]
+ %tmp79 = add i32 undef, undef ; <i32> [#uses=1]
+ %tmp53 = sub i32 undef, undef ; <i32> [#uses=1]
+ %0 = fadd float 0.000000e+00, 1.000000e+00 ; <float> [#uses=2]
+ %1 = fmul float 0.000000e+00, 0.000000e+00 ; <float> [#uses=2]
+ br label %bb8
+
+bb8: ; preds = %bb8, %bb7
+ %tmp54 = add i32 0, %tmp53 ; <i32> [#uses=0]
+ %fi.1 = getelementptr float* %fz, i32 undef ; <float*> [#uses=2]
+ %tmp80 = add i32 0, %tmp79 ; <i32> [#uses=1]
+ %scevgep81 = getelementptr float* %fz, i32 %tmp80 ; <float*> [#uses=1]
+ %2 = load float* undef, align 4 ; <float> [#uses=1]
+ %3 = fmul float %2, %1 ; <float> [#uses=1]
+ %4 = load float* null, align 4 ; <float> [#uses=2]
+ %5 = fmul float %4, %0 ; <float> [#uses=1]
+ %6 = fsub float %3, %5 ; <float> [#uses=1]
+ %7 = fmul float %4, %1 ; <float> [#uses=1]
+ %8 = fadd float undef, %7 ; <float> [#uses=2]
+ %9 = load float* %fi.1, align 4 ; <float> [#uses=2]
+ %10 = fsub float %9, %8 ; <float> [#uses=1]
+ %11 = fadd float %9, %8 ; <float> [#uses=1]
+ %12 = fsub float 0.000000e+00, %6 ; <float> [#uses=1]
+ %13 = fsub float 0.000000e+00, undef ; <float> [#uses=2]
+ %14 = fmul float undef, %0 ; <float> [#uses=1]
+ %15 = fadd float %14, undef ; <float> [#uses=2]
+ %16 = load float* %scevgep81, align 4 ; <float> [#uses=2]
+ %17 = fsub float %16, %15 ; <float> [#uses=1]
+ %18 = fadd float %16, %15 ; <float> [#uses=2]
+ %19 = load float* undef, align 4 ; <float> [#uses=2]
+ %20 = fsub float %19, %13 ; <float> [#uses=2]
+ %21 = fadd float %19, %13 ; <float> [#uses=1]
+ %22 = fmul float %s1.02, %18 ; <float> [#uses=1]
+ %23 = fmul float 0.000000e+00, %20 ; <float> [#uses=1]
+ %24 = fsub float %22, %23 ; <float> [#uses=1]
+ %25 = fmul float 0.000000e+00, %18 ; <float> [#uses=1]
+ %26 = fmul float %s1.02, %20 ; <float> [#uses=1]
+ %27 = fadd float %25, %26 ; <float> [#uses=1]
+ %28 = fadd float %11, %27 ; <float> [#uses=1]
+ store float %28, float* %fi.1, align 4
+ %29 = fadd float %12, %24 ; <float> [#uses=1]
+ store float %29, float* null, align 4
+ %30 = fmul float 0.000000e+00, %21 ; <float> [#uses=1]
+ %31 = fmul float %s1.02, %17 ; <float> [#uses=1]
+ %32 = fsub float %30, %31 ; <float> [#uses=1]
+ %33 = fsub float %10, %32 ; <float> [#uses=1]
+ store float %33, float* undef, align 4
+ %34 = icmp slt i32 undef, undef ; <i1> [#uses=1]
+ br i1 %34, label %bb8, label %bb9
+
+bb9: ; preds = %bb8
+ %35 = fadd float 0.000000e+00, undef ; <float> [#uses=1]
+ br label %bb7
+}
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
new file mode 100644
index 0000000..4c8ffe8
--- /dev/null
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
+; rdar://7352504
+; Make sure we use "str r9, [sp, #+28]" instead of "sub.w r4, r7, #256" followed by "str r9, [r4, #-32]".
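+; Thumb-2 STR accepts a 12-bit unsigned immediate offset, so [sp, #+28] is
+; directly encodable and no scratch base register has to be materialized.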
+
+%0 = type { i16, i8, i8 }
+%1 = type { [2 x i32], [2 x i32] }
+%2 = type { %union.rec* }
+%struct.FILE_POS = type { i8, i8, i16, i32 }
+%struct.GAP = type { i8, i8, i16 }
+%struct.LIST = type { %union.rec*, %union.rec* }
+%struct.STYLE = type { %union.anon, %union.anon, i16, i16, i32 }
+%struct.head_type = type { [2 x %struct.LIST], %union.FIRST_UNION, %union.SECOND_UNION, %union.THIRD_UNION, %union.FOURTH_UNION, %union.rec*, %2, %union.rec*, %union.rec*, %union.rec*, %union.rec*, %union.rec*, %union.rec*, %union.rec*, %union.rec*, i32 }
+%union.FIRST_UNION = type { %struct.FILE_POS }
+%union.FOURTH_UNION = type { %struct.STYLE }
+%union.SECOND_UNION = type { %0 }
+%union.THIRD_UNION = type { %1 }
+%union.anon = type { %struct.GAP }
+%union.rec = type { %struct.head_type }
+
+@zz_hold = external global %union.rec* ; <%union.rec**> [#uses=2]
+@zz_res = external global %union.rec* ; <%union.rec**> [#uses=1]
+
+define arm_apcscc %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind {
+entry:
+; CHECK: ldr.w r9, [r7, #+32]
+; CHECK-NEXT: str.w r9, [sp, #+28]
+ %xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0]
+ %ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0]
+ br i1 false, label %bb, label %bb20
+
+bb: ; preds = %entry
+ unreachable
+
+bb20: ; preds = %entry
+ switch i32 undef, label %bb1287 [
+ i32 11, label %bb119
+ i32 12, label %bb119
+ i32 21, label %bb420
+ i32 23, label %bb420
+ i32 45, label %bb438
+ i32 46, label %bb438
+ i32 55, label %bb533
+ i32 56, label %bb569
+ i32 64, label %bb745
+ i32 78, label %bb1098
+ ]
+
+bb119: ; preds = %bb20, %bb20
+ unreachable
+
+bb420: ; preds = %bb20, %bb20
+ store %union.rec* null, %union.rec** @zz_hold, align 4
+ store %union.rec* null, %union.rec** @zz_res, align 4
+ store %union.rec* %x, %union.rec** @zz_hold, align 4
+ %0 = call arm_apcscc %union.rec* @Manifest(%union.rec* undef, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind ; <%union.rec*> [#uses=0]
+ unreachable
+
+bb438: ; preds = %bb20, %bb20
+ unreachable
+
+bb533: ; preds = %bb20
+ ret %union.rec* %x
+
+bb569: ; preds = %bb20
+ unreachable
+
+bb745: ; preds = %bb20
+ unreachable
+
+bb1098: ; preds = %bb20
+ unreachable
+
+bb1287: ; preds = %bb20
+ unreachable
+}
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
new file mode 100644
index 0000000..64309c4
--- /dev/null
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s
+; rdar://7353541
+
+; The generated code is nowhere near ideal. It does not recognize that the two
+; constantpool entries being loaded can be merged into one.
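+; (LCPI1_0 and LCPI1_1 below both appear to hold the PIC address of @GV.)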
+
+@GV = external global i32 ; <i32*> [#uses=2]
+
+define arm_apcscc void @t(i32* nocapture %vals, i32 %c) nounwind {
+entry:
+; CHECK: t:
+; CHECK: cbz
+ %0 = icmp eq i32 %c, 0 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+; CHECK: BB#1
+; CHECK: ldr{{.*}} r{{[0-9]+}}, LCPI1_0
+; CHECK: ldr{{.*}} r{{[0-9]+}}, LCPI1_1
+ %.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+ %1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=1]
+ %i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ] ; <i32> [#uses=2]
+ %scevgep = getelementptr i32* %vals, i32 %i.03 ; <i32*> [#uses=1]
+ %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %3 = add nsw i32 %1, %2 ; <i32> [#uses=2]
+ store i32 %3, i32* @GV, align 4
+ %4 = add i32 %i.03, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %4, %c ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-bcc.ll b/test/CodeGen/Thumb2/thumb2-bcc.ll
index e1f9cdb..aae9f5c 100644
--- a/test/CodeGen/Thumb2/thumb2-bcc.ll
+++ b/test/CodeGen/Thumb2/thumb2-bcc.ll
@@ -2,8 +2,8 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep it
define i32 @t1(i32 %a, i32 %b, i32 %c) {
-; CHECK: t1
-; CHECK: beq
+; CHECK: t1:
+; CHECK: cbz
%tmp2 = icmp eq i32 %a, 0
br i1 %tmp2, label %cond_false, label %cond_true
diff --git a/test/CodeGen/Thumb2/thumb2-bfc.ll b/test/CodeGen/Thumb2/thumb2-bfc.ll
index d33cf7e..b486045 100644
--- a/test/CodeGen/Thumb2/thumb2-bfc.ll
+++ b/test/CodeGen/Thumb2/thumb2-bfc.ll
@@ -1,25 +1,32 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "bfc " | count 3
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 4278190095 = 0xff00000f
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: bfc r
%tmp = and i32 %a, 4278190095
ret i32 %tmp
}
; 4286578688 = 0xff800000
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: bfc r
%tmp = and i32 %a, 4286578688
ret i32 %tmp
}
; 4095 = 0x00000fff
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: bfc r
%tmp = and i32 %a, 4095
ret i32 %tmp
}
; 2147483646 = 0x7ffffffe not implementable w/ BFC
define i32 @f4(i32 %a) {
+; CHECK: f4:
%tmp = and i32 %a, 2147483646
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll
index b46cb5f..1298384 100644
--- a/test/CodeGen/Thumb2/thumb2-branch.ll
+++ b/test/CodeGen/Thumb2/thumb2-branch.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
define void @f1(i32 %a, i32 %b, i32* %v) {
entry:
; CHECK: f1:
-; CHECK bne LBB
+; CHECK: bne LBB
%tmp = icmp eq i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp, label %cond_true, label %return
@@ -18,7 +18,7 @@ return: ; preds = %entry
define void @f2(i32 %a, i32 %b, i32* %v) {
entry:
; CHECK: f2:
-; CHECK bge LBB
+; CHECK: bge LBB
%tmp = icmp slt i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp, label %cond_true, label %return
@@ -33,7 +33,7 @@ return: ; preds = %entry
define void @f3(i32 %a, i32 %b, i32* %v) {
entry:
; CHECK: f3:
-; CHECK bhs LBB
+; CHECK: bhs LBB
%tmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp, label %cond_true, label %return
@@ -48,7 +48,7 @@ return: ; preds = %entry
define void @f4(i32 %a, i32 %b, i32* %v) {
entry:
; CHECK: f4:
-; CHECK blo LBB
+; CHECK: blo LBB
%tmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp, label %return, label %cond_true
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
new file mode 100644
index 0000000..64587c1
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; rdar://7354379
+
+declare arm_apcscc double @floor(double) nounwind readnone
+
+define void @t(i1 %a, double %b) {
+entry:
+ br i1 %a, label %bb3, label %bb1
+
+bb1: ; preds = %entry
+ unreachable
+
+bb3: ; preds = %entry
+ br i1 %a, label %bb7, label %bb5
+
+bb5: ; preds = %bb3
+ unreachable
+
+bb7: ; preds = %bb3
+ br i1 %a, label %bb11, label %bb9
+
+bb9: ; preds = %bb7
+; CHECK: @ BB#2:
+; CHECK-NEXT: cbnz
+ %0 = tail call arm_apcscc double @floor(double %b) nounwind readnone ; <double> [#uses=0]
+ br label %bb11
+
+bb11: ; preds = %bb9, %bb7
+ %1 = getelementptr i32* undef, i32 0
+ store i32 0, i32* %1
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index 0bed058..74728bf 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a | grep "clz " | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: clz r
%tmp = tail call i32 @llvm.ctlz.i32(i32 %a)
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-cmn2.ll b/test/CodeGen/Thumb2/thumb2-cmn2.ll
index c1fcac0..c0e19f6 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn2.ll
@@ -1,25 +1,33 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "cmn\\.w " | grep {#187\\|#11141290\\|#-872363008\\|#1114112} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; -0x000000bb = 4294967109
define i1 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: cmn.w {{r.*}}, #187
%tmp = icmp ne i32 %a, 4294967109
ret i1 %tmp
}
; -0x00aa00aa = 4283826006
define i1 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: cmn.w {{r.*}}, #11141290
%tmp = icmp eq i32 %a, 4283826006
ret i1 %tmp
}
; -0xcc00cc00 = 872363008
define i1 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: cmn.w {{r.*}}, #-872363008
%tmp = icmp ne i32 %a, 872363008
ret i1 %tmp
}
; -0x00110000 = 4293853184
define i1 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: cmn.w {{r.*}}, #1114112
%tmp = icmp eq i32 %a, 4293853184
ret i1 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-eor2.ll b/test/CodeGen/Thumb2/thumb2-eor2.ll
index 185634c..6b2e9dc 100644
--- a/test/CodeGen/Thumb2/thumb2-eor2.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor2.ll
@@ -1,31 +1,41 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "eor " | grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 0x000000bb = 187
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: eor {{.*}}#187
%tmp = xor i32 %a, 187
ret i32 %tmp
}
; 0x00aa00aa = 11141290
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: eor {{.*}}#11141290
%tmp = xor i32 %a, 11141290
ret i32 %tmp
}
; 0xcc00cc00 = 3422604288
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: eor {{.*}}#-872363008
%tmp = xor i32 %a, 3422604288
ret i32 %tmp
}
; 0xdddddddd = 3722304989
define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: eor {{.*}}#-572662307
%tmp = xor i32 %a, 3722304989
ret i32 %tmp
}
; 0x00110000 = 1114112
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: eor {{.*}}#1114112
%tmp = xor i32 %a, 1114112
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll
index 8606e32..1dc3614 100644
--- a/test/CodeGen/Thumb2/thumb2-mov.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -5,38 +5,40 @@
; var 2.1 - 0x00ab00ab
define i32 @t2_const_var2_1_ok_1(i32 %lhs) {
;CHECK: t2_const_var2_1_ok_1:
-;CHECK: #11206827
+;CHECK: add.w r0, r0, #11206827
%ret = add i32 %lhs, 11206827 ; 0x00ab00ab
ret i32 %ret
}
define i32 @t2_const_var2_1_ok_2(i32 %lhs) {
;CHECK: t2_const_var2_1_ok_2:
-;CHECK: #11206656
-;CHECK: #187
+;CHECK: add.w r0, r0, #11206656
+;CHECK: adds r0, #187
%ret = add i32 %lhs, 11206843 ; 0x00ab00bb
ret i32 %ret
}
define i32 @t2_const_var2_1_ok_3(i32 %lhs) {
;CHECK: t2_const_var2_1_ok_3:
-;CHECK: #11206827
-;CHECK: #16777216
+;CHECK: add.w r0, r0, #11206827
+;CHECK: add.w r0, r0, #16777216
%ret = add i32 %lhs, 27984043 ; 0x01ab00ab
ret i32 %ret
}
define i32 @t2_const_var2_1_ok_4(i32 %lhs) {
;CHECK: t2_const_var2_1_ok_4:
-;CHECK: #16777472
-;CHECK: #11206827
+;CHECK: add.w r0, r0, #16777472
+;CHECK: add.w r0, r0, #11206827
%ret = add i32 %lhs, 27984299 ; 0x01ab01ab
ret i32 %ret
}
define i32 @t2_const_var2_1_fail_1(i32 %lhs) {
;CHECK: t2_const_var2_1_fail_1:
-;CHECK: movt
+;CHECK: movw r1, #43777
+;CHECK: movt r1, #427
+;CHECK: add r0, r1
%ret = add i32 %lhs, 28027649 ; 0x01abab01
ret i32 %ret
}
@@ -44,37 +46,40 @@ define i32 @t2_const_var2_1_fail_1(i32 %lhs) {
; var 2.2 - 0xab00ab00
define i32 @t2_const_var2_2_ok_1(i32 %lhs) {
;CHECK: t2_const_var2_2_ok_1:
-;CHECK: #-1426019584
+;CHECK: add.w r0, r0, #-1426019584
%ret = add i32 %lhs, 2868947712 ; 0xab00ab00
ret i32 %ret
}
define i32 @t2_const_var2_2_ok_2(i32 %lhs) {
;CHECK: t2_const_var2_2_ok_2:
-;CHECK: #-1426063360
-;CHECK: #47616
+;CHECK: add.w r0, r0, #-1426063360
+;CHECK: add.w r0, r0, #47616
%ret = add i32 %lhs, 2868951552 ; 0xab00ba00
ret i32 %ret
}
define i32 @t2_const_var2_2_ok_3(i32 %lhs) {
;CHECK: t2_const_var2_2_ok_3:
-;CHECK: #-1426019584
+;CHECK: add.w r0, r0, #-1426019584
+;CHECK: adds r0, #16
%ret = add i32 %lhs, 2868947728 ; 0xab00ab10
ret i32 %ret
}
define i32 @t2_const_var2_2_ok_4(i32 %lhs) {
;CHECK: t2_const_var2_2_ok_4:
-;CHECK: #-1426019584
-;CHECK: #1048592
+;CHECK: add.w r0, r0, #-1426019584
+;CHECK: add.w r0, r0, #1048592
%ret = add i32 %lhs, 2869996304 ; 0xab10ab10
ret i32 %ret
}
define i32 @t2_const_var2_2_fail_1(i32 %lhs) {
;CHECK: t2_const_var2_2_fail_1:
-;CHECK: movt
+;CHECK: movw r1, #43792
+;CHECK: movt r1, #4267
+;CHECK: add r0, r1
%ret = add i32 %lhs, 279685904 ; 0x10abab10
ret i32 %ret
}
@@ -82,35 +87,43 @@ define i32 @t2_const_var2_2_fail_1(i32 %lhs) {
; var 2.3 - 0xabababab
define i32 @t2_const_var2_3_ok_1(i32 %lhs) {
;CHECK: t2_const_var2_3_ok_1:
-;CHECK: #-1414812757
+;CHECK: add.w r0, r0, #-1414812757
%ret = add i32 %lhs, 2880154539 ; 0xabababab
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_1(i32 %lhs) {
;CHECK: t2_const_var2_3_fail_1:
-;CHECK: movt
+;CHECK: movw r1, #43962
+;CHECK: movt r1, #43947
+;CHECK: add r0, r1
%ret = add i32 %lhs, 2880154554 ; 0xabababba
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_2(i32 %lhs) {
;CHECK: t2_const_var2_3_fail_2:
-;CHECK: movt
+;CHECK: movw r1, #47787
+;CHECK: movt r1, #43947
+;CHECK: add r0, r1
%ret = add i32 %lhs, 2880158379 ; 0xababbaab
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_3(i32 %lhs) {
;CHECK: t2_const_var2_3_fail_3:
-;CHECK: movt
+;CHECK: movw r1, #43947
+;CHECK: movt r1, #43962
+;CHECK: add r0, r1
%ret = add i32 %lhs, 2881137579 ; 0xabbaabab
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_4(i32 %lhs) {
;CHECK: t2_const_var2_3_fail_4:
-;CHECK: movt
+;CHECK: movw r1, #43947
+;CHECK: movt r1, #47787
+;CHECK: add r0, r1
%ret = add i32 %lhs, 3131812779 ; 0xbaababab
ret i32 %ret
}
@@ -118,36 +131,136 @@ define i32 @t2_const_var2_3_fail_4(i32 %lhs) {
; var 3 - 0x0F000000
define i32 @t2_const_var3_1_ok_1(i32 %lhs) {
;CHECK: t2_const_var3_1_ok_1:
-;CHECK: #251658240
+;CHECK: add.w r0, r0, #251658240
%ret = add i32 %lhs, 251658240 ; 0x0F000000
ret i32 %ret
}
define i32 @t2_const_var3_2_ok_1(i32 %lhs) {
;CHECK: t2_const_var3_2_ok_1:
-;CHECK: #3948544
+;CHECK: add.w r0, r0, #3948544
%ret = add i32 %lhs, 3948544 ; 0b00000000001111000100000000000000
ret i32 %ret
}
define i32 @t2_const_var3_2_ok_2(i32 %lhs) {
;CHECK: t2_const_var3_2_ok_2:
-;CHECK: #2097152
-;CHECK: #1843200
+;CHECK: add.w r0, r0, #2097152
+;CHECK: add.w r0, r0, #1843200
%ret = add i32 %lhs, 3940352 ; 0b00000000001111000010000000000000
ret i32 %ret
}
define i32 @t2_const_var3_3_ok_1(i32 %lhs) {
;CHECK: t2_const_var3_3_ok_1:
-;CHECK: #258
+;CHECK: add.w r0, r0, #258
%ret = add i32 %lhs, 258 ; 0b00000000000000000000000100000010
ret i32 %ret
}
define i32 @t2_const_var3_4_ok_1(i32 %lhs) {
;CHECK: t2_const_var3_4_ok_1:
-;CHECK: #-268435456
+;CHECK: add.w r0, r0, #-268435456
%ret = add i32 %lhs, 4026531840 ; 0xF0000000
ret i32 %ret
}
+
+define i32 @t2MOVTi16_ok_1(i32 %a) {
+; CHECK: t2MOVTi16_ok_1:
+; CHECK: movt r0, #1234
+ %1 = and i32 %a, 65535
+ %2 = shl i32 1234, 16
+ %3 = or i32 %1, %2
+
+ ret i32 %3
+}
+
+define i32 @t2MOVTi16_test_1(i32 %a) {
+; CHECK: t2MOVTi16_test_1:
+; CHECK: movt r0, #1234
+ %1 = shl i32 255, 8
+ %2 = shl i32 1234, 8
+ %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
+ %4 = shl i32 %2, 8 ; This gives us (1234 << 16) in %4
+ %5 = and i32 %a, %3
+ %6 = or i32 %4, %5
+
+ ret i32 %6
+}
+
+define i32 @t2MOVTi16_test_2(i32 %a) {
+; CHECK: t2MOVTi16_test_2:
+; CHECK: movt r0, #1234
+ %1 = shl i32 255, 8
+ %2 = shl i32 1234, 8
+ %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
+ %4 = shl i32 %2, 6
+ %5 = and i32 %a, %3
+ %6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6
+ %7 = or i32 %5, %6
+
+ ret i32 %7
+}
+
+define i32 @t2MOVTi16_test_3(i32 %a) {
+; CHECK: t2MOVTi16_test_3:
+; CHECK: movt r0, #1234
+ %1 = shl i32 255, 8
+ %2 = shl i32 1234, 8
+ %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
+ %4 = shl i32 %2, 6
+ %5 = and i32 %a, %3
+ %6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6
+ %7 = lshr i32 %6, 6
+ %8 = shl i32 %7, 6
+ %9 = or i32 %5, %8
+
+ ret i32 %9
+}
+
+; 171 = 0x000000ab
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: movs r0, #171
+ %tmp = add i32 0, 171
+ ret i32 %tmp
+}
+
+; 1179666 = 0x00120012
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: mov.w r0, #1179666
+ %tmp = add i32 0, 1179666
+ ret i32 %tmp
+}
+
+; 872428544 = 0x34003400
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: mov.w r0, #872428544
+ %tmp = add i32 0, 872428544
+ ret i32 %tmp
+}
+
+; 1448498774 = 0x56565656
+define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: mov.w r0, #1448498774
+ %tmp = add i32 0, 1448498774
+ ret i32 %tmp
+}
+
+; 66846720 = 0x03fc0000
+define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: mov.w r0, #66846720
+ %tmp = add i32 0, 66846720
+ ret i32 %tmp
+}
+
+define i32 @f6(i32 %a) {
+;CHECK: f6
+;CHECK: movw r0, #65535
+ %tmp = add i32 0, 65535
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-str_post.ll b/test/CodeGen/Thumb2/thumb2-str_post.ll
index bee5810..bbfb447 100644
--- a/test/CodeGen/Thumb2/thumb2-str_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_post.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep {strh .*\\\[.*\], #-4} | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep {str .*\\\[.*\],} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i16 @test1(i32* %X, i16* %A) {
+; CHECK: test1:
+; CHECK: strh {{.*}}[{{.*}}], #-4
%Y = load i32* %X ; <i32> [#uses=1]
%tmp1 = trunc i32 %Y to i16 ; <i16> [#uses=1]
store i16 %tmp1, i16* %A
@@ -13,6 +12,8 @@ define i16 @test1(i32* %X, i16* %A) {
}
define i32 @test2(i32* %X, i32* %A) {
+; CHECK: test2:
+; CHECK: str {{.*}}[{{.*}}],
%Y = load i32* %X ; <i32> [#uses=1]
store i32 %Y, i32* %A
%tmp1 = ptrtoint i32* %A to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 0626d28..721d4c9 100644
--- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -1,6 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \
-; RUN: grep {1 .*folded into instructions}
-; Increment in loop bb.128.i adjusted to 2, to prevent loop reversal from
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; Increment in loop bb.i28.i adjusted to 2, to prevent loop reversal from
; kicking in.
declare fastcc void @rdft(i32, i32, double*, i32*, double*)
@@ -34,6 +33,9 @@ cond_next36.i: ; preds = %cond_next.i
br label %bb.i28.i
bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
+; CHECK: %bb.i28.i
+; CHECK: addl $2
+; CHECK: addl $2
%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index 9b52c5c..7463a0e 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -3,7 +3,7 @@
@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1]
-define void @minmax(float* %result) nounwind {
+define void @minmax(float* %result) nounwind optsize {
entry:
%tmp2 = load float* %result, align 4 ; <float> [#uses=6]
%tmp4 = getelementptr float* %result, i32 2 ; <float*> [#uses=5]
diff --git a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
index 1f95a24..4852e89 100644
--- a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
+++ b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -6,7 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin8"
%struct.BoundaryAlignment = type { [3 x i8], i8, i16, i16, i8, [2 x i8] }
-define void @passing2(i64 %str.0, i64 %str.1, i16 signext %s, i32 %j, i8 signext %c, i16 signext %t, i16 signext %u, i8 signext %d) nounwind {
+define void @passing2(i64 %str.0, i64 %str.1, i16 signext %s, i32 %j, i8 signext %c, i16 signext %t, i16 signext %u, i8 signext %d) nounwind optsize {
entry:
%str_addr = alloca %struct.BoundaryAlignment ; <%struct.BoundaryAlignment*> [#uses=7]
%s_addr = alloca i16 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/2009-10-25-RewriterBug.ll b/test/CodeGen/X86/2009-10-25-RewriterBug.ll
new file mode 100644
index 0000000..5b4e818
--- /dev/null
+++ b/test/CodeGen/X86/2009-10-25-RewriterBug.ll
@@ -0,0 +1,171 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim
+
+%struct.DecRefPicMarking_t = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t* }
+%struct.FrameStore = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.StorablePicture*, %struct.StorablePicture*, %struct.StorablePicture* }
+%struct.StorablePicture = type { i32, i32, i32, i32, i32, [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16**, i16***, i8*, i16**, i8***, i64***, i64***, i16****, i8**, i8**, %struct.StorablePicture*, %struct.StorablePicture*, %struct.StorablePicture*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, %struct.DecRefPicMarking_t*, i32 }
+
+define fastcc void @insert_picture_in_dpb(%struct.FrameStore* nocapture %fs, %struct.StorablePicture* %p) nounwind ssp {
+entry:
+ %0 = getelementptr inbounds %struct.FrameStore* %fs, i64 0, i32 12 ; <%struct.StorablePicture**> [#uses=1]
+ %1 = icmp eq i32 undef, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb.i, label %bb36.i
+
+bb.i: ; preds = %entry
+ br i1 undef, label %bb3.i, label %bb14.preheader.i
+
+bb3.i: ; preds = %bb.i
+ unreachable
+
+bb14.preheader.i: ; preds = %bb.i
+ br i1 undef, label %bb9.i, label %bb20.preheader.i
+
+bb9.i: ; preds = %bb9.i, %bb14.preheader.i
+ br i1 undef, label %bb9.i, label %bb20.preheader.i
+
+bb20.preheader.i: ; preds = %bb9.i, %bb14.preheader.i
+ br i1 undef, label %bb18.i, label %bb29.preheader.i
+
+bb18.i: ; preds = %bb20.preheader.i
+ unreachable
+
+bb29.preheader.i: ; preds = %bb20.preheader.i
+ br i1 undef, label %bb24.i, label %bb30.i
+
+bb24.i: ; preds = %bb29.preheader.i
+ unreachable
+
+bb30.i: ; preds = %bb29.preheader.i
+ store i32 undef, i32* undef, align 8
+ br label %bb67.preheader.i
+
+bb36.i: ; preds = %entry
+ br label %bb67.preheader.i
+
+bb67.preheader.i: ; preds = %bb36.i, %bb30.i
+ %2 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=2]
+ %3 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=2]
+ %4 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=2]
+ %5 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %6 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %7 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %8 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %9 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %10 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %11 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %12 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+ br i1 undef, label %bb38.i, label %bb68.i
+
+bb38.i: ; preds = %bb66.i, %bb67.preheader.i
+ %13 = phi %struct.StorablePicture* [ %37, %bb66.i ], [ %2, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %14 = phi %struct.StorablePicture* [ %38, %bb66.i ], [ %3, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %15 = phi %struct.StorablePicture* [ %39, %bb66.i ], [ %4, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %16 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %5, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %17 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %6, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %18 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %7, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %19 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %8, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %20 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %9, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %21 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %10, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %22 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %11, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %23 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %12, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %indvar248.i = phi i64 [ %indvar.next249.i, %bb66.i ], [ 0, %bb67.preheader.i ] ; <i64> [#uses=3]
+ %storemerge52.i = trunc i64 %indvar248.i to i32 ; <i32> [#uses=1]
+ %24 = getelementptr inbounds %struct.StorablePicture* %23, i64 0, i32 19 ; <i32*> [#uses=0]
+ br i1 undef, label %bb.nph51.i, label %bb66.i
+
+bb.nph51.i: ; preds = %bb38.i
+ %25 = sdiv i32 %storemerge52.i, 8 ; <i32> [#uses=0]
+ br label %bb39.i
+
+bb39.i: ; preds = %bb64.i, %bb.nph51.i
+ %26 = phi %struct.StorablePicture* [ %17, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %27 = phi %struct.StorablePicture* [ %18, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0]
+ %28 = phi %struct.StorablePicture* [ %19, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0]
+ %29 = phi %struct.StorablePicture* [ %20, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0]
+ %30 = phi %struct.StorablePicture* [ %21, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0]
+ %31 = phi %struct.StorablePicture* [ %22, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0]
+ br i1 undef, label %bb57.i, label %bb40.i
+
+bb40.i: ; preds = %bb39.i
+ br i1 undef, label %bb57.i, label %bb41.i
+
+bb41.i: ; preds = %bb40.i
+ %storemerge10.i = select i1 undef, i32 2, i32 4 ; <i32> [#uses=1]
+ %32 = zext i32 %storemerge10.i to i64 ; <i64> [#uses=1]
+ br i1 undef, label %bb45.i, label %bb47.i
+
+bb45.i: ; preds = %bb41.i
+ %33 = getelementptr inbounds %struct.StorablePicture* %26, i64 0, i32 5, i64 undef, i64 %32, i64 undef ; <i64*> [#uses=1]
+ %34 = load i64* %33, align 8 ; <i64> [#uses=1]
+ br label %bb47.i
+
+bb47.i: ; preds = %bb45.i, %bb41.i
+ %storemerge11.i = phi i64 [ %34, %bb45.i ], [ 0, %bb41.i ] ; <i64> [#uses=0]
+ %scevgep246.i = getelementptr i64* undef, i64 undef ; <i64*> [#uses=0]
+ br label %bb64.i
+
+bb57.i: ; preds = %bb40.i, %bb39.i
+ br i1 undef, label %bb58.i, label %bb60.i
+
+bb58.i: ; preds = %bb57.i
+ br label %bb60.i
+
+bb60.i: ; preds = %bb58.i, %bb57.i
+ %35 = load i64*** undef, align 8 ; <i64**> [#uses=1]
+ %scevgep256.i = getelementptr i64** %35, i64 %indvar248.i ; <i64**> [#uses=1]
+ %36 = load i64** %scevgep256.i, align 8 ; <i64*> [#uses=1]
+ %scevgep243.i = getelementptr i64* %36, i64 undef ; <i64*> [#uses=1]
+ store i64 -1, i64* %scevgep243.i, align 8
+ br label %bb64.i
+
+bb64.i: ; preds = %bb60.i, %bb47.i
+ br i1 undef, label %bb39.i, label %bb66.i
+
+bb66.i: ; preds = %bb64.i, %bb38.i
+ %37 = phi %struct.StorablePicture* [ %13, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=2]
+ %38 = phi %struct.StorablePicture* [ %14, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=2]
+ %39 = phi %struct.StorablePicture* [ %15, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=2]
+ %40 = phi %struct.StorablePicture* [ %16, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=8]
+ %indvar.next249.i = add i64 %indvar248.i, 1 ; <i64> [#uses=1]
+ br i1 undef, label %bb38.i, label %bb68.i
+
+bb68.i: ; preds = %bb66.i, %bb67.preheader.i
+ %41 = phi %struct.StorablePicture* [ %2, %bb67.preheader.i ], [ %37, %bb66.i ] ; <%struct.StorablePicture*> [#uses=0]
+ %42 = phi %struct.StorablePicture* [ %3, %bb67.preheader.i ], [ %38, %bb66.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %43 = phi %struct.StorablePicture* [ %4, %bb67.preheader.i ], [ %39, %bb66.i ] ; <%struct.StorablePicture*> [#uses=1]
+ br i1 undef, label %bb.nph48.i, label %bb108.i
+
+bb.nph48.i: ; preds = %bb68.i
+ br label %bb80.i
+
+bb80.i: ; preds = %bb104.i, %bb.nph48.i
+ %44 = phi %struct.StorablePicture* [ %42, %bb.nph48.i ], [ null, %bb104.i ] ; <%struct.StorablePicture*> [#uses=1]
+ %45 = phi %struct.StorablePicture* [ %43, %bb.nph48.i ], [ null, %bb104.i ] ; <%struct.StorablePicture*> [#uses=1]
+ br i1 undef, label %bb.nph39.i, label %bb104.i
+
+bb.nph39.i: ; preds = %bb80.i
+ br label %bb81.i
+
+bb81.i: ; preds = %bb102.i, %bb.nph39.i
+ %46 = phi %struct.StorablePicture* [ %44, %bb.nph39.i ], [ %48, %bb102.i ] ; <%struct.StorablePicture*> [#uses=0]
+ %47 = phi %struct.StorablePicture* [ %45, %bb.nph39.i ], [ %48, %bb102.i ] ; <%struct.StorablePicture*> [#uses=0]
+ br i1 undef, label %bb83.i, label %bb82.i
+
+bb82.i: ; preds = %bb81.i
+ br i1 undef, label %bb83.i, label %bb101.i
+
+bb83.i: ; preds = %bb82.i, %bb81.i
+ br label %bb102.i
+
+bb101.i: ; preds = %bb82.i
+ br label %bb102.i
+
+bb102.i: ; preds = %bb101.i, %bb83.i
+ %48 = load %struct.StorablePicture** %0, align 8 ; <%struct.StorablePicture*> [#uses=2]
+ br i1 undef, label %bb81.i, label %bb104.i
+
+bb104.i: ; preds = %bb102.i, %bb80.i
+ br label %bb80.i
+
+bb108.i: ; preds = %bb68.i
+ unreachable
+}
diff --git a/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll b/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll
new file mode 100644
index 0000000..d84b63a
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin11 | FileCheck %s
+; rdar://7362871
+
+define void @bar(i32 %b, i32 %a) nounwind optsize ssp {
+entry:
+; CHECK: leal 15(%rsi), %edi
+; CHECK-NOT: movl
+; CHECK: call _foo
+ %0 = add i32 %a, 15 ; <i32> [#uses=1]
+ %1 = zext i32 %0 to i64 ; <i64> [#uses=1]
+ tail call void @foo(i64 %1) nounwind
+ ret void
+}
+
+declare void @foo(i64)
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index 6b245c1..972b3cd 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=false > %t
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=none > %t
; RUN: grep {%xmm0} %t | count 14
; RUN: not grep {%xmm1} %t
-; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies > %t
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=critical > %t
; RUN: grep {%xmm0} %t | count 7
; RUN: grep {%xmm1} %t | count 7
diff --git a/test/CodeGen/X86/constant-pool-sharing.ll b/test/CodeGen/X86/constant-pool-sharing.ll
new file mode 100644
index 0000000..c3e97ad
--- /dev/null
+++ b/test/CodeGen/X86/constant-pool-sharing.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; llc should share constant pool entries between this integer vector
+; and this floating-point vector since they have the same encoding.
+
+; CHECK: LCPI1_0(%rip), %xmm0
+; CHECK: movaps %xmm0, (%rdi)
+; CHECK: movaps %xmm0, (%rsi)
+
+define void @foo(<4 x i32>* %p, <4 x float>* %q, i1 %t) nounwind {
+entry:
+ br label %loop
+loop:
+ store <4 x i32><i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, <4 x i32>* %p
+ store <4 x float><float 2.0, float 2.0, float 2.0, float 2.0>, <4 x float>* %q
+ br i1 %t, label %loop, label %ret
+ret:
+ ret void
+}
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index 2b4b832..337f1b2 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -2,7 +2,7 @@
; RUN: grep {asm-printer} | grep {Number of machine instrs printed} | grep 5
; RUN: grep {leal 1(\%rsi),} %t
-define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2) nounwind {
+define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2) nounwind optsize {
entry:
%0 = add i32 %i2, 1 ; <i32> [#uses=1]
%1 = sext i32 %0 to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/large-gep-scale.ll b/test/CodeGen/X86/large-gep-scale.ll
new file mode 100644
index 0000000..143294e
--- /dev/null
+++ b/test/CodeGen/X86/large-gep-scale.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; PR5281
+
+; After scaling, this type doesn't fit in memory. Codegen should still
+; generate correct addressing.
+
+; CHECK: shll $2, %edx
+
+define fastcc i32* @_ada_smkr([2147483647 x i32]* %u, i32 %t) nounwind {
+ %x = getelementptr [2147483647 x i32]* %u, i32 %t, i32 0
+ ret i32* %x
+}
diff --git a/test/CodeGen/X86/negative-stride-fptosi-user.ll b/test/CodeGen/X86/negative-stride-fptosi-user.ll
new file mode 100644
index 0000000..332e0b9
--- /dev/null
+++ b/test/CodeGen/X86/negative-stride-fptosi-user.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64 | grep cvtsi2sd
+
+; LSR previously eliminated the sitofp by introducing an induction
+; variable that stepped by a bogus ((double)UINT32_C(-1)). It's theoretically
+; possible to eliminate the sitofp using a proper -1.0 step, though; this
+; test should be changed if that is done.
+
+define void @foo(i32 %N) nounwind {
+entry:
+ %0 = icmp slt i32 %N, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb, label %return
+
+bb: ; preds = %bb, %entry
+ %i.03 = phi i32 [ 0, %entry ], [ %2, %bb ] ; <i32> [#uses=2]
+ %1 = sitofp i32 %i.03 to double ; <double> [#uses=1]
+ tail call void @bar(double %1) nounwind
+ %2 = add nsw i32 %i.03, -1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %2, %N ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+declare void @bar(double)
diff --git a/test/CodeGen/X86/palignr-2.ll b/test/CodeGen/X86/palignr-2.ll
new file mode 100644
index 0000000..2936641
--- /dev/null
+++ b/test/CodeGen/X86/palignr-2.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 -mattr=+ssse3 | FileCheck %s
+; rdar://7341330
+
+@a = global [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 16 ; <[4 x i32]*> [#uses=1]
+@c = common global [4 x i32] zeroinitializer, align 16 ; <[4 x i32]*> [#uses=1]
+@b = global [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 16 ; <[4 x i32]*> [#uses=1]
+
+define void @t1(<2 x i64> %a, <2 x i64> %b) nounwind ssp {
+entry:
+; CHECK: t1:
+; palignr $3, %xmm1, %xmm0
+ %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i32 24) nounwind readnone
+ store <2 x i64> %0, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
+ ret void
+}
+
+declare <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64>, <2 x i64>, i32) nounwind readnone
+
+define void @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; palignr $4, _b, %xmm0
+ %0 = load <2 x i64>* bitcast ([4 x i32]* @b to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
+ %1 = load <2 x i64>* bitcast ([4 x i32]* @a to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
+ %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i32 32) nounwind readnone
+ store <2 x i64> %2, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
+ ret void
+}
diff --git a/test/CodeGen/X86/pic-load-remat.ll b/test/CodeGen/X86/pic-load-remat.ll
index 7729752..d930f76 100644
--- a/test/CodeGen/X86/pic-load-remat.ll
+++ b/test/CodeGen/X86/pic-load-remat.ll
@@ -1,4 +1,10 @@
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb
+; XFAIL: *
+
+; This is XFAIL'd because MachineLICM is now hoisting all of the loads, and the
+; PIC base appears killed in the entry block when remat is making its decisions.
+; Remat's simple heuristic decides against rematting because it doesn't want to
+; extend the live range of the PIC base; this isn't necessarily optimal.
define void @f() nounwind {
entry:
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index 4042a09..f8d542e 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; Currently, floating-point selects are lowered to CFG triangles.
; This means that one side of the select is always unconditionally
@@ -41,3 +41,108 @@ bb:
return:
ret void
}
+
+; Sink instructions with dead EFLAGS defs.
+
+; CHECK: zzz:
+; CHECK: je
+; CHECK-NEXT: orb
+
+define zeroext i8 @zzz(i8 zeroext %a, i8 zeroext %b) nounwind readnone {
+entry:
+ %tmp = zext i8 %a to i32 ; <i32> [#uses=1]
+ %tmp2 = icmp eq i8 %a, 0 ; <i1> [#uses=1]
+ %tmp3 = or i8 %b, -128 ; <i8> [#uses=1]
+ %tmp4 = and i8 %b, 127 ; <i8> [#uses=1]
+ %b_addr.0 = select i1 %tmp2, i8 %tmp4, i8 %tmp3 ; <i8> [#uses=1]
+ ret i8 %b_addr.0
+}
+
+; Codegen should hoist and CSE these constants.
+
+; CHECK: vv:
+; CHECK: LCPI4_0(%rip), %xmm0
+; CHECK: LCPI4_1(%rip), %xmm1
+; CHECK: LCPI4_2(%rip), %xmm2
+; CHECK: align
+; CHECK-NOT: LCPI
+; CHECK: ret
+
+@_minusZero.6007 = internal constant <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00> ; <<4 x float>*> [#uses=0]
+@twoTo23.6008 = internal constant <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06> ; <<4 x float>*> [#uses=0]
+
+define void @vv(float* %y, float* %x, i32* %n) nounwind ssp {
+entry:
+ br label %bb60
+
+bb: ; preds = %bb60
+ %0 = bitcast float* %x_addr.0 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %1 = load <4 x float>* %0, align 16 ; <<4 x float>> [#uses=4]
+ %tmp20 = bitcast <4 x float> %1 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp22 = and <4 x i32> %tmp20, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ; <<4 x i32>> [#uses=1]
+ %tmp23 = bitcast <4 x i32> %tmp22 to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp25 = bitcast <4 x float> %1 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp27 = and <4 x i32> %tmp25, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=2]
+ %tmp30 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %tmp23, <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06>, i8 5) ; <<4 x float>> [#uses=1]
+ %tmp34 = bitcast <4 x float> %tmp30 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp36 = xor <4 x i32> %tmp34, <i32 -1, i32 -1, i32 -1, i32 -1> ; <<4 x i32>> [#uses=1]
+ %tmp37 = and <4 x i32> %tmp36, <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200> ; <<4 x i32>> [#uses=1]
+ %tmp42 = or <4 x i32> %tmp37, %tmp27 ; <<4 x i32>> [#uses=1]
+ %tmp43 = bitcast <4 x i32> %tmp42 to <4 x float> ; <<4 x float>> [#uses=2]
+ %tmp45 = fadd <4 x float> %1, %tmp43 ; <<4 x float>> [#uses=1]
+ %tmp47 = fsub <4 x float> %tmp45, %tmp43 ; <<4 x float>> [#uses=2]
+ %tmp49 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %1, <4 x float> %tmp47, i8 1) ; <<4 x float>> [#uses=1]
+ %2 = bitcast <4 x float> %tmp49 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %3 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %2) nounwind readnone ; <<4 x float>> [#uses=1]
+ %tmp53 = fadd <4 x float> %tmp47, %3 ; <<4 x float>> [#uses=1]
+ %tmp55 = bitcast <4 x float> %tmp53 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp57 = or <4 x i32> %tmp55, %tmp27 ; <<4 x i32>> [#uses=1]
+ %tmp58 = bitcast <4 x i32> %tmp57 to <4 x float> ; <<4 x float>> [#uses=1]
+ %4 = bitcast float* %y_addr.0 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ store <4 x float> %tmp58, <4 x float>* %4, align 16
+ %5 = getelementptr float* %x_addr.0, i64 4 ; <float*> [#uses=1]
+ %6 = getelementptr float* %y_addr.0, i64 4 ; <float*> [#uses=1]
+ %7 = add i32 %i.0, 4 ; <i32> [#uses=1]
+ br label %bb60
+
+bb60: ; preds = %bb, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %7, %bb ] ; <i32> [#uses=2]
+ %x_addr.0 = phi float* [ %x, %entry ], [ %5, %bb ] ; <float*> [#uses=2]
+ %y_addr.0 = phi float* [ %y, %entry ], [ %6, %bb ] ; <float*> [#uses=2]
+ %8 = load i32* %n, align 4 ; <i32> [#uses=1]
+ %9 = icmp sgt i32 %8, %i.0 ; <i1> [#uses=1]
+ br i1 %9, label %bb, label %return
+
+return: ; preds = %bb60
+ ret void
+}
+
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
+
+; CodeGen should use the correct register class when extracting
+; a load from a zero-extending load for hoisting.
+
+; CHECK: default_get_pch_validity:
+; CHECK: movl cl_options_count(%rip), %ecx
+
+@cl_options_count = external constant i32 ; <i32*> [#uses=2]
+
+define void @default_get_pch_validity() nounwind {
+entry:
+ %tmp4 = load i32* @cl_options_count, align 4 ; <i32> [#uses=1]
+ %tmp5 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1]
+ br i1 %tmp5, label %bb6, label %bb2
+
+bb2: ; preds = %bb2, %entry
+ %i.019 = phi i64 [ 0, %entry ], [ %tmp25, %bb2 ] ; <i64> [#uses=1]
+ %tmp25 = add i64 %i.019, 1 ; <i64> [#uses=2]
+ %tmp11 = load i32* @cl_options_count, align 4 ; <i32> [#uses=1]
+ %tmp12 = zext i32 %tmp11 to i64 ; <i64> [#uses=1]
+ %tmp13 = icmp ugt i64 %tmp12, %tmp25 ; <i1> [#uses=1]
+ br i1 %tmp13, label %bb2, label %bb6
+
+bb6: ; preds = %bb2, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/vec_ins_extract.ll b/test/CodeGen/X86/vec_ins_extract.ll
index bf43deb..daf222e 100644
--- a/test/CodeGen/X86/vec_ins_extract.ll
+++ b/test/CodeGen/X86/vec_ins_extract.ll
@@ -3,6 +3,7 @@
; This checks that various insert/extract idioms work without going to the
; stack.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
define void @test(<4 x float>* %F, float %f) {
entry:
diff --git a/test/CodeGen/X86/x86-64-jumps.ll b/test/CodeGen/X86/x86-64-jumps.ll
new file mode 100644
index 0000000..5ed6a23
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-jumps.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+
+define i8 @test1() nounwind ssp {
+entry:
+ %0 = select i1 undef, i8* blockaddress(@test1, %bb), i8* blockaddress(@test1, %bb6) ; <i8*> [#uses=1]
+ indirectbr i8* %0, [label %bb, label %bb6]
+
+bb: ; preds = %entry
+ ret i8 1
+
+bb6: ; preds = %entry
+ ret i8 2
+}
+
diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll
index 0f65e57..7baa7e5 100644
--- a/test/CodeGen/X86/x86-64-pic-10.ll
+++ b/test/CodeGen/X86/x86-64-pic-10.ll
@@ -3,7 +3,7 @@
@g = alias weak i32 ()* @f
-define void @g() {
+define void @h() {
entry:
%tmp31 = call i32 @g()
ret void
diff --git a/test/DebugInfo/2009-11-03-InsertExtractValue.ll b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
new file mode 100644
index 0000000..d9a67d6
--- /dev/null
+++ b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+!0 = metadata !{i32 42}
+
+define <{i32, i32}> @f1() {
+; CHECK: !dbg !0
+ %r = insertvalue <{ i32, i32 }> zeroinitializer, i32 4, 1, !dbg !0
+; CHECK: !dbg !0
+ %e = extractvalue <{ i32, i32 }> %r, 0, !dbg !0
+ ret <{ i32, i32 }> %r
+}
diff --git a/test/Feature/terminators.ll b/test/Feature/terminators.ll
new file mode 100644
index 0000000..1bca2a8
--- /dev/null
+++ b/test/Feature/terminators.ll
@@ -0,0 +1,43 @@
+; RUN: llvm-as < %s | llvm-dis > %t1.ll
+; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
+; RUN: diff %t1.ll %t2.ll
+
+ %int = type i32
+
+define i32 @squared(i32 %i0) {
+ switch i32 %i0, label %Default [
+ i32 1, label %Case1
+ i32 2, label %Case2
+ i32 4, label %Case4
+ ]
+
+Default: ; preds = %0
+ ret i32 -1
+
+Case1: ; preds = %0
+ ret i32 1
+
+Case2: ; preds = %0
+ ret i32 4
+
+Case4: ; preds = %0
+ ret i32 16
+}
+
+
+@Addr = global i8* blockaddress(@indbrtest, %BB1)
+@Addr3 = global i8* blockaddress(@squared, %Case1)
+
+
+define i32 @indbrtest(i8* %P, i32* %Q) {
+ indirectbr i8* %P, [label %BB1, label %BB2, label %BB3]
+BB1:
+ indirectbr i32* %Q, []
+BB2:
+ %R = bitcast i8* blockaddress(@indbrtest, %BB3) to i8*
+ indirectbr i8* %R, [label %BB1, label %BB2, label %BB3]
+BB3:
+ ret i32 2
+}
+
+
diff --git a/test/FrontendC++/2009-10-27-crash.cpp b/test/FrontendC++/2009-10-27-crash.cpp
new file mode 100644
index 0000000..5641aa4
--- /dev/null
+++ b/test/FrontendC++/2009-10-27-crash.cpp
@@ -0,0 +1,43 @@
+// RUN: %llvmgxx -emit-llvm -S %s
+// Radar 7328944
+
+typedef struct
+{
+ unsigned short a : 1;
+ unsigned short b : 2;
+ unsigned short c : 1;
+ unsigned short d : 1;
+ unsigned short e : 1;
+ unsigned short f : 1;
+ unsigned short g : 2;
+ unsigned short : 7;
+ union
+ {
+ struct
+ {
+ unsigned char h : 1;
+ unsigned char i : 1;
+ unsigned char j : 1;
+ unsigned char : 5;
+ };
+ struct
+ {
+ unsigned char k : 3;
+ unsigned char : 5;
+ };
+ };
+ unsigned char : 8;
+} tt;
+
+typedef struct
+{
+ unsigned char s;
+ tt t;
+ unsigned int u;
+} ttt;
+
+ttt X = {
+ 4,
+ { 0 },
+ 55,
+};
diff --git a/test/FrontendC++/integration-O2.cpp b/test/FrontendC++/integration-O2.cpp
new file mode 100644
index 0000000..bb65ac2
--- /dev/null
+++ b/test/FrontendC++/integration-O2.cpp
@@ -0,0 +1,19 @@
+// RUN: %llvmgxx %s -O2 -S -o - | FileCheck %s
+
+// This test verifies that we get the expected codegen from the full optimizer
+// at the -O2 optimization level.
+
+
+
+// Verify that ipsccp is running and can eliminate globals.
+static int test1g = 42;
+void test1f1() {
+ if (test1g == 0) test1g = 0;
+}
+int test1f2() {
+ return test1g;
+}
+
+// CHECK: @_Z7test1f2v()
+// CHECK: entry:
+// CHECK-NEXT: ret i32 42
diff --git a/test/LLVMC/OptionPreprocessor.td b/test/LLVMC/OptionPreprocessor.td
new file mode 100644
index 0000000..5b9f435
--- /dev/null
+++ b/test/LLVMC/OptionPreprocessor.td
@@ -0,0 +1,42 @@
+// Test for the OptionPreprocessor and any*.
+// RUN: ignore tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: grep W1 %t
+// RUN: grep W2 %t
+// RUN: grep W3 %t
+
+include "llvm/CompilerDriver/Common.td"
+
+def OptList : OptionList<[
+(switch_option "foo", (help "dummy")),
+(switch_option "bar", (help "dummy")),
+(switch_option "baz", (help "dummy")),
+(parameter_option "foo_p", (help "dummy")),
+(parameter_option "bar_p", (help "dummy")),
+(parameter_option "baz_p", (help "dummy"))
+]>;
+
+def Preprocess : OptionPreprocessor<
+(case (and (switch_on "foo"), (any_switch_on ["bar", "baz"])),
+ (warning "W1"),
+ (and (switch_on ["foo", "bar"]), (any_empty ["foo_p", "bar_p"])),
+ (warning "W2"),
+ (and (empty ["foo_p", "bar_p"]), (any_not_empty ["baz_p"])),
+ (warning "W3"))
+>;
+
+// Shut up warnings...
+def dummy : Tool<
+[(in_language "dummy"),
+ (out_language "dummy"),
+ (output_suffix "d"),
+ (cmd_line "dummy $INFILE -o $OUTFILE"),
+ (actions (case (switch_on "foo"), (error),
+ (switch_on "bar"), (error),
+ (switch_on "baz"), (error),
+ (not_empty "foo_p"), (error),
+ (not_empty "bar_p"), (error),
+ (not_empty "baz_p"), (error)))
+]>;
+
+def Graph : CompilationGraph<[Edge<"root", "dummy">]>;
+
diff --git a/test/Makefile b/test/Makefile
index 4955c2e..ede1b44 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -78,9 +78,13 @@ endif
# Neither AuroraUX nor Solaris has the -m flag for ulimit
ifeq ($(HOST_OS),SunOS)
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
-else
+else # !SunOS
+ifeq ($(HOST_OS),AuroraUX)
+ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
+else # !AuroraUX
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 512000 ;
-endif
+endif # AuroraUX
+endif # SunOS
ifneq ($(RUNTEST),)
check-local:: site.exp
diff --git a/test/Other/2003-02-19-LoopInfoNestingBug.ll b/test/Other/2003-02-19-LoopInfoNestingBug.ll
index 267b0e8..13f8351 100644
--- a/test/Other/2003-02-19-LoopInfoNestingBug.ll
+++ b/test/Other/2003-02-19-LoopInfoNestingBug.ll
@@ -3,7 +3,7 @@
; and instead nests it just inside loop "Top"
;
; RUN: opt < %s -analyze -loops | \
-; RUN: grep { Loop at depth 3 containing: %Inner<header><latch><exit>}
+; RUN: grep { Loop at depth 3 containing: %Inner<header><latch><exiting>}
;
define void @test() {
br label %Top
diff --git a/test/Scripts/macho-dump b/test/Scripts/macho-dump
index 12ec26d..5b9943a 100755
--- a/test/Scripts/macho-dump
+++ b/test/Scripts/macho-dump
@@ -104,6 +104,9 @@ def dumpLoadCommand(f, i, opts):
dumpSymtabCommand(f, opts)
elif cmd == 11:
dumpDysymtabCommand(f, opts)
+ elif cmd == 27:
+ import uuid
+ print " ('uuid', %s)" % uuid.UUID(bytes=f.read(16))
else:
print >>sys.stderr,"%s: warning: unknown load command: %r" % (sys.argv[0], cmd)
f.read(cmdSize - 8)
diff --git a/test/Transforms/ArgumentPromotion/aggregate-promote.ll b/test/Transforms/ArgumentPromotion/aggregate-promote.ll
index 6a60e61..12de511 100644
--- a/test/Transforms/ArgumentPromotion/aggregate-promote.ll
+++ b/test/Transforms/ArgumentPromotion/aggregate-promote.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -argpromotion -instcombine -S | not grep load
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%QuadTy = type { i32, i32, i32, i32 }
@G = constant %QuadTy {
diff --git a/test/Transforms/ArgumentPromotion/basictest.ll b/test/Transforms/ArgumentPromotion/basictest.ll
index 87f6371..ac9d7bf 100644
--- a/test/Transforms/ArgumentPromotion/basictest.ll
+++ b/test/Transforms/ArgumentPromotion/basictest.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -argpromotion -mem2reg -S | not grep alloca
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define internal i32 @test(i32* %X, i32* %Y) {
%A = load i32* %X ; <i32> [#uses=1]
%B = load i32* %Y ; <i32> [#uses=1]
diff --git a/test/Transforms/ArgumentPromotion/byval.ll b/test/Transforms/ArgumentPromotion/byval.ll
index 052528a..44b26fc 100644
--- a/test/Transforms/ArgumentPromotion/byval.ll
+++ b/test/Transforms/ArgumentPromotion/byval.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -argpromotion -scalarrepl -S | not grep load
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Argpromote + scalarrepl should change this to passing the two integers by value.
%struct.ss = type { i32, i64 }
diff --git a/test/Transforms/ArgumentPromotion/chained.ll b/test/Transforms/ArgumentPromotion/chained.ll
index 5ccb752..c9a4538 100644
--- a/test/Transforms/ArgumentPromotion/chained.ll
+++ b/test/Transforms/ArgumentPromotion/chained.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -argpromotion -instcombine -S | not grep load
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@G1 = constant i32 0 ; <i32*> [#uses=1]
@G2 = constant i32* @G1 ; <i32**> [#uses=1]
diff --git a/test/Transforms/ArgumentPromotion/control-flow2.ll b/test/Transforms/ArgumentPromotion/control-flow2.ll
index 79b44d4..9a8afc3 100644
--- a/test/Transforms/ArgumentPromotion/control-flow2.ll
+++ b/test/Transforms/ArgumentPromotion/control-flow2.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -argpromotion -S | \
; RUN: grep {load i32\\* %A}
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define internal i32 @callee(i1 %C, i32* %P) {
br i1 %C, label %T, label %F
diff --git a/test/Transforms/ConstProp/float-to-ptr-cast.ll b/test/Transforms/ConstProp/float-to-ptr-cast.ll
index d8eb3e8..937f606 100644
--- a/test/Transforms/ConstProp/float-to-ptr-cast.ll
+++ b/test/Transforms/ConstProp/float-to-ptr-cast.ll
@@ -1,12 +1,15 @@
-; RUN: opt < %s -constprop -S | \
-; RUN: grep -F {ret i32* null} | count 2
+; RUN: opt < %s -constprop -S | FileCheck %s
define i32* @test1() {
%X = inttoptr i64 0 to i32* ; <i32*> [#uses=1]
ret i32* %X
}
+; CHECK: ret i32* null
+
define i32* @test2() {
ret i32* null
}
+; CHECK: ret i32* null
+
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index f3e7f6a..edd26b8 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -2,88 +2,102 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-@test1 = constant {{i32,i8},i32} {{i32,i8} { i32 -559038737, i8 186 }, i32 -889275714 }
-@test2 = constant double 1.0
-@test3 = constant {i64, i64} { i64 123, i64 112312312 }
+@g1 = constant {{i32,i8},i32} {{i32,i8} { i32 -559038737, i8 186 }, i32 -889275714 }
+@g2 = constant double 1.0
+@g3 = constant {i64, i64} { i64 123, i64 112312312 }
; Simple load
define i32 @test1() {
- %r = load i32* getelementptr ({{i32,i8},i32}* @test1, i32 0, i32 0, i32 0)
+ %r = load i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0)
ret i32 %r
-; @test1
+; CHECK: @test1
; CHECK: ret i32 -559038737
}
; PR3152
; Load of first 16 bits of 32-bit value.
define i16 @test2() {
- %r = load i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @test1, i32 0, i32 0, i32 0) to i16*)
+ %r = load i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*)
ret i16 %r
-; @test2
+; CHECK: @test2
; CHECK: ret i16 -16657
}
; Load of second 16 bits of 32-bit value.
define i16 @test3() {
- %r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @test1, i32 0, i32 0, i32 0) to i16*), i32 1)
+ %r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 1)
ret i16 %r
-; @test3
+; CHECK: @test3
; CHECK: ret i16 -8531
}
; Load of 8 bit field + tail padding.
define i16 @test4() {
- %r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @test1, i32 0, i32 0, i32 0) to i16*), i32 2)
+ %r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 2)
ret i16 %r
-; @test4
+; CHECK: @test4
; CHECK: ret i16 186
}
; Load of double bits.
define i64 @test6() {
- %r = load i64* bitcast(double* @test2 to i64*)
+ %r = load i64* bitcast(double* @g2 to i64*)
ret i64 %r
-; @test6
+; CHECK: @test6
; CHECK: ret i64 4607182418800017408
}
; Load of double bits.
define i16 @test7() {
- %r = load i16* bitcast(double* @test2 to i16*)
+ %r = load i16* bitcast(double* @g2 to i16*)
ret i16 %r
-; @test7
+; CHECK: @test7
; CHECK: ret i16 0
}
; Double load.
define double @test8() {
- %r = load double* bitcast({{i32,i8},i32}* @test1 to double*)
+ %r = load double* bitcast({{i32,i8},i32}* @g1 to double*)
ret double %r
-; @test8
-; CHECK: ret double 0xDEADBEBA
+; CHECK: @test8
+; CHECK: ret double 0xBADEADBEEF
}
; i128 load.
define i128 @test9() {
- %r = load i128* bitcast({i64, i64}* @test3 to i128*)
+ %r = load i128* bitcast({i64, i64}* @g3 to i128*)
ret i128 %r
-; @test9
-; CHECK: ret i128 112312312
+; CHECK: @test9
+; CHECK: ret i128 2071796475790618158476296315
}
; vector load.
define <2 x i64> @test10() {
- %r = load <2 x i64>* bitcast({i64, i64}* @test3 to <2 x i64>*)
+ %r = load <2 x i64>* bitcast({i64, i64}* @g3 to <2 x i64>*)
ret <2 x i64> %r
-; @test10
-; CHECK: ret <2 x i64> <i64 112312312, i64 0>
+; CHECK: @test10
+; CHECK: ret <2 x i64> <i64 123, i64 112312312>
}
+
+; PR5287
+@g4 = internal constant { i8, i8 } { i8 -95, i8 8 }
+
+define i16 @test11() nounwind {
+entry:
+ %a = load i16* bitcast ({ i8, i8 }* @g4 to i16*)
+ ret i16 %a
+
+; CHECK: @test11
+; CHECK: ret i16 2209
+}
+
+
diff --git a/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll b/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll
index 4a5d6e2..9fcbf07 100644
--- a/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll
+++ b/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -dse -S | not grep tmp5
; PR2599
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define void @foo({ i32, i32 }* %x) nounwind {
entry:
diff --git a/test/Transforms/DeadStoreElimination/PartialStore.ll b/test/Transforms/DeadStoreElimination/PartialStore.ll
index 0881cb9..ab1edf5 100644
--- a/test/Transforms/DeadStoreElimination/PartialStore.ll
+++ b/test/Transforms/DeadStoreElimination/PartialStore.ll
@@ -2,6 +2,7 @@
; RUN: not grep {store i8}
; Ensure that the dead store is deleted in this case. It is wholly
; overwritten by the second store.
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define i32 @test() {
%V = alloca i32 ; <i32*> [#uses=3]
%V2 = bitcast i32* %V to i8* ; <i8*> [#uses=1]
diff --git a/test/Transforms/DeadStoreElimination/context-sensitive.ll b/test/Transforms/DeadStoreElimination/context-sensitive.ll
index 0da416c..7954310 100644
--- a/test/Transforms/DeadStoreElimination/context-sensitive.ll
+++ b/test/Transforms/DeadStoreElimination/context-sensitive.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -dse -S | not grep DEAD
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
declare void @ext()
diff --git a/test/Transforms/DeadStoreElimination/lifetime-simple.ll b/test/Transforms/DeadStoreElimination/lifetime-simple.ll
new file mode 100644
index 0000000..430e700
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/lifetime-simple.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -dse -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+define i8 @test2(i8* %P) nounwind {
+; CHECK: @test2
+; CHECK-NOT: store i8 1
+; CHECK: ret i8 0
+entry:
+ call void @llvm.lifetime.start(i64 32, i8* %P)
+ call void @llvm.lifetime.end(i64 32, i8* %P)
+ store i8 1, i8* %P
+ ret i8 0
+}
+
+declare {}* @llvm.lifetime.start(i64 %S, i8* nocapture %P) readonly
+declare void @llvm.lifetime.end(i64 %S, i8* nocapture %P)
\ No newline at end of file
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index e89d3ab..d859640 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -dse -S | not grep DEAD
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define void @test(i32* %Q, i32* %P) {
%DEAD = load i32* %Q ; <i32> [#uses=1]
diff --git a/test/Transforms/GVN/invariant-simple.ll b/test/Transforms/GVN/invariant-simple.ll
new file mode 100644
index 0000000..6de75f1
--- /dev/null
+++ b/test/Transforms/GVN/invariant-simple.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+define i8 @test(i8* %P) nounwind {
+; CHECK: @test
+; CHECK-NOT: load
+; CHECK: ret i8
+entry:
+ store i8 1, i8* %P
+ %0 = call {}* @llvm.invariant.start(i64 32, i8* %P)
+ %1 = tail call i32 @foo(i8* %P)
+ call void @llvm.invariant.end({}* %0, i64 32, i8* %P)
+ %2 = load i8* %P
+ ret i8 %2
+}
+
+define i8 @test2(i8* %P) nounwind {
+; CHECK: @test2
+; CHECK: store i8 1
+; CHECK: store i8 2
+; CHECK: ret i8 0
+entry:
+ store i8 1, i8* %P
+ %0 = call {}* @llvm.invariant.start(i64 32, i8* %P)
+ %1 = tail call i32 @bar(i8* %P)
+ call void @llvm.invariant.end({}* %0, i64 32, i8* %P)
+ store i8 2, i8* %P
+ ret i8 0
+}
+
+declare i32 @foo(i8*) nounwind
+declare i32 @bar(i8*) nounwind readonly
+declare {}* @llvm.invariant.start(i64 %S, i8* nocapture %P) readonly
+declare void @llvm.invariant.end({}* %S, i64 %SS, i8* nocapture %P)
\ No newline at end of file
diff --git a/test/Transforms/GVN/lifetime-simple.ll b/test/Transforms/GVN/lifetime-simple.ll
new file mode 100644
index 0000000..00a0c29
--- /dev/null
+++ b/test/Transforms/GVN/lifetime-simple.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+define i8 @test(i8* %P) nounwind {
+; CHECK: @test
+; CHECK-NOT: load
+; CHECK: ret i8 undef
+entry:
+ call void @llvm.lifetime.start(i64 32, i8* %P)
+ %0 = load i8* %P
+ store i8 1, i8* %P
+ call void @llvm.lifetime.end(i64 32, i8* %P)
+ %1 = load i8* %P
+ ret i8 %1
+}
+
+declare {}* @llvm.lifetime.start(i64 %S, i8* nocapture %P) readonly
+declare void @llvm.lifetime.end(i64 %S, i8* nocapture %P)
\ No newline at end of file
diff --git a/test/Transforms/GlobalOpt/globalsra-partial.ll b/test/Transforms/GlobalOpt/globalsra-partial.ll
index 9a068e9..06485b5 100644
--- a/test/Transforms/GlobalOpt/globalsra-partial.ll
+++ b/test/Transforms/GlobalOpt/globalsra-partial.ll
@@ -1,6 +1,7 @@
; In this case, the global can only be broken up by one level.
; RUN: opt < %s -globalopt -S | not grep 12345
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@G = internal global { i32, [4 x float] } zeroinitializer ; <{ i32, [4 x float] }*> [#uses=3]
diff --git a/test/Transforms/GlobalOpt/globalsra.ll b/test/Transforms/GlobalOpt/globalsra.ll
index 276ca64..6d8f220 100644
--- a/test/Transforms/GlobalOpt/globalsra.ll
+++ b/test/Transforms/GlobalOpt/globalsra.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -globalopt -S | not grep global
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@G = internal global { i32, float, { double } } {
i32 1,
diff --git a/test/Transforms/GlobalOpt/heap-sra-3.ll b/test/Transforms/GlobalOpt/heap-sra-3.ll
new file mode 100644
index 0000000..1496485
--- /dev/null
+++ b/test/Transforms/GlobalOpt/heap-sra-3.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10"
+
+ %struct.foo = type { i32, i32 }
+@X = internal global %struct.foo* null
+; CHECK: @X.f0
+; CHECK: @X.f1
+
+define void @bar(i32 %Size) nounwind noinline {
+entry:
+ %mallocsize = mul i32 ptrtoint (%struct.foo* getelementptr (%struct.foo* null, i32 1) to i32), %Size ; <i32> [#uses=1]
+; CHECK: mul i32 %Size
+ %malloccall = tail call i8* @malloc(i32 %mallocsize) ; <i8*> [#uses=1]
+ %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
+ store %struct.foo* %.sub, %struct.foo** @X, align 4
+ ret void
+}
+
+declare noalias i8* @malloc(i32)
+
+define i32 @baz() nounwind readonly noinline {
+bb1.thread:
+ %0 = load %struct.foo** @X, align 4
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb1.thread
+ %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
+ %sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ]
+ %1 = getelementptr %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0
+ %2 = load i32* %1, align 4
+ %3 = add i32 %2, %sum.0.reg2mem.0
+ %indvar.next = add i32 %i.0.reg2mem.0, 1
+ %exitcond = icmp eq i32 %indvar.next, 1200
+ br i1 %exitcond, label %bb2, label %bb1
+
+bb2: ; preds = %bb1
+ ret i32 %3
+}
+
diff --git a/test/Transforms/GlobalOpt/heap-sra-4.ll b/test/Transforms/GlobalOpt/heap-sra-4.ll
new file mode 100644
index 0000000..ae97ef1
--- /dev/null
+++ b/test/Transforms/GlobalOpt/heap-sra-4.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+ %struct.foo = type { i32, i32 }
+@X = internal global %struct.foo* null
+; CHECK: @X.f0
+; CHECK: @X.f1
+
+define void @bar(i32 %Size) nounwind noinline {
+entry:
+ %mallocsize = shl i32 ptrtoint (%struct.foo* getelementptr (%struct.foo* null, i32 1) to i32), 9 ; <i32> [#uses=1]
+ %malloccall = tail call i8* @malloc(i32 %mallocsize) ; <i8*> [#uses=1]
+; CHECK: @malloc(i32 mul (i32 512
+ %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
+ store %struct.foo* %.sub, %struct.foo** @X, align 4
+ ret void
+}
+
+declare noalias i8* @malloc(i32)
+
+define i32 @baz() nounwind readonly noinline {
+bb1.thread:
+ %0 = load %struct.foo** @X, align 4
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb1.thread
+ %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
+ %sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ]
+ %1 = getelementptr %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0
+ %2 = load i32* %1, align 4
+ %3 = add i32 %2, %sum.0.reg2mem.0
+ %indvar.next = add i32 %i.0.reg2mem.0, 1
+ %exitcond = icmp eq i32 %indvar.next, 1200
+ br i1 %exitcond, label %bb2, label %bb1
+
+bb2: ; preds = %bb1
+ ret i32 %3
+}
+
diff --git a/test/Transforms/GlobalOpt/malloc-promote-1.ll b/test/Transforms/GlobalOpt/malloc-promote-1.ll
index 5d4696f..fd510e3 100644
--- a/test/Transforms/GlobalOpt/malloc-promote-1.ll
+++ b/test/Transforms/GlobalOpt/malloc-promote-1.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -globalopt -S | not grep global
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@G = internal global i32* null ; <i32**> [#uses=3]
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll b/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
index 86e90c7..3a5c0b6 100644
--- a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
+++ b/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
@@ -2,6 +2,7 @@
; RUN: not grep inttoptr %t
; RUN: not grep ptrtoint %t
; RUN: grep scevgep %t
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Indvars shouldn't need inttoptr/ptrtoint to expand an address here.
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
index d249432..e17368b 100644
--- a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
+++ b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -indvars -S \
; RUN: | grep {\[%\]p.2.ip.1 = getelementptr \\\[3 x \\\[3 x double\\\]\\\]\\* \[%\]p, i64 2, i64 \[%\]tmp, i64 1}
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Indvars shouldn't expand this to
; %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %tmp, i64 19
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll
index 71e00cb..6531b9e 100644
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -1,12 +1,47 @@
-; RUN: opt < %s -inline -disable-output -print-function 2> /dev/null
+; RUN: opt < %s -inline -scalarrepl -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-define i32 @func(i32 %i) {
+define i32 @test1f(i32 %i) {
ret i32 %i
}
-define i32 @main(i32 %argc) {
- %X = call i32 @func( i32 7 ) ; <i32> [#uses=1]
- %Y = add i32 %X, %argc ; <i32> [#uses=1]
+define i32 @test1(i32 %W) {
+ %X = call i32 @test1f(i32 7)
+ %Y = add i32 %X, %W
ret i32 %Y
+; CHECK: @test1(
+; CHECK-NEXT: %Y = add i32 7, %W
+; CHECK-NEXT: ret i32 %Y
}
+
+
+; rdar://7339069
+
+%T = type { i32, i32 }
+
+; CHECK-NOT: @test2f
+define internal %T* @test2f(i1 %cond, %T* %P) {
+ br i1 %cond, label %T, label %F
+
+T:
+ %A = getelementptr %T* %P, i32 0, i32 0
+ store i32 42, i32* %A
+ ret %T* %P
+
+F:
+ ret %T* %P
+}
+
+define i32 @test2(i1 %cond) {
+ %A = alloca %T
+
+ %B = call %T* @test2f(i1 %cond, %T* %A)
+ %C = getelementptr %T* %B, i32 0, i32 0
+ %D = load i32* %C
+ ret i32 %D
+
+; CHECK: @test2(
+; CHECK-NOT: = alloca
+; CHECK: ret i32 42
+}
diff --git a/test/Transforms/Inline/callgraph-update.ll b/test/Transforms/Inline/callgraph-update.ll
index 528e9af..ff0120b 100644
--- a/test/Transforms/Inline/callgraph-update.ll
+++ b/test/Transforms/Inline/callgraph-update.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -inline -loop-rotate | llvm-dis
+; RUN: opt < %s -inline -loop-rotate -verify-dom-info -verify-loop-info -disable-output
; PR3601
declare void @solve()
diff --git a/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll b/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll
index 4d3d48e..fdb8fd9 100644
--- a/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll
+++ b/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
declare void @free(i8*)
diff --git a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index 3862de4..710aff2 100644
--- a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -instcombine -S | not grep call
; RUN: opt < %s -std-compile-opts -S | not grep xyz
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@.str = internal constant [4 x i8] c"xyz\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/add-shrink.ll b/test/Transforms/InstCombine/add-shrink.ll
index 52b8e32..cc57478 100644
--- a/test/Transforms/InstCombine/add-shrink.ll
+++ b/test/Transforms/InstCombine/add-shrink.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {add i32}
+; RUN: opt < %s -instcombine -S | grep {add nsw i32}
; RUN: opt < %s -instcombine -S | grep sext | count 1
; Should only have one sext and the add should be i32 instead of i64.
diff --git a/test/Transforms/InstCombine/add-sitofp.ll b/test/Transforms/InstCombine/add-sitofp.ll
index 24319df..98a8cb4 100644
--- a/test/Transforms/InstCombine/add-sitofp.ll
+++ b/test/Transforms/InstCombine/add-sitofp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {add i32}
+; RUN: opt < %s -instcombine -S | grep {add nsw i32}
define double @x(i32 %a, i32 %b) nounwind {
%m = lshr i32 %a, 24
diff --git a/test/Transforms/InstCombine/align-2d-gep.ll b/test/Transforms/InstCombine/align-2d-gep.ll
index 80aacbc..eeca5c0 100644
--- a/test/Transforms/InstCombine/align-2d-gep.ll
+++ b/test/Transforms/InstCombine/align-2d-gep.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S | grep {align 16} | count 1
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; A multi-dimensional array in a nested loop doing vector stores that
; aren't yet aligned. Instcombine can understand the addressing in the
diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll
index 4253937..d8ad5a9 100644
--- a/test/Transforms/InstCombine/align-addr.ll
+++ b/test/Transforms/InstCombine/align-addr.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S | grep {align 16} | count 1
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Instcombine should be able to prove vector alignment in the
; presence of a few mild address computation tricks.
diff --git a/test/Transforms/InstCombine/align-inc.ll b/test/Transforms/InstCombine/align-inc.ll
index 0260ca2..71512b3 100644
--- a/test/Transforms/InstCombine/align-inc.ll
+++ b/test/Transforms/InstCombine/align-inc.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -instcombine -S | grep {GLOBAL.*align 16}
; RUN: opt < %s -instcombine -S | grep {tmp = load}
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@GLOBAL = internal global [4 x i32] zeroinitializer
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index 13d664d..b9add4d 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -1,4 +1,5 @@
; Zero byte allocas should be deleted.
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; RUN: opt < %s -instcombine -S | \
; RUN: not grep alloca
diff --git a/test/Transforms/InstCombine/call.ll b/test/Transforms/InstCombine/call.ll
index 1e37eec..05c063d 100644
--- a/test/Transforms/InstCombine/call.ll
+++ b/test/Transforms/InstCombine/call.ll
@@ -1,6 +1,7 @@
; Ignore stderr; we expect warnings there
; RUN: opt < %s -instcombine 2> /dev/null -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Simple case, argument translatable without changing the value
declare void @test1a(i8*)
diff --git a/test/Transforms/InstCombine/cast-load-gep.ll b/test/Transforms/InstCombine/cast-load-gep.ll
index 57f021c..271c737 100644
--- a/test/Transforms/InstCombine/cast-load-gep.ll
+++ b/test/Transforms/InstCombine/cast-load-gep.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -instcombine -globaldce -S | \
; RUN: not grep Array
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Pulling the cast out of the load allows us to eliminate the load, and then
; the whole array.
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index c5266f3..79f86e9 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -1,5 +1,6 @@
; Tests to make sure elimination of casts is working correctly
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@inbuf = external global [32832 x i8] ; <[32832 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/cast2.ll b/test/Transforms/InstCombine/cast2.ll
index 0ae869f..2941ee0 100644
--- a/test/Transforms/InstCombine/cast2.ll
+++ b/test/Transforms/InstCombine/cast2.ll
@@ -1,5 +1,6 @@
; Tests to make sure elimination of casts is working correctly
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define i16 @test1(i16 %a) {
%tmp = zext i16 %a to i32 ; <i32> [#uses=2]
diff --git a/test/Transforms/InstCombine/constant-fold-gep.ll b/test/Transforms/InstCombine/constant-fold-gep.ll
index 5a7aef3..4be1a9c 100644
--- a/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
; Constant folding should fix notionally out-of-bounds indices
; and add inbounds keywords.
diff --git a/test/Transforms/InstCombine/fold-bin-operand.ll b/test/Transforms/InstCombine/fold-bin-operand.ll
index b837985..d0d072a 100644
--- a/test/Transforms/InstCombine/fold-bin-operand.ll
+++ b/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S | not grep icmp
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define i1 @f(i1 %x) {
%b = and i1 %x, icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*))
diff --git a/test/Transforms/InstCombine/fp-ret-bitcast.ll b/test/Transforms/InstCombine/fp-ret-bitcast.ll
index 169340a..35ece42 100644
--- a/test/Transforms/InstCombine/fp-ret-bitcast.ll
+++ b/test/Transforms/InstCombine/fp-ret-bitcast.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -instcombine -S | \
; RUN: grep {call float bitcast} | count 1
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%struct.NSObject = type { %struct.objc_class* }
%struct.NSArray = type { %struct.NSObject }
%struct.objc_class = type opaque
diff --git a/test/Transforms/InstCombine/loadstore-alignment.ll b/test/Transforms/InstCombine/loadstore-alignment.ll
index ff34017..9fbe683 100644
--- a/test/Transforms/InstCombine/loadstore-alignment.ll
+++ b/test/Transforms/InstCombine/loadstore-alignment.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S | grep {, align 16} | count 14
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@x = external global <2 x i64>, align 16
@xx = external global [13 x <2 x i64>], align 16
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index fd91e44..a4b7496 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -1,11 +1,13 @@
-; RUN: opt < %s -instcombine -S | grep {ret i32 0}
-; RUN: opt < %s -instcombine -globaldce -S | not grep malloc
+; RUN: opt < %s -instcombine -globaldce -S | FileCheck %s
; PR1201
define i32 @main(i32 %argc, i8** %argv) {
%c_19 = alloca i8* ; <i8**> [#uses=2]
%malloc_206 = malloc i8, i32 10 ; <i8*> [#uses=1]
+; CHECK-NOT: malloc
store i8* %malloc_206, i8** %c_19
%tmp_207 = load i8** %c_19 ; <i8*> [#uses=1]
free i8* %tmp_207
+; CHECK-NOT: free
ret i32 0
+; CHECK: ret i32 0
}
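; A note on the FileCheck directives used in these converted tests: CHECK
; patterns must match the tool's output in order (gaps allowed), CHECK-NEXT
; must match the line immediately after the previous match, and CHECK-NOT
; asserts that the string does not occur between the surrounding matches.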
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 37f934b..b72480b 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -1,171 +1,255 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: opt < %s -instcombine -S | \
-; RUN: grep -v xor | not grep {or }
-; END.
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @test1(i32 %A) {
- %B = or i32 %A, 0 ; <i32> [#uses=1]
+ %B = or i32 %A, 0
ret i32 %B
+; CHECK: @test1
+; CHECK: ret i32 %A
}
define i32 @test2(i32 %A) {
- %B = or i32 %A, -1 ; <i32> [#uses=1]
+ %B = or i32 %A, -1
ret i32 %B
+; CHECK: @test2
+; CHECK: ret i32 -1
}
define i8 @test2a(i8 %A) {
- %B = or i8 %A, -1 ; <i8> [#uses=1]
+ %B = or i8 %A, -1
ret i8 %B
+; CHECK: @test2a
+; CHECK: ret i8 -1
}
define i1 @test3(i1 %A) {
- %B = or i1 %A, false ; <i1> [#uses=1]
+ %B = or i1 %A, false
ret i1 %B
+; CHECK: @test3
+; CHECK: ret i1 %A
}
define i1 @test4(i1 %A) {
- %B = or i1 %A, true ; <i1> [#uses=1]
+ %B = or i1 %A, true
ret i1 %B
+; CHECK: @test4
+; CHECK: ret i1 true
}
define i1 @test5(i1 %A) {
- %B = or i1 %A, %A ; <i1> [#uses=1]
+ %B = or i1 %A, %A
ret i1 %B
+; CHECK: @test5
+; CHECK: ret i1 %A
}
define i32 @test6(i32 %A) {
- %B = or i32 %A, %A ; <i32> [#uses=1]
+ %B = or i32 %A, %A
ret i32 %B
+; CHECK: @test6
+; CHECK: ret i32 %A
}
; A | ~A == -1
define i32 @test7(i32 %A) {
- %NotA = xor i32 -1, %A ; <i32> [#uses=1]
- %B = or i32 %A, %NotA ; <i32> [#uses=1]
+ %NotA = xor i32 -1, %A
+ %B = or i32 %A, %NotA
ret i32 %B
+; CHECK: @test7
+; CHECK: ret i32 -1
}
define i8 @test8(i8 %A) {
- %B = or i8 %A, -2 ; <i8> [#uses=1]
- %C = or i8 %B, 1 ; <i8> [#uses=1]
+ %B = or i8 %A, -2
+ %C = or i8 %B, 1
ret i8 %C
+; CHECK: @test8
+; CHECK: ret i8 -1
}
; Test that (A|c1)|(B|c2) == (A|B)|(c1|c2)
define i8 @test9(i8 %A, i8 %B) {
- %C = or i8 %A, 1 ; <i8> [#uses=1]
- %D = or i8 %B, -2 ; <i8> [#uses=1]
- %E = or i8 %C, %D ; <i8> [#uses=1]
+ %C = or i8 %A, 1
+ %D = or i8 %B, -2
+ %E = or i8 %C, %D
ret i8 %E
+; CHECK: @test9
+; CHECK: ret i8 -1
}
define i8 @test10(i8 %A) {
- %B = or i8 %A, 1 ; <i8> [#uses=1]
- %C = and i8 %B, -2 ; <i8> [#uses=1]
+ %B = or i8 %A, 1
+ %C = and i8 %B, -2
; (X & C1) | C2 --> (X | C2) & (C1|C2)
- %D = or i8 %C, -2 ; <i8> [#uses=1]
+ %D = or i8 %C, -2
ret i8 %D
+; CHECK: @test10
+; CHECK: ret i8 -2
}
define i8 @test11(i8 %A) {
- %B = or i8 %A, -2 ; <i8> [#uses=1]
- %C = xor i8 %B, 13 ; <i8> [#uses=1]
+ %B = or i8 %A, -2
+ %C = xor i8 %B, 13
; (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
- %D = or i8 %C, 1 ; <i8> [#uses=1]
- %E = xor i8 %D, 12 ; <i8> [#uses=1]
+ %D = or i8 %C, 1
+ %E = xor i8 %D, 12
ret i8 %E
+; CHECK: @test11
+; CHECK: ret i8 -1
}
define i32 @test12(i32 %A) {
; Should be eliminated
- %B = or i32 %A, 4 ; <i32> [#uses=1]
- %C = and i32 %B, 8 ; <i32> [#uses=1]
+ %B = or i32 %A, 4
+ %C = and i32 %B, 8
ret i32 %C
+; CHECK: @test12
+; CHECK: %C = and i32 %A, 8
+; CHECK: ret i32 %C
}
define i32 @test13(i32 %A) {
- %B = or i32 %A, 12 ; <i32> [#uses=1]
+ %B = or i32 %A, 12
; Always equal to 8
- %C = and i32 %B, 8 ; <i32> [#uses=1]
+ %C = and i32 %B, 8
ret i32 %C
+; CHECK: @test13
+; CHECK: ret i32 8
}
define i1 @test14(i32 %A, i32 %B) {
- %C1 = icmp ult i32 %A, %B ; <i1> [#uses=1]
- %C2 = icmp ugt i32 %A, %B ; <i1> [#uses=1]
+ %C1 = icmp ult i32 %A, %B
+ %C2 = icmp ugt i32 %A, %B
; (A < B) | (A > B) === A != B
- %D = or i1 %C1, %C2 ; <i1> [#uses=1]
+ %D = or i1 %C1, %C2
ret i1 %D
+; CHECK: @test14
+; CHECK: %D = icmp ne i32 %A, %B
+; CHECK: ret i1 %D
}
define i1 @test15(i32 %A, i32 %B) {
- %C1 = icmp ult i32 %A, %B ; <i1> [#uses=1]
- %C2 = icmp eq i32 %A, %B ; <i1> [#uses=1]
+ %C1 = icmp ult i32 %A, %B
+ %C2 = icmp eq i32 %A, %B
; (A < B) | (A == B) === A <= B
- %D = or i1 %C1, %C2 ; <i1> [#uses=1]
+ %D = or i1 %C1, %C2
ret i1 %D
+; CHECK: @test15
+; CHECK: %D = icmp ule i32 %A, %B
+; CHECK: ret i1 %D
}
define i32 @test16(i32 %A) {
- %B = and i32 %A, 1 ; <i32> [#uses=1]
+ %B = and i32 %A, 1
; -2 = ~1
- %C = and i32 %A, -2 ; <i32> [#uses=1]
+ %C = and i32 %A, -2
; %D = and int %B, -1 == %B
- %D = or i32 %B, %C ; <i32> [#uses=1]
+ %D = or i32 %B, %C
ret i32 %D
+; CHECK: @test16
+; CHECK: ret i32 %A
}
define i32 @test17(i32 %A) {
- %B = and i32 %A, 1 ; <i32> [#uses=1]
- %C = and i32 %A, 4 ; <i32> [#uses=1]
+ %B = and i32 %A, 1
+ %C = and i32 %A, 4
; %D = and int %B, 5
- %D = or i32 %B, %C ; <i32> [#uses=1]
+ %D = or i32 %B, %C
ret i32 %D
+; CHECK: @test17
+; CHECK: %D = and i32 %A, 5
+; CHECK: ret i32 %D
}
define i1 @test18(i32 %A) {
- %B = icmp sge i32 %A, 100 ; <i1> [#uses=1]
- %C = icmp slt i32 %A, 50 ; <i1> [#uses=1]
+ %B = icmp sge i32 %A, 100
+ %C = icmp slt i32 %A, 50
;; (A-50) >u 50
- %D = or i1 %B, %C ; <i1> [#uses=1]
+ %D = or i1 %B, %C
ret i1 %D
+; CHECK: @test18
+; CHECK: add i32
+; CHECK: %D = icmp ugt
+; CHECK: ret i1 %D
}
define i1 @test19(i32 %A) {
- %B = icmp eq i32 %A, 50 ; <i1> [#uses=1]
- %C = icmp eq i32 %A, 51 ; <i1> [#uses=1]
+ %B = icmp eq i32 %A, 50
+ %C = icmp eq i32 %A, 51
;; (A-50) < 2
- %D = or i1 %B, %C ; <i1> [#uses=1]
+ %D = or i1 %B, %C
ret i1 %D
+; CHECK: @test19
+; CHECK: add i32
+; CHECK: %D = icmp ult
+; CHECK: ret i1 %D
}
define i32 @test20(i32 %x) {
- %y = and i32 %x, 123 ; <i32> [#uses=1]
- %z = or i32 %y, %x ; <i32> [#uses=1]
+ %y = and i32 %x, 123
+ %z = or i32 %y, %x
ret i32 %z
+; CHECK: @test20
+; CHECK: ret i32 %x
}
define i32 @test21(i32 %tmp.1) {
- %tmp.1.mask1 = add i32 %tmp.1, 2 ; <i32> [#uses=1]
- %tmp.3 = and i32 %tmp.1.mask1, -2 ; <i32> [#uses=1]
- %tmp.5 = and i32 %tmp.1, 1 ; <i32> [#uses=1]
+ %tmp.1.mask1 = add i32 %tmp.1, 2
+ %tmp.3 = and i32 %tmp.1.mask1, -2
+ %tmp.5 = and i32 %tmp.1, 1
;; add tmp.1, 2
- %tmp.6 = or i32 %tmp.5, %tmp.3 ; <i32> [#uses=1]
+ %tmp.6 = or i32 %tmp.5, %tmp.3
ret i32 %tmp.6
+; CHECK: @test21
+; CHECK: add i32 %{{[^,]*}}, 2
+; CHECK: ret i32
}
define i32 @test22(i32 %B) {
- %ELIM41 = and i32 %B, 1 ; <i32> [#uses=1]
- %ELIM7 = and i32 %B, -2 ; <i32> [#uses=1]
- %ELIM5 = or i32 %ELIM41, %ELIM7 ; <i32> [#uses=1]
+ %ELIM41 = and i32 %B, 1
+ %ELIM7 = and i32 %B, -2
+ %ELIM5 = or i32 %ELIM41, %ELIM7
ret i32 %ELIM5
+; CHECK: @test22
+; CHECK: ret i32 %B
}
define i16 @test23(i16 %A) {
- %B = lshr i16 %A, 1 ; <i16> [#uses=1]
+ %B = lshr i16 %A, 1
;; fold or into xor
- %C = or i16 %B, -32768 ; <i16> [#uses=1]
- %D = xor i16 %C, 8193 ; <i16> [#uses=1]
+ %C = or i16 %B, -32768
+ %D = xor i16 %C, 8193
ret i16 %D
+; CHECK: @test23
+; CHECK: %B = lshr i16 %A, 1
+; CHECK: %D = xor i16 %B, -24575
+; CHECK: ret i16 %D
+}
+
+; PR1738
+define i1 @test24(double %X, double %Y) {
+ %tmp9 = fcmp uno double %X, 0.000000e+00 ; <i1> [#uses=1]
+ %tmp13 = fcmp uno double %Y, 0.000000e+00 ; <i1> [#uses=1]
+ %bothcond = or i1 %tmp13, %tmp9 ; <i1> [#uses=1]
+ ret i1 %bothcond
+
+; CHECK: @test24
+; CHECK: %bothcond = fcmp uno double %Y, %X ; <i1> [#uses=1]
+; CHECK: ret i1 %bothcond
+}
+
+; PR3266 & PR5276
+define i1 @test25(i32 %A, i32 %B) {
+ %C = icmp eq i32 %A, 0
+ %D = icmp eq i32 %B, 57
+ %E = or i1 %C, %D
+ %F = xor i1 %E, -1
+ ret i1 %F
+
+; CHECK: @test25
+; CHECK: icmp ne i32 %A, 0
+; CHECK-NEXT: icmp ne i32 %B, 57
+; CHECK-NEXT: %F = and i1
+; CHECK-NEXT: ret i1 %F
}
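; Note: test25 is De Morgan's law in action; xor (or %C, %D), -1 becomes
; and (xor %C, -1), (xor %D, -1), and inverting the icmp predicates
; (eq -> ne) absorbs the xors.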
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index 24eca72..b73ce3f 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -1,44 +1,53 @@
; This test makes sure that these instructions are properly eliminated.
;
-; RUN: opt < %s -instcombine -S | not grep phi
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @test1(i32 %A, i1 %b) {
BB0:
br i1 %b, label %BB1, label %BB2
-BB1: ; preds = %BB0
+BB1:
; Combine away one argument PHI nodes
- %B = phi i32 [ %A, %BB0 ] ; <i32> [#uses=1]
+ %B = phi i32 [ %A, %BB0 ]
ret i32 %B
-BB2: ; preds = %BB0
+BB2:
ret i32 %A
+; CHECK: @test1
+; CHECK: BB1:
+; CHECK-NEXT: ret i32 %A
}
define i32 @test2(i32 %A, i1 %b) {
BB0:
br i1 %b, label %BB1, label %BB2
-BB1: ; preds = %BB0
+BB1:
br label %BB2
-BB2: ; preds = %BB1, %BB0
+BB2:
; Combine away PHI nodes with same values
- %B = phi i32 [ %A, %BB0 ], [ %A, %BB1 ] ; <i32> [#uses=1]
+ %B = phi i32 [ %A, %BB0 ], [ %A, %BB1 ]
ret i32 %B
+; CHECK: @test2
+; CHECK: BB2:
+; CHECK-NEXT: ret i32 %A
}
define i32 @test3(i32 %A, i1 %b) {
BB0:
br label %Loop
-Loop: ; preds = %Loop, %BB0
+Loop:
; PHI has same value always.
- %B = phi i32 [ %A, %BB0 ], [ %B, %Loop ] ; <i32> [#uses=2]
+ %B = phi i32 [ %A, %BB0 ], [ %B, %Loop ]
br i1 %b, label %Loop, label %Exit
-Exit: ; preds = %Loop
+Exit:
ret i32 %B
+; CHECK: @test3
+; CHECK: Exit:
+; CHECK-NEXT: ret i32 %A
}
define i32 @test4(i1 %b) {
@@ -48,11 +57,14 @@ BB0:
Loop: ; preds = %L2, %Loop
; PHI has same value always.
- %B = phi i32 [ %B, %L2 ], [ %B, %Loop ] ; <i32> [#uses=2]
+ %B = phi i32 [ %B, %L2 ], [ %B, %Loop ]
br i1 %b, label %L2, label %Loop
L2: ; preds = %Loop
br label %Loop
+; CHECK: @test4
+; CHECK: Loop:
+; CHECK-NEXT: br i1 %b
}
define i32 @test5(i32 %A, i1 %b) {
@@ -61,26 +73,35 @@ BB0:
Loop: ; preds = %Loop, %BB0
; PHI has same value always.
- %B = phi i32 [ %A, %BB0 ], [ undef, %Loop ] ; <i32> [#uses=1]
+ %B = phi i32 [ %A, %BB0 ], [ undef, %Loop ]
br i1 %b, label %Loop, label %Exit
Exit: ; preds = %Loop
ret i32 %B
+; CHECK: @test5
+; CHECK: Loop:
+; CHECK-NEXT: br i1 %b
+; CHECK: Exit:
+; CHECK-NEXT: ret i32 %A
}
-define i32 @test6(i32 %A, i1 %b) {
+define i32 @test6(i16 %A, i1 %b) {
BB0:
- %X = bitcast i32 %A to i32 ; <i32> [#uses=1]
+ %X = zext i16 %A to i32
br i1 %b, label %BB1, label %BB2
-BB1: ; preds = %BB0
- %Y = bitcast i32 %A to i32 ; <i32> [#uses=1]
+BB1:
+ %Y = zext i16 %A to i32
br label %BB2
-BB2: ; preds = %BB1, %BB0
+BB2:
;; Suck casts into phi
- %B = phi i32 [ %X, %BB0 ], [ %Y, %BB1 ] ; <i32> [#uses=1]
+ %B = phi i32 [ %X, %BB0 ], [ %Y, %BB1 ]
ret i32 %B
+; CHECK: @test6
+; CHECK: BB2:
+; CHECK: zext i16 %A to i32
+; CHECK-NEXT: ret i32
}
define i32 @test7(i32 %A, i1 %b) {
@@ -89,12 +110,15 @@ BB0:
Loop: ; preds = %Loop, %BB0
; PHI is dead.
- %B = phi i32 [ %A, %BB0 ], [ %C, %Loop ] ; <i32> [#uses=1]
- %C = add i32 %B, 123 ; <i32> [#uses=1]
+ %B = phi i32 [ %A, %BB0 ], [ %C, %Loop ]
+ %C = add i32 %B, 123
br i1 %b, label %Loop, label %Exit
Exit: ; preds = %Loop
ret i32 0
+; CHECK: @test7
+; CHECK: Loop:
+; CHECK-NEXT: br i1 %b
}
define i32* @test8({ i32, i32 } *%A, i1 %b) {
@@ -110,6 +134,91 @@ BB2:
;; Suck GEPs into phi
%B = phi i32* [ %X, %BB0 ], [ %Y, %BB1 ]
ret i32* %B
+; CHECK: @test8
+; CHECK-NOT: phi
+; CHECK: BB2:
+; CHECK-NEXT: %B = getelementptr
+; CHECK-NEXT: ret i32* %B
+}
+
+define i32 @test9(i32* %A, i32* %B) {
+entry:
+ %c = icmp eq i32* %A, null
+ br i1 %c, label %bb1, label %bb
+
+bb:
+ %C = load i32* %B, align 1
+ br label %bb2
+
+bb1:
+ %D = load i32* %A, align 1
+ br label %bb2
+
+bb2:
+ %E = phi i32 [ %C, %bb ], [ %D, %bb1 ]
+ ret i32 %E
+; CHECK: @test9
+; CHECK: bb2:
+; CHECK-NEXT: phi i32* [ %B, %bb ], [ %A, %bb1 ]
+; CHECK-NEXT: %E = load i32* %{{[^,]*}}, align 1
+; CHECK-NEXT: ret i32 %E
+
}
+define i32 @test10(i32* %A, i32* %B) {
+entry:
+ %c = icmp eq i32* %A, null
+ br i1 %c, label %bb1, label %bb
+
+bb:
+ %C = load i32* %B, align 16
+ br label %bb2
+
+bb1:
+ %D = load i32* %A, align 32
+ br label %bb2
+
+bb2:
+ %E = phi i32 [ %C, %bb ], [ %D, %bb1 ]
+ ret i32 %E
+; CHECK: @test10
+; CHECK: bb2:
+; CHECK-NEXT: phi i32* [ %B, %bb ], [ %A, %bb1 ]
+; CHECK-NEXT: %E = load i32* %{{[^,]*}}, align 16
+; CHECK-NEXT: ret i32 %E
+}
+
+
+; PR1777
+declare i1 @test11a()
+
+define i1 @test11() {
+entry:
+ %a = alloca i32
+ %i = ptrtoint i32* %a to i32
+ %b = call i1 @test11a()
+ br i1 %b, label %one, label %two
+
+one:
+ %x = phi i32 [%i, %entry], [%y, %two]
+ %c = call i1 @test11a()
+ br i1 %c, label %two, label %end
+
+two:
+ %y = phi i32 [%i, %entry], [%x, %one]
+ %d = call i1 @test11a()
+ br i1 %d, label %one, label %end
+
+end:
+ %f = phi i32 [ %x, %one], [%y, %two]
+ ; Change the %f to %i, and the optimizer suddenly becomes a lot smarter
+ ; even though %f must equal %i at this point
+ %g = inttoptr i32 %f to i32*
+ store i32 10, i32* %g
+ %z = call i1 @test11a()
+ ret i1 %z
+; CHECK: @test11
+; CHECK-NOT: phi i32
+; CHECK: ret i1 %z
+}
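+; Note: in test11 the mutually-referential phis %x and %y can only ever hold
+; %i, so all three phis are expected to collapse to %i (hence CHECK-NOT: phi).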
diff --git a/test/Transforms/InstCombine/preserve-sminmax.ll b/test/Transforms/InstCombine/preserve-sminmax.ll
index dbfd56a..00232cc 100644
--- a/test/Transforms/InstCombine/preserve-sminmax.ll
+++ b/test/Transforms/InstCombine/preserve-sminmax.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep { i32 \[%\]sd, \[\[:alnum:\]\]* \\?1\\>} | count 4
+; RUN: opt < %s -instcombine -S | FileCheck %s
; Instcombine normally would fold the sdiv into the comparison,
; making "icmp slt i32 %h, 2", but in this case the sdiv has
@@ -13,6 +13,11 @@ define i32 @foo(i32 %h) {
ret i32 %r
}
+; CHECK: %sd = sdiv i32 %h, 2
+; CHECK: %t = icmp slt i32 %sd, 1
+; CHECK: %r = select i1 %t, i32 %sd, i32 1
+; CHECK: ret i32 %r
+
define i32 @bar(i32 %h) {
%sd = sdiv i32 %h, 2
%t = icmp sgt i32 %sd, 1
@@ -20,3 +25,8 @@ define i32 @bar(i32 %h) {
ret i32 %r
}
+; CHECK: %sd = sdiv i32 %h, 2
+; CHECK: %t = icmp sgt i32 %sd, 1
+; CHECK: %r = select i1 %t, i32 %sd, i32 1
+; CHECK: ret i32 %r
+
diff --git a/test/Transforms/InstCombine/ptr-int-cast.ll b/test/Transforms/InstCombine/ptr-int-cast.ll
index 2f64d8b..c7ae689 100644
--- a/test/Transforms/InstCombine/ptr-int-cast.ll
+++ b/test/Transforms/InstCombine/ptr-int-cast.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S > %t
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define i1 @test1(i32 *%x) nounwind {
entry:
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index d6f916d..314441e 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -1,11 +1,13 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN: grep -v {store.*,.*null} | not grep store
+; RUN: opt < %s -instcombine -S | FileCheck %s
define void @test1(i32* %P) {
store i32 undef, i32* %P
store i32 123, i32* undef
store i32 124, i32* null
ret void
+; CHECK: @test1(
+; CHECK-NEXT: store i32 undef, i32* null
+; CHECK-NEXT: ret void
}
define void @test2(i32* %P) {
@@ -13,5 +15,70 @@ define void @test2(i32* %P) {
%Y = add i32 %X, 0 ; <i32> [#uses=1]
store i32 %Y, i32* %P
ret void
+; CHECK: @test2
+; CHECK-NEXT: ret void
+}
+
+;; Simple sinking tests
+
+; "if then else"
+define i32 @test3(i1 %C) {
+ %A = alloca i32
+ br i1 %C, label %Cond, label %Cond2
+
+Cond:
+ store i32 -987654321, i32* %A
+ br label %Cont
+
+Cond2:
+ store i32 47, i32* %A
+ br label %Cont
+
+Cont:
+ %V = load i32* %A
+ ret i32 %V
+; CHECK: @test3
+; CHECK-NOT: alloca
+; CHECK: Cont:
+; CHECK-NEXT: %storemerge = phi i32 [ 47, %Cond2 ], [ -987654321, %Cond ]
+; CHECK-NEXT: ret i32 %storemerge
+}
+
+; "if then"
+define i32 @test4(i1 %C) {
+ %A = alloca i32
+ store i32 47, i32* %A
+ br i1 %C, label %Cond, label %Cont
+
+Cond:
+ store i32 -987654321, i32* %A
+ br label %Cont
+
+Cont:
+ %V = load i32* %A
+ ret i32 %V
+; CHECK: @test4
+; CHECK-NOT: alloca
+; CHECK: Cont:
+; CHECK-NEXT: %storemerge = phi i32 [ -987654321, %Cond ], [ 47, %0 ]
+; CHECK-NEXT: ret i32 %storemerge
+}
+
+; "if then"
+define void @test5(i1 %C, i32* %P) {
+ store i32 47, i32* %P, align 1
+ br i1 %C, label %Cond, label %Cont
+
+Cond:
+ store i32 -987654321, i32* %P, align 1
+ br label %Cont
+
+Cont:
+ ret void
+; CHECK: @test5
+; CHECK: Cont:
+; CHECK-NEXT: %storemerge = phi i32
+; CHECK-NEXT: store i32 %storemerge, i32* %P, align 1
+; CHECK-NEXT: ret void
}
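; Note: tests 3-5 exercise store sinking; the stores from both predecessors
; are merged into a single phi (%storemerge) in Cont, and in tests 3 and 4
; the alloca then disappears entirely.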
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index bd7a700..ba28910 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -1,148 +1,250 @@
-; This test makes sure that these instructions are properly eliminated.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+; Optimize subtracts.
;
-; RUN: opt < %s -instcombine -S | \
-; RUN: grep -v {sub i32 %Cok, %Bok} | grep -v {sub i32 0, %Aok} | not grep sub
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @test1(i32 %A) {
- %B = sub i32 %A, %A ; <i32> [#uses=1]
+ %B = sub i32 %A, %A
ret i32 %B
+; CHECK: @test1
+; CHECK: ret i32 0
}
define i32 @test2(i32 %A) {
- %B = sub i32 %A, 0 ; <i32> [#uses=1]
+ %B = sub i32 %A, 0
ret i32 %B
+; CHECK: @test2
+; CHECK: ret i32 %A
}
define i32 @test3(i32 %A) {
- %B = sub i32 0, %A ; <i32> [#uses=1]
- %C = sub i32 0, %B ; <i32> [#uses=1]
+ %B = sub i32 0, %A
+ %C = sub i32 0, %B
ret i32 %C
+; CHECK: @test3
+; CHECK: ret i32 %A
}
define i32 @test4(i32 %A, i32 %x) {
- %B = sub i32 0, %A ; <i32> [#uses=1]
- %C = sub i32 %x, %B ; <i32> [#uses=1]
+ %B = sub i32 0, %A
+ %C = sub i32 %x, %B
ret i32 %C
+; CHECK: @test4
+; CHECK: %C = add i32 %x, %A
+; CHECK: ret i32 %C
}
-define i32 @test5(i32 %A, i32 %Bok, i32 %Cok) {
- %D = sub i32 %Bok, %Cok ; <i32> [#uses=1]
- %E = sub i32 %A, %D ; <i32> [#uses=1]
+define i32 @test5(i32 %A, i32 %B, i32 %C) {
+ %D = sub i32 %B, %C
+ %E = sub i32 %A, %D
ret i32 %E
+; CHECK: @test5
+; CHECK: %D = sub i32 %C, %B
+; CHECK: %E = add
+; CHECK: ret i32 %E
}
define i32 @test6(i32 %A, i32 %B) {
- %C = and i32 %A, %B ; <i32> [#uses=1]
- %D = sub i32 %A, %C ; <i32> [#uses=1]
+ %C = and i32 %A, %B
+ %D = sub i32 %A, %C
ret i32 %D
+; CHECK: @test6
+; CHECK-NEXT: xor i32 %B, -1
+; CHECK-NEXT: %D = and i32
+; CHECK-NEXT: ret i32 %D
}
define i32 @test7(i32 %A) {
- %B = sub i32 -1, %A ; <i32> [#uses=1]
+ %B = sub i32 -1, %A
ret i32 %B
+; CHECK: @test7
+; CHECK: %B = xor i32 %A, -1
+; CHECK: ret i32 %B
}
define i32 @test8(i32 %A) {
- %B = mul i32 9, %A ; <i32> [#uses=1]
- %C = sub i32 %B, %A ; <i32> [#uses=1]
+ %B = mul i32 9, %A
+ %C = sub i32 %B, %A
ret i32 %C
+; CHECK: @test8
+; CHECK: %C = shl i32 %A, 3
+; CHECK: ret i32 %C
}
define i32 @test9(i32 %A) {
- %B = mul i32 3, %A ; <i32> [#uses=1]
- %C = sub i32 %A, %B ; <i32> [#uses=1]
+ %B = mul i32 3, %A
+ %C = sub i32 %A, %B
ret i32 %C
+; CHECK: @test9
+; CHECK: %C = mul i32 %A, -2
+; CHECK: ret i32 %C
}
define i32 @test10(i32 %A, i32 %B) {
- %C = sub i32 0, %A ; <i32> [#uses=1]
- %D = sub i32 0, %B ; <i32> [#uses=1]
- %E = mul i32 %C, %D ; <i32> [#uses=1]
+ %C = sub i32 0, %A
+ %D = sub i32 0, %B
+ %E = mul i32 %C, %D
ret i32 %E
+; CHECK: @test10
+; CHECK: %E = mul i32 %A, %B
+; CHECK: ret i32 %E
}
-define i32 @test10.upgrd.1(i32 %A) {
- %C = sub i32 0, %A ; <i32> [#uses=1]
- %E = mul i32 %C, 7 ; <i32> [#uses=1]
+define i32 @test10a(i32 %A) {
+ %C = sub i32 0, %A
+ %E = mul i32 %C, 7
ret i32 %E
+; CHECK: @test10a
+; CHECK: %E = mul i32 %A, -7
+; CHECK: ret i32 %E
}
define i1 @test11(i8 %A, i8 %B) {
- %C = sub i8 %A, %B ; <i8> [#uses=1]
- %cD = icmp ne i8 %C, 0 ; <i1> [#uses=1]
+ %C = sub i8 %A, %B
+ %cD = icmp ne i8 %C, 0
ret i1 %cD
+; CHECK: @test11
+; CHECK: %cD = icmp ne i8 %A, %B
+; CHECK: ret i1 %cD
}
define i32 @test12(i32 %A) {
- %B = ashr i32 %A, 31 ; <i32> [#uses=1]
- %C = sub i32 0, %B ; <i32> [#uses=1]
+ %B = ashr i32 %A, 31
+ %C = sub i32 0, %B
ret i32 %C
+; CHECK: @test12
+; CHECK: %C = lshr i32 %A, 31
+; CHECK: ret i32 %C
}
define i32 @test13(i32 %A) {
- %B = lshr i32 %A, 31 ; <i32> [#uses=1]
- %C = sub i32 0, %B ; <i32> [#uses=1]
+ %B = lshr i32 %A, 31
+ %C = sub i32 0, %B
ret i32 %C
+; CHECK: @test13
+; CHECK: %C = ashr i32 %A, 31
+; CHECK: ret i32 %C
}
define i32 @test14(i32 %A) {
- %B = lshr i32 %A, 31 ; <i32> [#uses=1]
- %C = bitcast i32 %B to i32 ; <i32> [#uses=1]
- %D = sub i32 0, %C ; <i32> [#uses=1]
+ %B = lshr i32 %A, 31
+ %C = bitcast i32 %B to i32
+ %D = sub i32 0, %C
ret i32 %D
+; CHECK: @test14
+; CHECK: %D = ashr i32 %A, 31
+; CHECK: ret i32 %D
}
define i32 @test15(i32 %A, i32 %B) {
- %C = sub i32 0, %A ; <i32> [#uses=1]
- %D = srem i32 %B, %C ; <i32> [#uses=1]
+ %C = sub i32 0, %A
+ %D = srem i32 %B, %C
ret i32 %D
+; CHECK: @test15
+; CHECK: %D = srem i32 %B, %A
+; CHECK: ret i32 %D
}
define i32 @test16(i32 %A) {
- %X = sdiv i32 %A, 1123 ; <i32> [#uses=1]
- %Y = sub i32 0, %X ; <i32> [#uses=1]
+ %X = sdiv i32 %A, 1123
+ %Y = sub i32 0, %X
ret i32 %Y
+; CHECK: @test16
+; CHECK: %Y = sdiv i32 %A, -1123
+; CHECK: ret i32 %Y
}
; Can't fold the subtract here because negating it might overflow.
; PR3142
-define i32 @test17(i32 %Aok) {
- %B = sub i32 0, %Aok ; <i32> [#uses=1]
- %C = sdiv i32 %B, 1234 ; <i32> [#uses=1]
+define i32 @test17(i32 %A) {
+ %B = sub i32 0, %A
+ %C = sdiv i32 %B, 1234
ret i32 %C
+; CHECK: @test17
+; CHECK: %B = sub i32 0, %A
+; CHECK: %C = sdiv i32 %B, 1234
+; CHECK: ret i32 %C
}
define i64 @test18(i64 %Y) {
- %tmp.4 = shl i64 %Y, 2 ; <i64> [#uses=1]
- %tmp.12 = shl i64 %Y, 2 ; <i64> [#uses=1]
- %tmp.8 = sub i64 %tmp.4, %tmp.12 ; <i64> [#uses=1]
+ %tmp.4 = shl i64 %Y, 2
+ %tmp.12 = shl i64 %Y, 2
+ %tmp.8 = sub i64 %tmp.4, %tmp.12
ret i64 %tmp.8
+; CHECK: @test18
+; CHECK: ret i64 0
}
define i32 @test19(i32 %X, i32 %Y) {
- %Z = sub i32 %X, %Y ; <i32> [#uses=1]
- %Q = add i32 %Z, %Y ; <i32> [#uses=1]
+ %Z = sub i32 %X, %Y
+ %Q = add i32 %Z, %Y
ret i32 %Q
+; CHECK: @test19
+; CHECK: ret i32 %X
}
define i1 @test20(i32 %g, i32 %h) {
- %tmp.2 = sub i32 %g, %h ; <i32> [#uses=1]
- %tmp.4 = icmp ne i32 %tmp.2, %g ; <i1> [#uses=1]
+ %tmp.2 = sub i32 %g, %h
+ %tmp.4 = icmp ne i32 %tmp.2, %g
ret i1 %tmp.4
+; CHECK: @test20
+; CHECK: %tmp.4 = icmp ne i32 %h, 0
+; CHECK: ret i1 %tmp.4
}
define i1 @test21(i32 %g, i32 %h) {
- %tmp.2 = sub i32 %g, %h ; <i32> [#uses=1]
- %tmp.4 = icmp ne i32 %tmp.2, %g ; <i1> [#uses=1]
- ret i1 %tmp.4
+ %tmp.2 = sub i32 %g, %h
+ %tmp.4 = icmp ne i32 %tmp.2, %g
+ ret i1 %tmp.4
+; CHECK: @test21
+; CHECK: %tmp.4 = icmp ne i32 %h, 0
+; CHECK: ret i1 %tmp.4
}
; PR2298
-define i8 @test22(i32 %a, i32 %b) zeroext nounwind {
- %tmp2 = sub i32 0, %a ; <i32> [#uses=1]
- %tmp4 = sub i32 0, %b ; <i32> [#uses=1]
- %tmp5 = icmp eq i32 %tmp2, %tmp4 ; <i1> [#uses=1]
- %retval89 = zext i1 %tmp5 to i8 ; <i8> [#uses=1]
- ret i8 %retval89
+define i1 @test22(i32 %a, i32 %b) zeroext nounwind {
+ %tmp2 = sub i32 0, %a
+ %tmp4 = sub i32 0, %b
+ %tmp5 = icmp eq i32 %tmp2, %tmp4
+ ret i1 %tmp5
+; CHECK: @test22
+; CHECK: %tmp5 = icmp eq i32 %a, %b
+; CHECK: ret i1 %tmp5
+}
+
+; rdar://7362831
+define i32 @test23(i8* %P, i64 %A){
+ %B = getelementptr inbounds i8* %P, i64 %A
+ %C = ptrtoint i8* %B to i64
+ %D = trunc i64 %C to i32
+ %E = ptrtoint i8* %P to i64
+ %F = trunc i64 %E to i32
+ %G = sub i32 %D, %F
+ ret i32 %G
+; CHECK: @test23
+; CHECK: %A1 = trunc i64 %A to i32
+; CHECK: ret i32 %A1
+}
+
+define i64 @test24(i8* %P, i64 %A){
+ %B = getelementptr inbounds i8* %P, i64 %A
+ %C = ptrtoint i8* %B to i64
+ %E = ptrtoint i8* %P to i64
+ %G = sub i64 %C, %E
+ ret i64 %G
+; CHECK: @test24
+; CHECK-NEXT: ret i64 %A
+}
+
+define i64 @test24a(i8* %P, i64 %A){
+ %B = getelementptr inbounds i8* %P, i64 %A
+ %C = ptrtoint i8* %B to i64
+ %E = ptrtoint i8* %P to i64
+ %G = sub i64 %E, %C
+ ret i64 %G
+; CHECK: @test24a
+; CHECK-NEXT: sub i64 0, %A
+; CHECK-NEXT: ret i64
}
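; Note: tests 23/24/24a rely on ptrtoint(gep inbounds %P, %A) - ptrtoint(%P)
; simplifying to %A (and the reversed subtraction to 0 - %A); the inbounds
; keyword is what makes the offset arithmetic well-defined here.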
diff --git a/test/Transforms/JumpThreading/no-irreducible-loops.ll b/test/Transforms/JumpThreading/no-irreducible-loops.ll
index b4d4418..97276b0 100644
--- a/test/Transforms/JumpThreading/no-irreducible-loops.ll
+++ b/test/Transforms/JumpThreading/no-irreducible-loops.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -jump-threading -loop-rotate -instcombine -indvars -loop-unroll -simplifycfg -S > %t
+; RUN: opt < %s -jump-threading -loop-rotate -instcombine -indvars -loop-unroll -simplifycfg -S -verify-dom-info -verify-loop-info > %t
; RUN: grep {volatile store} %t | count 3
; RUN: not grep {br label} %t
diff --git a/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll b/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
index 7810309..723440f 100644
--- a/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
+++ b/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
@@ -1,15 +1,23 @@
-; RUN: opt < %s -licm -enable-licm-constant-variables -S | grep -A 1 entry | grep load.*@a
+; RUN: opt < %s -licm -enable-licm-constant-variables -S | FileCheck %s
+
@a = external constant float*
define void @test(i32 %count) {
entry:
br label %forcond
+; CHECK: %tmp3 = load float** @a
+; CHECK: br label %forcond
+
forcond:
%i.0 = phi i32 [ 0, %entry ], [ %inc, %forbody ]
%cmp = icmp ult i32 %i.0, %count
br i1 %cmp, label %forbody, label %afterfor
+; CHECK: %i.0 = phi i32 [ 0, %entry ], [ %inc, %forbody ]
+; CHECK: %cmp = icmp ult i32 %i.0, %count
+; CHECK: br i1 %cmp, label %forbody, label %afterfor
+
forbody:
%tmp3 = load float** @a
%arrayidx = getelementptr float* %tmp3, i32 %i.0
@@ -18,6 +26,14 @@ forbody:
%inc = add i32 %i.0, 1
br label %forcond
+; CHECK: %arrayidx = getelementptr float* %tmp3, i32 %i.0
+; CHECK: %tmp7 = uitofp i32 %i.0 to float
+; CHECK: store float %tmp7, float* %arrayidx
+; CHECK: %inc = add i32 %i.0, 1
+; CHECK: br label %forcond
+
afterfor:
ret void
}
+
+; CHECK: ret void
diff --git a/test/Transforms/LICM/Preserve-LCSSA.ll b/test/Transforms/LICM/Preserve-LCSSA.ll
index 24c4ad1..832d762 100644
--- a/test/Transforms/LICM/Preserve-LCSSA.ll
+++ b/test/Transforms/LICM/Preserve-LCSSA.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-rotate -licm -loop-unswitch -disable-output
+; RUN: opt < %s -loop-rotate -licm -loop-unswitch -disable-output -verify-loop-info -verify-dom-info
define i32 @stringSearch_Clib(i32 %count) {
entry:
diff --git a/test/Transforms/LoopDeletion/multiple-exit-conditions.ll b/test/Transforms/LoopDeletion/multiple-exit-conditions.ll
new file mode 100644
index 0000000..87f8f46
--- /dev/null
+++ b/test/Transforms/LoopDeletion/multiple-exit-conditions.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-deletion -S | FileCheck %s
+
+; ScalarEvolution can prove the loop iteration is finite, even though
+; it can't represent the exact trip count as an expression. That's
+; good enough to let the loop be deleted.
+
+; CHECK: entry:
+; CHECK-NEXT: br label %return
+
+; CHECK: return:
+; CHECK-NEXT: ret void
+
+define void @foo(i64 %n, i64 %m) nounwind {
+entry:
+ br label %bb
+
+bb:
+ %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb ]
+ %t0 = add i64 %x.0, 1
+ %t1 = icmp slt i64 %x.0, %n
+ %t3 = icmp sgt i64 %x.0, %m
+ %t4 = and i1 %t1, %t3
+ br i1 %t4, label %bb, label %return
+
+return:
+ ret void
+}
diff --git a/test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll b/test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll
index 3e170dc..7036d2d 100644
--- a/test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll
+++ b/test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-rotate | llvm-dis
+; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
; PR3408
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopRotate/LRCrash-1.ll b/test/Transforms/LoopRotate/LRCrash-1.ll
index 7d148e7..f16dd04 100644
--- a/test/Transforms/LoopRotate/LRCrash-1.ll
+++ b/test/Transforms/LoopRotate/LRCrash-1.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
%struct.relation = type { [4 x i16], i32, [4 x i16], i32, i32 }
diff --git a/test/Transforms/LoopRotate/LRCrash-2.ll b/test/Transforms/LoopRotate/LRCrash-2.ll
index e117c11..0a10989 100644
--- a/test/Transforms/LoopRotate/LRCrash-2.ll
+++ b/test/Transforms/LoopRotate/LRCrash-2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
define void @findAllPairs() {
entry:
diff --git a/test/Transforms/LoopRotate/LRCrash-3.ll b/test/Transforms/LoopRotate/LRCrash-3.ll
index 617dd8e..79f21fb 100644
--- a/test/Transforms/LoopRotate/LRCrash-3.ll
+++ b/test/Transforms/LoopRotate/LRCrash-3.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
define void @_ZN9Classfile4readEv() {
entry:
diff --git a/test/Transforms/LoopRotate/LRCrash-4.ll b/test/Transforms/LoopRotate/LRCrash-4.ll
index b2f3224..7d35c16 100644
--- a/test/Transforms/LoopRotate/LRCrash-4.ll
+++ b/test/Transforms/LoopRotate/LRCrash-4.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
define void @InterpretSEIMessage(i8* %msg) {
entry:
diff --git a/test/Transforms/LoopRotate/LRCrash-5.ll b/test/Transforms/LoopRotate/LRCrash-5.ll
index 7b6085d..6643cc1 100644
--- a/test/Transforms/LoopRotate/LRCrash-5.ll
+++ b/test/Transforms/LoopRotate/LRCrash-5.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
%struct.NSArray = type { %struct.NSObject }
diff --git a/test/Transforms/LoopRotate/PhiRename-1.ll b/test/Transforms/LoopRotate/PhiRename-1.ll
index fe7eaf9..a7326fa 100644
--- a/test/Transforms/LoopRotate/PhiRename-1.ll
+++ b/test/Transforms/LoopRotate/PhiRename-1.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-rotate -S | not grep {\\\[ .tmp224}
+; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -S | not grep {\\\[ .tmp224}
; END.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll b/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll
index b0d31bd..a1aa21b 100644
--- a/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll
+++ b/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-rotate -disable-output
+; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
; ModuleID = 'PhiSelfRefernce-1.bc'
define void @snrm2(i32 %incx) {
diff --git a/test/Transforms/LoopRotate/pr2639.ll b/test/Transforms/LoopRotate/pr2639.ll
index 96f87d5..da9a3a2 100644
--- a/test/Transforms/LoopRotate/pr2639.ll
+++ b/test/Transforms/LoopRotate/pr2639.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-deletion -loop-rotate -disable-output
+; RUN: opt < %s -loop-deletion -loop-rotate -verify-dom-info -verify-loop-info -disable-output
; PR 2639
%struct.HexxagonMove = type { i8, i8, i32 }
diff --git a/test/Transforms/LoopRotate/preserve-scev.ll b/test/Transforms/LoopRotate/preserve-scev.ll
index 9eedaa4..7bd2232 100644
--- a/test/Transforms/LoopRotate/preserve-scev.ll
+++ b/test/Transforms/LoopRotate/preserve-scev.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-rotate -loop-reduce -disable-output
+; RUN: opt < %s -loop-rotate -loop-reduce -verify-dom-info -verify-loop-info -disable-output
define fastcc void @foo() nounwind {
BB:
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll
index 45f506a..0e15f08 100644
--- a/test/Transforms/LoopSimplify/merge-exits.ll
+++ b/test/Transforms/LoopSimplify/merge-exits.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loopsimplify -loop-rotate -instcombine -indvars -S > %t
+; RUN: opt < %s -loopsimplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info > %t
; RUN: not grep sext %t
; RUN: grep {phi i64} %t | count 1
diff --git a/test/Transforms/Mem2Reg/crash.ll b/test/Transforms/Mem2Reg/crash.ll
index ce795aa..655549f 100644
--- a/test/Transforms/Mem2Reg/crash.ll
+++ b/test/Transforms/Mem2Reg/crash.ll
@@ -1,12 +1,12 @@
; RUN: opt < %s -mem2reg -S
; PR5023
-declare i32 @bar()
+declare i32 @test1f()
-define i32 @foo() {
+define i32 @test1() {
entry:
%whichFlag = alloca i32
- %A = invoke i32 @bar()
+ %A = invoke i32 @test1f()
to label %invcont2 unwind label %lpad86
invcont2:
@@ -22,3 +22,20 @@ lpad86:
}
+
+
+define i32 @test2() {
+entry:
+ %whichFlag = alloca i32
+ br label %bb15
+
+bb15:
+ %B = load i32* %whichFlag
+ ret i32 %B
+
+invcont2:
+ %C = load i32* %whichFlag
+ store i32 %C, i32* %whichFlag
+ br label %bb15
+}
+
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 13205e6..38a7271 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -memcpyopt -S | not grep {call.*memcpy.}
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
%a = type { i32 }
%b = type { float }
diff --git a/test/Transforms/MemCpyOpt/align.ll b/test/Transforms/MemCpyOpt/align.ll
index a9d0337..47df380 100644
--- a/test/Transforms/MemCpyOpt/align.ll
+++ b/test/Transforms/MemCpyOpt/align.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -memcpyopt | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
; The resulting memset is only 4-byte aligned, despite containing
; a 16-byte aligned store in the middle.
diff --git a/test/Transforms/SCCP/crash.ll b/test/Transforms/SCCP/crash.ll
new file mode 100644
index 0000000..e34eaca
--- /dev/null
+++ b/test/Transforms/SCCP/crash.ll
@@ -0,0 +1,24 @@
+; RUN: opt %s -sccp -S
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+
+define void @test1(i8 %arg) {
+entry:
+ br i1 undef, label %return, label %bb
+
+bb:
+ br label %bb34
+
+bb23:
+ %c = icmp eq i8 %arg, undef
+ br i1 %c, label %bb34, label %bb23
+
+bb34:
+ %Kind.1 = phi i32 [ undef, %bb ], [ %ins174, %bb23 ]
+ %mask173 = or i32 %Kind.1, 7
+ %ins174 = and i32 %mask173, -249
+ br label %bb23
+
+return:
+ ret void
+}
diff --git a/test/Transforms/SCCP/ipsccp-basic.ll b/test/Transforms/SCCP/ipsccp-basic.ll
index d3584d1..e369920 100644
--- a/test/Transforms/SCCP/ipsccp-basic.ll
+++ b/test/Transforms/SCCP/ipsccp-basic.ll
@@ -127,10 +127,80 @@ B:
; CHECK: define i64 @test5b()
; CHECK: A:
; CHECK-NEXT: %c = call i64 @test5c(%0 %a)
-; CHECK-NEXT: ret i64 %c
+; CHECK-NEXT: ret i64 5
define internal i64 @test5c({i64,i64} %a) {
%b = extractvalue {i64,i64} %a, 0
ret i64 %b
}
+
+;;======================== test6
+
+define i64 @test6a() {
+ ret i64 0
+}
+
+define i64 @test6b() {
+ %a = call i64 @test6a()
+ ret i64 %a
+}
+; CHECK: define i64 @test6b
+; CHECK: ret i64 0
+
+;;======================== test7
+
+
+%T = type {i32,i32}
+
+define internal {i32, i32} @test7a(i32 %A) {
+ %X = add i32 1, %A
+ %mrv0 = insertvalue %T undef, i32 %X, 0
+ %mrv1 = insertvalue %T %mrv0, i32 %A, 1
+ ret %T %mrv1
+; CHECK: @test7a
+; CHECK-NEXT: %mrv0 = insertvalue %T undef, i32 18, 0
+; CHECK-NEXT: %mrv1 = insertvalue %T %mrv0, i32 17, 1
+}
+
+define i32 @test7b() {
+ %X = call {i32, i32} @test7a(i32 17)
+ %Y = extractvalue {i32, i32} %X, 0
+ %Z = add i32 %Y, %Y
+ ret i32 %Z
+; CHECK: define i32 @test7b
+; CHECK-NEXT: call %T @test7a(i32 17)
+; CHECK-NEXT: ret i32 36
+}
+
+;;======================== test8
+
+
+define internal {} @test8a(i32 %A, i32* %P) {
+ store i32 %A, i32* %P
+ ret {} {}
+; CHECK: @test8a
+; CHECK-NEXT: store i32 5,
+; CHECK-NEXT: ret
+}
+
+define void @test8b(i32* %P) {
+ %X = call {} @test8a(i32 5, i32* %P)
+ ret void
+; CHECK: define void @test8b
+; CHECK-NEXT: call { } @test8a
+; CHECK-NEXT: ret void
+}
+
+;;======================== test9
+
+@test9g = internal global { } zeroinitializer
+
+define void @test9() {
+entry:
+ %local_foo = alloca { }
+ load { }* @test9g
+ store { } %0, { }* %local_foo
+ ret void
+}
+
diff --git a/test/Transforms/SCCP/loadtest.ll b/test/Transforms/SCCP/loadtest.ll
index fd82aef..add2af4 100644
--- a/test/Transforms/SCCP/loadtest.ll
+++ b/test/Transforms/SCCP/loadtest.ll
@@ -1,5 +1,6 @@
; This test makes sure that these instructions are properly constant propagated.
-;
+
+target datalayout = "e-p:32:32"
; RUN: opt < %s -sccp -S | not grep load
@@ -20,7 +21,13 @@ define float @test2() {
define i32 @test3() {
%A = getelementptr [2 x { i32, float }]* @Y, i64 0, i64 0, i32 0 ; <i32*> [#uses=1]
- %B = load i32* %A ; <i32> [#uses=1]
+ %B = load i32* %A
ret i32 %B
}
+define i8 @test4() {
+ %A = bitcast i32* @X to i8*
+ %B = load i8* %A
+ ret i8 %B
+}
+
diff --git a/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll b/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
index 824e249..7116199 100644
--- a/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
+++ b/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -scalarrepl -instcombine -S | not grep alloca
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
; Test that an array is not incorrectly deconstructed.
diff --git a/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll b/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
index 4655d14..99c9fb9 100644
--- a/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
+++ b/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -scalarrepl -S | not grep alloca
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind {
%vsiidx = alloca [2 x <4 x i32>], align 16 ; <[2 x <4 x i32>]*> [#uses=3]
diff --git a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll b/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
index 3ebafd0..87a08b7 100644
--- a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
+++ b/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
@@ -4,6 +4,7 @@
; values. This checks that scalarrepl splits up the struct and array properly.
; RUN: opt < %s -scalarrepl -S | not grep alloca
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
define i32 @foo() {
%target = alloca { i32, i32 } ; <{ i32, i32 }*> [#uses=1]
diff --git a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
index e89be5a..e32e683 100644
--- a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
+++ b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
@@ -5,6 +5,7 @@
; RUN: opt < %s -scalarrepl -S > %t
; RUN: cat %t | not grep alloca
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
%struct.two = type <{ < 2 x i8 >, i16 }>
diff --git a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
index d6eb75b..526457b 100644
--- a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
+++ b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
@@ -2,6 +2,7 @@
; is only known to access it with 1-byte alignment.
; RUN: opt < %s -scalarrepl -S | grep {store i16 1, .*, align 1}
; PR3720
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
%struct.st = type { i16 }
diff --git a/test/Transforms/ScalarRepl/DifferingTypes.ll b/test/Transforms/ScalarRepl/DifferingTypes.ll
index eb56824..933c47f 100644
--- a/test/Transforms/ScalarRepl/DifferingTypes.ll
+++ b/test/Transforms/ScalarRepl/DifferingTypes.ll
@@ -3,6 +3,7 @@
; depending on the endianness of the target...
; RUN: opt < %s -scalarrepl -S | \
; RUN: not grep alloca
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
define i32 @testfunc(i32 %i, i8 %j) {
%I = alloca i32 ; <i32*> [#uses=3]
diff --git a/test/Transforms/ScalarRepl/arraytest.ll b/test/Transforms/ScalarRepl/arraytest.ll
index 2f68af8..06a928c 100644
--- a/test/Transforms/ScalarRepl/arraytest.ll
+++ b/test/Transforms/ScalarRepl/arraytest.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
define i32 @test() {
%X = alloca [4 x i32] ; <[4 x i32]*> [#uses=1]
diff --git a/test/Transforms/ScalarRepl/basictest.ll b/test/Transforms/ScalarRepl/basictest.ll
index a43243c..a26b62d 100644
--- a/test/Transforms/ScalarRepl/basictest.ll
+++ b/test/Transforms/ScalarRepl/basictest.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
define i32 @test() {
%X = alloca { i32, float } ; <{ i32, float }*> [#uses=1]
diff --git a/test/Transforms/ScalarRepl/bitfield-sroa.ll b/test/Transforms/ScalarRepl/bitfield-sroa.ll
index 6b3d414..3728658 100644
--- a/test/Transforms/ScalarRepl/bitfield-sroa.ll
+++ b/test/Transforms/ScalarRepl/bitfield-sroa.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -scalarrepl -S | not grep alloca
; rdar://6532315
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
%t = type { { i32, i16, i8, i8 } }
define i8 @foo(i64 %A) {
diff --git a/test/Transforms/ScalarRepl/copy-aggregate.ll b/test/Transforms/ScalarRepl/copy-aggregate.ll
index 26f007b..2992413 100644
--- a/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ b/test/Transforms/ScalarRepl/copy-aggregate.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -scalarrepl -S | not grep alloca
; PR3290
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
;; Store of integer to whole alloca struct.
define i32 @test1(i64 %V) nounwind {
diff --git a/test/Transforms/ScalarRepl/debuginfo.ll b/test/Transforms/ScalarRepl/debuginfo.ll
index 903b1a2..6b8422c 100644
--- a/test/Transforms/ScalarRepl/debuginfo.ll
+++ b/test/Transforms/ScalarRepl/debuginfo.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -scalarrepl -S | not grep alloca
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
diff --git a/test/Transforms/ScalarRepl/load-store-aggregate.ll b/test/Transforms/ScalarRepl/load-store-aggregate.ll
index 9ea3895..c5008ac 100644
--- a/test/Transforms/ScalarRepl/load-store-aggregate.ll
+++ b/test/Transforms/ScalarRepl/load-store-aggregate.ll
@@ -1,6 +1,7 @@
; This testcase shows that scalarrepl is able to replace struct alloca's which
; are directly loaded from or stored to (using the first class aggregates
; feature).
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
; RUN: opt < %s -scalarrepl -S > %t
; RUN: cat %t | not grep alloca
diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/ScalarRepl/memcpy-from-global.ll
index 38a2ca0..8152785 100644
--- a/test/Transforms/ScalarRepl/memcpy-from-global.ll
+++ b/test/Transforms/ScalarRepl/memcpy-from-global.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -scalarrepl -S | not grep {call.*memcpy}
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
@C.0.1248 = internal constant [128 x float] [ float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 ], align 32 ; <[128 x float]*> [#uses=1]
define float @grad4(i32 %hash, float %x, float %y, float %z, float %w) {
diff --git a/test/Transforms/ScalarRepl/not-a-vector.ll b/test/Transforms/ScalarRepl/not-a-vector.ll
index 7eba7c0..f873456 100644
--- a/test/Transforms/ScalarRepl/not-a-vector.ll
+++ b/test/Transforms/ScalarRepl/not-a-vector.ll
@@ -1,6 +1,7 @@
; RUN: opt < %s -scalarrepl -S | not grep alloca
; RUN: opt < %s -scalarrepl -S | not grep {7 x double}
; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret double %B}
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
define double @test(double %A, double %B) {
%ARR = alloca [7 x i64]
diff --git a/test/Transforms/ScalarRepl/union-fp-int.ll b/test/Transforms/ScalarRepl/union-fp-int.ll
index 0e1cd23..8b7e50d 100644
--- a/test/Transforms/ScalarRepl/union-fp-int.ll
+++ b/test/Transforms/ScalarRepl/union-fp-int.ll
@@ -2,6 +2,7 @@
; RUN: not grep alloca
; RUN: opt < %s -scalarrepl -S | \
; RUN: grep {bitcast.*float.*i32}
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
define i32 @test(float %X) {
%X_addr = alloca float ; <float*> [#uses=2]
diff --git a/test/Transforms/ScalarRepl/union-packed.ll b/test/Transforms/ScalarRepl/union-packed.ll
index 63752c8..b272abf 100644
--- a/test/Transforms/ScalarRepl/union-packed.ll
+++ b/test/Transforms/ScalarRepl/union-packed.ll
@@ -2,6 +2,7 @@
; RUN: not grep alloca
; RUN: opt < %s -scalarrepl -S | \
; RUN: grep bitcast
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
define <4 x i32> @test(<4 x float> %X) {
%X_addr = alloca <4 x float> ; <<4 x float>*> [#uses=2]
diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll
index 3af79bc..decbd30 100644
--- a/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ b/test/Transforms/ScalarRepl/vector_memcpy.ll
@@ -1,6 +1,7 @@
; RUN: opt < %s -scalarrepl -S > %t
; RUN: grep {ret <16 x float> %A} %t
; RUN: grep {ret <16 x float> zeroinitializer} %t
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
define <16 x float> @foo(<16 x float> %A) nounwind {
%tmp = alloca <16 x float>, align 16
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index 0284b3d..4f875b0 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -scalarrepl -S | not grep alloca
; RUN: opt < %s -scalarrepl -S | grep {load <4 x float>}
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
define void @test(<4 x float>* %F, float %f) {
entry:
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index 468b6ed..a829e03 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -21,5 +21,10 @@ BB1: ; preds = %0, %0
}
-
-
+define void @test4() {
+entry:
+ br label %return
+return:
+ ret void
+}
+@test4g = global i8* blockaddress(@test4, %return)
diff --git a/test/Transforms/SimplifyCFG/duplicate-phis.ll b/test/Transforms/SimplifyCFG/duplicate-phis.ll
new file mode 100644
index 0000000..a1e5113
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/duplicate-phis.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -instcombine -simplifycfg -S | grep { = phi } | count 1
+
+; instcombine should sort the PHI operands so that simplifycfg can see the
+; duplicate and remove it.
+
+define i32 @foo(i1 %t) {
+entry:
+ call void @bar()
+ br i1 %t, label %true, label %false
+true:
+ call void @bar()
+ br label %false
+false:
+ %a = phi i32 [ 2, %true ], [ 5, %entry ]
+ %b = phi i32 [ 5, %entry ], [ 2, %true ]
+ call void @bar()
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+declare void @bar()
diff --git a/test/lit.cfg b/test/lit.cfg
index 7eac5c6..1965615 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -76,6 +76,7 @@ for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
site_exp[m.group(1)] = m.group(2)
# Add substitutions.
+config.substitutions.append(('%llvmgcc_only', site_exp['llvmgcc']))
for sub in ['llvmgcc', 'llvmgxx', 'compile_cxx', 'compile_c',
'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
'bugpoint_topts']:
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index b1562d1..0e1db1b 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -46,9 +46,6 @@ namespace {
int api_version = 0;
int gold_version = 0;
- bool generate_api_file = false;
- const char *as_path = NULL;
-
struct claimed_file {
lto_module_t M;
void *handle;
@@ -60,6 +57,37 @@ namespace {
std::vector<sys::Path> Cleanup;
}
+namespace options {
+ bool generate_api_file = false;
+ const char *as_path = NULL;
+ // Additional options to pass into the code generator.
+  // Note: This vector will contain all plugin options that are not claimed
+  // as plugin-exclusive; those are passed through to the code generator.
+  // For example, the "generate-api-file" and "as" options are for plugin
+  // use only and will not be passed.
+ std::vector<std::string> extra;
+
+ void process_plugin_option(const char* opt)
+ {
+ if (opt == NULL)
+ return;
+
+ if (strcmp("generate-api-file", opt) == 0) {
+ generate_api_file = true;
+ } else if (strncmp("as=", opt, 3) == 0) {
+ if (as_path) {
+ (*message)(LDPL_WARNING, "Path to as specified twice. "
+ "Discarding %s", opt);
+ } else {
+ as_path = strdup(opt + 3);
+ }
+ } else {
+ // Save this option to pass to the code generator.
+ extra.push_back(std::string(opt));
+ }
+ }
+}
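+// Illustrative usage (not part of this patch): with GNU gold, these options
+// typically reach LDPT_OPTION via -plugin-opt, e.g.
+//   ld.gold -plugin LLVMgold.so -plugin-opt=generate-api-file \
+//           -plugin-opt=as=/usr/bin/as -plugin-opt=-some-codegen-flag
+// Unclaimed options such as -some-codegen-flag land in options::extra and
+// are handed to lto_codegen_debug_options() below.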
+
ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
int *claimed);
ld_plugin_status all_symbols_read_hook(void);
@@ -103,18 +131,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) {
//output_type = LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC;
break;
case LDPT_OPTION:
- if (strcmp("generate-api-file", tv->tv_u.tv_string) == 0) {
- generate_api_file = true;
- } else if (strncmp("as=", tv->tv_u.tv_string, 3) == 0) {
- if (as_path) {
- (*message)(LDPL_WARNING, "Path to as specified twice. "
- "Discarding %s", tv->tv_u.tv_string);
- } else {
- as_path = strdup(tv->tv_u.tv_string + 3);
- }
- } else {
- (*message)(LDPL_WARNING, "Ignoring flag %s", tv->tv_u.tv_string);
- }
+ options::process_plugin_option(tv->tv_u.tv_string);
break;
case LDPT_REGISTER_CLAIM_FILE_HOOK: {
ld_plugin_register_claim_file callback;
@@ -307,7 +324,7 @@ ld_plugin_status all_symbols_read_hook(void) {
lto_codegen_add_module(cg, I->M);
std::ofstream api_file;
- if (generate_api_file) {
+ if (options::generate_api_file) {
api_file.open("apifile.txt", std::ofstream::out | std::ofstream::trunc);
if (!api_file.is_open()) {
(*message)(LDPL_FATAL, "Unable to open apifile.txt for writing.");
@@ -329,13 +346,13 @@ ld_plugin_status all_symbols_read_hook(void) {
lto_codegen_add_must_preserve_symbol(cg, I->syms[i].name);
anySymbolsPreserved = true;
- if (generate_api_file)
+ if (options::generate_api_file)
api_file << I->syms[i].name << "\n";
}
}
}
- if (generate_api_file)
+ if (options::generate_api_file)
api_file.close();
if (!anySymbolsPreserved) {
@@ -347,10 +364,17 @@ ld_plugin_status all_symbols_read_hook(void) {
lto_codegen_set_pic_model(cg, output_type);
lto_codegen_set_debug_model(cg, LTO_DEBUG_MODEL_DWARF);
- if (as_path) {
- sys::Path p = sys::Program::FindProgramByName(as_path);
+ if (options::as_path) {
+ sys::Path p = sys::Program::FindProgramByName(options::as_path);
lto_codegen_set_assembler_path(cg, p.c_str());
}
+ // Pass through extra options to the code generator.
+ if (!options::extra.empty()) {
+ for (std::vector<std::string>::iterator it = options::extra.begin();
+ it != options::extra.end(); ++it) {
+ lto_codegen_debug_options(cg, (*it).c_str());
+ }
+ }
size_t bufsize = 0;
const char *buffer = static_cast<const char *>(lto_codegen_compile(cg,
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 4578c4e..218bb93 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -165,8 +165,7 @@ int main(int argc, char **argv, char * const *envp) {
EE->RegisterJITEventListener(createOProfileJITEventListener());
- if (NoLazyCompilation)
- EE->DisableLazyCompilation();
+ EE->DisableLazyCompilation(NoLazyCompilation);
// If the user specifically requested an argv[0] to pass into the program,
// do it now.
diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h
index 0696abc..ce292f6 100644
--- a/tools/llvm-mc/AsmLexer.h
+++ b/tools/llvm-mc/AsmLexer.h
@@ -17,7 +17,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmLexer.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <string>
#include <cassert>
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 329efe9..76552b8 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -70,9 +70,9 @@ IncludeDirs("I", cl::desc("Directory of include files"),
cl::value_desc("directory"), cl::Prefix);
static cl::opt<std::string>
-TripleName("triple", cl::desc("Target triple to assemble for,"
- "see -version for available targets"),
- cl::init(LLVM_HOSTTRIPLE));
+TripleName("triple", cl::desc("Target triple to assemble for, "
+ "see -version for available targets"),
+ cl::init(LLVM_HOSTTRIPLE));
enum ActionType {
AC_AsLex,
diff --git a/tools/llvmc/doc/LLVMC-Reference.rst b/tools/llvmc/doc/LLVMC-Reference.rst
index 7041bd1..b92ab69 100644
--- a/tools/llvmc/doc/LLVMC-Reference.rst
+++ b/tools/llvmc/doc/LLVMC-Reference.rst
@@ -431,8 +431,16 @@ use TableGen inheritance instead.
* Possible tests are:
- - ``switch_on`` - Returns true if a given command-line switch is
- provided by the user. Example: ``(switch_on "opt")``.
+  - ``switch_on`` - Returns true if a given command-line switch is provided by
+    the user. Can be given a list as an argument; in that case ``(switch_on
+    ["foo", "bar", "baz"])`` is equivalent to ``(and (switch_on "foo"),
+    (switch_on "bar"), (switch_on "baz"))``.
+ Example: ``(switch_on "opt")``.
+
+ - ``any_switch_on`` - Given a list of switch options, returns true if any of
+ the switches is turned on.
+ Example: ``(any_switch_on ["foo", "bar", "baz"])`` is equivalent to ``(or
+ (switch_on "foo"), (switch_on "bar"), (switch_on "baz"))``.
- ``parameter_equals`` - Returns true if a command-line parameter equals
a given value.
@@ -446,18 +454,28 @@ use TableGen inheritance instead.
belongs to the current input language set.
Example: ``(input_languages_contain "c++")``.
- - ``in_language`` - Evaluates to true if the input file language
- equals to the argument. At the moment works only with ``cmd_line``
- and ``actions`` (on non-join nodes).
+  - ``in_language`` - Evaluates to true if the input file language is equal to
+    the argument. At the moment this works only with ``cmd_line`` and
+    ``actions`` (on non-join nodes).
Example: ``(in_language "c++")``.
- - ``not_empty`` - Returns true if a given option (which should be
- either a parameter or a parameter list) is set by the
- user.
+  - ``not_empty`` - Returns true if a given option (which should be either a
+    parameter or a parameter list) is set by the user. Like ``switch_on``, it
+    can also be given a list as an argument.
Example: ``(not_empty "o")``.
+ - ``any_not_empty`` - Returns true if ``not_empty`` returns true for any of
+ the options in the list.
+ Example: ``(any_not_empty ["foo", "bar", "baz"])`` is equivalent to ``(or
+ (not_empty "foo"), (not_empty "bar"), (not_empty "baz"))``.
+
- ``empty`` - The opposite of ``not_empty``. Equivalent to ``(not (not_empty
- X))``. Provided for convenience.
+    X))``. Provided for convenience. Can be given a list as an argument.
+
+  - ``any_empty`` - Returns true if ``empty`` returns true for any of the
+    options in the list.
+    Example: ``(any_empty ["foo", "bar", "baz"])`` is equivalent to ``(not (and
+    (not_empty "foo"), (not_empty "bar"), (not_empty "baz")))``.
- ``single_input_file`` - Returns true if there was only one input file
provided on the command-line. Used without arguments:
@@ -572,11 +590,13 @@ The list of all possible actions follows.
Example: ``(case (switch_on "pthread"), (append_cmd
"-lpthread"))``
- - ``error` - exit with error.
+ - ``error`` - exit with error.
Example: ``(error "Mixing -c and -S is not allowed!")``.
- - ``forward`` - forward an option unchanged.
- Example: ``(forward "Wall")``.
+ - ``warning`` - print a warning.
+ Example: ``(warning "Specifying both -O1 and -O2 is meaningless!")``.
+
+ - ``forward`` - forward an option unchanged. Example: ``(forward "Wall")``.
- ``forward_as`` - Change the name of an option, but forward the
argument unchanged.
@@ -619,6 +639,36 @@ linked with the root node. Since tools are not allowed to have
multiple output languages, for nodes "inside" the graph the input and
output languages should match. This is enforced at compile-time.
+Option preprocessor
+===================
+
+It is sometimes useful to run error-checking code before processing the
+compilation graph. For example, if optimization options "-O1" and "-O2" are
+implemented as switches, we might want to output a warning if the user invokes
+the driver with both of these options enabled.
+
+The ``OptionPreprocessor`` feature exists specifically for such occasions.
+Example (adapted from the built-in Base plugin)::
+
+ def Preprocess : OptionPreprocessor<
+ (case (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])),
+ [(unset_option ["O0", "O1", "O2"]),
+ (warning "Multiple -O options specified, defaulted to -O3.")],
+ (and (switch_on "O2"), (any_switch_on ["O0", "O1"])),
+ (unset_option ["O0", "O1"]),
+ (and (switch_on "O1"), (switch_on "O0")),
+ (unset_option "O0"))
+ >;
+
+Here, ``OptionPreprocessor`` is used to unset all spurious optimization options
+(so that they are not forwarded to the compiler).
+
+``OptionPreprocessor`` is basically a single big ``case`` expression, which is
+evaluated only once right after the plugin is loaded. The only allowed actions
+in ``OptionPreprocessor`` are ``error``, ``warning`` and a special action
+``unset_option``, which, as the name suggests, unsets a given option. For
+convenience, ``unset_option`` also works on lists.
+
More advanced topics
====================
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 1950a73..12bb2ec 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -132,10 +132,6 @@ DefaultDataLayout("default-data-layout",
cl::desc("data layout string to use if not specified by module"),
cl::value_desc("layout-string"), cl::init(""));
-static cl::opt<bool>
-NoDefaultDataLayout("no-default-data-layout",
- cl::desc("no data layout assumptions unless module specifies data layout"));
-
// ---------- Define Printers for module and function passes ------------
namespace {
@@ -401,7 +397,7 @@ int main(int argc, char **argv) {
const std::string &ModuleDataLayout = M.get()->getDataLayout();
if (!ModuleDataLayout.empty())
TD = new TargetData(ModuleDataLayout);
- else if (!NoDefaultDataLayout)
+ else if (!DefaultDataLayout.empty())
TD = new TargetData(DefaultDataLayout);
if (TD)
diff --git a/unittests/ADT/StringMapTest.cpp b/unittests/ADT/StringMapTest.cpp
index 8ee166b..3dcdc39 100644
--- a/unittests/ADT/StringMapTest.cpp
+++ b/unittests/ADT/StringMapTest.cpp
@@ -9,7 +9,7 @@
#include "gtest/gtest.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
using namespace llvm;
namespace {
diff --git a/unittests/ADT/ValueMapTest.cpp b/unittests/ADT/ValueMapTest.cpp
index 9de340c..9159657 100644
--- a/unittests/ADT/ValueMapTest.cpp
+++ b/unittests/ADT/ValueMapTest.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ValueMap.h"
-
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/OwningPtr.h"
#include "gtest/gtest.h"
@@ -187,7 +187,7 @@ struct LockMutex : ValueMapConfig<KeyT> {
*Data.CalledRAUW = true;
EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked.";
}
- static void onDeleted(const ExtraData &Data, KeyT Old) {
+ static void onDelete(const ExtraData &Data, KeyT Old) {
*Data.CalledDeleted = true;
EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked.";
}
@@ -238,7 +238,7 @@ struct CountOps : ValueMapConfig<KeyT> {
static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) {
++*Data.RAUWs;
}
- static void onDeleted(const ExtraData &Data, KeyT Old) {
+ static void onDelete(const ExtraData &Data, KeyT Old) {
++*Data.Deletions;
}
};
@@ -270,7 +270,7 @@ struct ModifyingConfig : ValueMapConfig<KeyT> {
static void onRAUW(ExtraData Map, KeyT Old, KeyT New) {
(*Map)->erase(Old);
}
- static void onDeleted(ExtraData Map, KeyT Old) {
+ static void onDelete(ExtraData Map, KeyT Old) {
(*Map)->erase(Old);
}
};
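
The three hunks above track a rename in ValueMapConfig: the deletion hook is
now onDelete rather than onDeleted. A minimal sketch of a config written
against the new name, mirroring the CountOps pattern from this test (the
counter type here is illustrative):

    #include "llvm/ADT/ValueMap.h"
    #include "llvm/Value.h"

    // Sketch: onDelete fires when a key Value is destroyed.
    struct CountingConfig : llvm::ValueMapConfig<llvm::Value*> {
      typedef int *ExtraData;                        // deletion counter
      static void onRAUW(const ExtraData &, llvm::Value *, llvm::Value *) {}
      static void onDelete(const ExtraData &Count, llvm::Value *) {
        ++*Count;                                    // record the deletion
      }
    };

A map is then declared as llvm::ValueMap<llvm::Value*, int, CountingConfig>
and constructed with the counter as its extra data, as the surrounding tests
do.
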
diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
index 87e3280..dda86fb 100644
--- a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
@@ -37,7 +37,6 @@ struct FunctionEmittedEvent {
};
struct FunctionFreedEvent {
unsigned Index;
- const Function *F;
void *Code;
};
@@ -56,8 +55,8 @@ struct RecordingJITEventListener : public JITEventListener {
EmittedEvents.push_back(Event);
}
- virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr) {
- FunctionFreedEvent Event = {NextIndex++, &F, OldPtr};
+ virtual void NotifyFreeingMachineCode(void *OldPtr) {
+ FunctionFreedEvent Event = {NextIndex++, OldPtr};
FreedEvents.push_back(Event);
}
};
@@ -116,11 +115,9 @@ TEST_F(JITEventListenerTest, Simple) {
<< " contain some bytes.";
EXPECT_EQ(2U, Listener.FreedEvents[0].Index);
- EXPECT_EQ(F1, Listener.FreedEvents[0].F);
EXPECT_EQ(F1_addr, Listener.FreedEvents[0].Code);
EXPECT_EQ(3U, Listener.FreedEvents[1].Index);
- EXPECT_EQ(F2, Listener.FreedEvents[1].F);
EXPECT_EQ(F2_addr, Listener.FreedEvents[1].Code);
F1->eraseFromParent();
@@ -164,7 +161,6 @@ TEST_F(JITEventListenerTest, MultipleListenersDontInterfere) {
<< " contain some bytes.";
EXPECT_EQ(1U, Listener1.FreedEvents[0].Index);
- EXPECT_EQ(F2, Listener1.FreedEvents[0].F);
EXPECT_EQ(F2_addr, Listener1.FreedEvents[0].Code);
// Listener 2.
@@ -186,7 +182,6 @@ TEST_F(JITEventListenerTest, MultipleListenersDontInterfere) {
<< " contain some bytes.";
EXPECT_EQ(2U, Listener2.FreedEvents[0].Index);
- EXPECT_EQ(F2, Listener2.FreedEvents[0].F);
EXPECT_EQ(F2_addr, Listener2.FreedEvents[0].Code);
// Listener 3.
@@ -201,7 +196,6 @@ TEST_F(JITEventListenerTest, MultipleListenersDontInterfere) {
<< " contain some bytes.";
EXPECT_EQ(1U, Listener3.FreedEvents[0].Index);
- EXPECT_EQ(F2, Listener3.FreedEvents[0].F);
EXPECT_EQ(F2_addr, Listener3.FreedEvents[0].Code);
F1->eraseFromParent();
@@ -228,7 +222,6 @@ TEST_F(JITEventListenerTest, MatchesMachineCodeInfo) {
EXPECT_EQ(MCI.size(), Listener.EmittedEvents[0].Size);
EXPECT_EQ(1U, Listener.FreedEvents[0].Index);
- EXPECT_EQ(F, Listener.FreedEvents[0].F);
EXPECT_EQ(F_addr, Listener.FreedEvents[0].Code);
}
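
These test updates follow a JITEventListener interface change:
NotifyFreeingMachineCode no longer receives the Function, only the address of
the code being freed. A minimal listener against the new signature
(illustrative only):

    #include "llvm/ExecutionEngine/JITEventListener.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: log each block of machine code as it is freed.
    class FreeLogger : public llvm::JITEventListener {
    public:
      virtual void NotifyFreeingMachineCode(void *OldPtr) {
        llvm::errs() << "freeing machine code at " << OldPtr << "\n";
      }
    };
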
diff --git a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
index f0c491f..aa0c41d 100644
--- a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
@@ -13,6 +13,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
+#include "llvm/LLVMContext.h"
using namespace llvm;
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 8f9b65a..e0568ad 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -9,6 +9,8 @@
#include "gtest/gtest.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Assembly/Parser.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constant.h"
#include "llvm/Constants.h"
@@ -22,10 +24,13 @@
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TypeBuilder.h"
#include "llvm/Target/TargetSelect.h"
#include "llvm/Type.h"
+#include <vector>
+
using namespace llvm;
namespace {
@@ -45,18 +50,158 @@ Function *makeReturnGlobal(std::string Name, GlobalVariable *G, Module *M) {
return F;
}
+std::string DumpFunction(const Function *F) {
+ std::string Result;
+ raw_string_ostream(Result) << "" << *F;
+ return Result;
+}
+
+class RecordingJITMemoryManager : public JITMemoryManager {
+ const OwningPtr<JITMemoryManager> Base;
+public:
+ RecordingJITMemoryManager()
+ : Base(JITMemoryManager::CreateDefaultMemManager()) {
+ }
+
+ virtual void setMemoryWritable() { Base->setMemoryWritable(); }
+ virtual void setMemoryExecutable() { Base->setMemoryExecutable(); }
+ virtual void setPoisonMemory(bool poison) { Base->setPoisonMemory(poison); }
+ virtual void AllocateGOT() { Base->AllocateGOT(); }
+ virtual uint8_t *getGOTBase() const { return Base->getGOTBase(); }
+ virtual void SetDlsymTable(void *ptr) { Base->SetDlsymTable(ptr); }
+ virtual void *getDlsymTable() const { return Base->getDlsymTable(); }
+ struct StartFunctionBodyCall {
+ StartFunctionBodyCall(uint8_t *Result, const Function *F,
+ uintptr_t ActualSize, uintptr_t ActualSizeResult)
+ : Result(Result), F(F), F_dump(DumpFunction(F)),
+ ActualSize(ActualSize), ActualSizeResult(ActualSizeResult) {}
+ uint8_t *Result;
+ const Function *F;
+ std::string F_dump;
+ uintptr_t ActualSize;
+ uintptr_t ActualSizeResult;
+ };
+ std::vector<StartFunctionBodyCall> startFunctionBodyCalls;
+ virtual uint8_t *startFunctionBody(const Function *F,
+ uintptr_t &ActualSize) {
+ uintptr_t InitialActualSize = ActualSize;
+ uint8_t *Result = Base->startFunctionBody(F, ActualSize);
+ startFunctionBodyCalls.push_back(
+ StartFunctionBodyCall(Result, F, InitialActualSize, ActualSize));
+ return Result;
+ }
+ virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment) {
+ return Base->allocateStub(F, StubSize, Alignment);
+ }
+ struct EndFunctionBodyCall {
+ EndFunctionBodyCall(const Function *F, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd)
+ : F(F), F_dump(DumpFunction(F)),
+ FunctionStart(FunctionStart), FunctionEnd(FunctionEnd) {}
+ const Function *F;
+ std::string F_dump;
+ uint8_t *FunctionStart;
+ uint8_t *FunctionEnd;
+ };
+ std::vector<EndFunctionBodyCall> endFunctionBodyCalls;
+ virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
+ endFunctionBodyCalls.push_back(
+ EndFunctionBodyCall(F, FunctionStart, FunctionEnd));
+ Base->endFunctionBody(F, FunctionStart, FunctionEnd);
+ }
+ virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
+ return Base->allocateSpace(Size, Alignment);
+ }
+ virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
+ return Base->allocateGlobal(Size, Alignment);
+ }
+ struct DeallocateFunctionBodyCall {
+ DeallocateFunctionBodyCall(const void *Body) : Body(Body) {}
+ const void *Body;
+ };
+ std::vector<DeallocateFunctionBodyCall> deallocateFunctionBodyCalls;
+ virtual void deallocateFunctionBody(void *Body) {
+ deallocateFunctionBodyCalls.push_back(DeallocateFunctionBodyCall(Body));
+ Base->deallocateFunctionBody(Body);
+ }
+ struct DeallocateExceptionTableCall {
+ DeallocateExceptionTableCall(const void *ET) : ET(ET) {}
+ const void *ET;
+ };
+ std::vector<DeallocateExceptionTableCall> deallocateExceptionTableCalls;
+ virtual void deallocateExceptionTable(void *ET) {
+ deallocateExceptionTableCalls.push_back(DeallocateExceptionTableCall(ET));
+ Base->deallocateExceptionTable(ET);
+ }
+ struct StartExceptionTableCall {
+ StartExceptionTableCall(uint8_t *Result, const Function *F,
+ uintptr_t ActualSize, uintptr_t ActualSizeResult)
+ : Result(Result), F(F), F_dump(DumpFunction(F)),
+ ActualSize(ActualSize), ActualSizeResult(ActualSizeResult) {}
+ uint8_t *Result;
+ const Function *F;
+ std::string F_dump;
+ uintptr_t ActualSize;
+ uintptr_t ActualSizeResult;
+ };
+ std::vector<StartExceptionTableCall> startExceptionTableCalls;
+ virtual uint8_t* startExceptionTable(const Function* F,
+ uintptr_t &ActualSize) {
+ uintptr_t InitialActualSize = ActualSize;
+ uint8_t *Result = Base->startExceptionTable(F, ActualSize);
+ startExceptionTableCalls.push_back(
+ StartExceptionTableCall(Result, F, InitialActualSize, ActualSize));
+ return Result;
+ }
+ struct EndExceptionTableCall {
+ EndExceptionTableCall(const Function *F, uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister)
+ : F(F), F_dump(DumpFunction(F)),
+ TableStart(TableStart), TableEnd(TableEnd),
+ FrameRegister(FrameRegister) {}
+ const Function *F;
+ std::string F_dump;
+ uint8_t *TableStart;
+ uint8_t *TableEnd;
+ uint8_t *FrameRegister;
+ };
+ std::vector<EndExceptionTableCall> endExceptionTableCalls;
+ virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister) {
+ endExceptionTableCalls.push_back(
+ EndExceptionTableCall(F, TableStart, TableEnd, FrameRegister));
+ return Base->endExceptionTable(F, TableStart, TableEnd, FrameRegister);
+ }
+};
+
class JITTest : public testing::Test {
protected:
virtual void SetUp() {
M = new Module("<main>", Context);
+ MP = new ExistingModuleProvider(M);
+ RJMM = new RecordingJITMemoryManager;
std::string Error;
- TheJIT.reset(EngineBuilder(M).setEngineKind(EngineKind::JIT)
+ TheJIT.reset(EngineBuilder(MP).setEngineKind(EngineKind::JIT)
+ .setJITMemoryManager(RJMM)
.setErrorStr(&Error).create());
ASSERT_TRUE(TheJIT.get() != NULL) << Error;
}
+ void LoadAssembly(const char *assembly) {
+ SMDiagnostic Error;
+ bool success = NULL != ParseAssemblyString(assembly, M, Error, Context);
+ std::string errMsg;
+ raw_string_ostream os(errMsg);
+ Error.Print("", os);
+ ASSERT_TRUE(success) << os.str();
+ }
+
LLVMContext Context;
- Module *M; // Owned by ExecutionEngine.
+ Module *M; // Owned by MP.
+ ModuleProvider *MP; // Owned by ExecutionEngine.
+ RecordingJITMemoryManager *RJMM;
OwningPtr<ExecutionEngine> TheJIT;
};
@@ -159,7 +304,7 @@ TEST_F(JITTest, FarCallToKnownFunction) {
Builder.CreateRet(result);
TheJIT->EnableDlsymStubs(false);
- TheJIT->DisableLazyCompilation();
+ TheJIT->DisableLazyCompilation(true);
int (*TestFunctionPtr)() = reinterpret_cast<int(*)()>(
(intptr_t)TheJIT->getPointerToFunction(TestFunction));
// This used to crash in trying to call PlusOne().
@@ -169,7 +314,7 @@ TEST_F(JITTest, FarCallToKnownFunction) {
#if !defined(__arm__) && !defined(__powerpc__) && !defined(__ppc__)
// Test a function C which calls A and B which call each other.
TEST_F(JITTest, NonLazyCompilationStillNeedsStubs) {
- TheJIT->DisableLazyCompilation();
+ TheJIT->DisableLazyCompilation(true);
const FunctionType *Func1Ty =
cast<FunctionType>(TypeBuilder<void(void), false>::get(Context));
@@ -225,7 +370,7 @@ TEST_F(JITTest, NonLazyCompilationStillNeedsStubs) {
// Regression test for PR5162. This used to trigger an AssertingVH inside the
// JIT's Function to stub mapping.
TEST_F(JITTest, NonLazyLeaksNoStubs) {
- TheJIT->DisableLazyCompilation();
+ TheJIT->DisableLazyCompilation(true);
// Create two functions with a single basic block each.
const FunctionType *FuncTy =
@@ -264,6 +409,49 @@ TEST_F(JITTest, NonLazyLeaksNoStubs) {
}
#endif
+TEST_F(JITTest, ModuleDeletion) {
+ TheJIT->DisableLazyCompilation(false);
+ LoadAssembly("define void @main() { "
+ " call i32 @computeVal() "
+ " ret void "
+ "} "
+ " "
+ "define internal i32 @computeVal() { "
+ " ret i32 0 "
+ "} ");
+ Function *func = M->getFunction("main");
+ TheJIT->getPointerToFunction(func);
+ TheJIT->deleteModuleProvider(MP);
+
+ SmallPtrSet<const void*, 2> FunctionsDeallocated;
+ for (unsigned i = 0, e = RJMM->deallocateFunctionBodyCalls.size();
+ i != e; ++i) {
+ FunctionsDeallocated.insert(RJMM->deallocateFunctionBodyCalls[i].Body);
+ }
+ for (unsigned i = 0, e = RJMM->startFunctionBodyCalls.size(); i != e; ++i) {
+ EXPECT_TRUE(FunctionsDeallocated.count(
+ RJMM->startFunctionBodyCalls[i].Result))
+ << "Function leaked: \n" << RJMM->startFunctionBodyCalls[i].F_dump;
+ }
+ EXPECT_EQ(RJMM->startFunctionBodyCalls.size(),
+ RJMM->deallocateFunctionBodyCalls.size());
+
+ SmallPtrSet<const void*, 2> ExceptionTablesDeallocated;
+ for (unsigned i = 0, e = RJMM->deallocateExceptionTableCalls.size();
+ i != e; ++i) {
+ ExceptionTablesDeallocated.insert(
+ RJMM->deallocateExceptionTableCalls[i].ET);
+ }
+ for (unsigned i = 0, e = RJMM->startExceptionTableCalls.size(); i != e; ++i) {
+ EXPECT_TRUE(ExceptionTablesDeallocated.count(
+ RJMM->startExceptionTableCalls[i].Result))
+ << "Function's exception table leaked: \n"
+ << RJMM->startExceptionTableCalls[i].F_dump;
+ }
+ EXPECT_EQ(RJMM->startExceptionTableCalls.size(),
+ RJMM->deallocateExceptionTableCalls.size());
+}
+
// This code is copied from JITEventListenerTest, but it only runs once for all
// the tests in this directory. Everything seems fine, but that's strange
// behavior.
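
The new LoadAssembly helper above is built on ParseAssemblyString from
llvm/Assembly/Parser.h, which is also why the Makefile hunk below adds
asmparser to LINK_COMPONENTS. A minimal sketch of the same call, assuming the
signature used by this test:

    #include "llvm/Assembly/Parser.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/Support/SourceMgr.h"

    // Parse textual IR; passing a null Module makes the parser allocate one.
    static llvm::Module *parseIR(const char *Asm, llvm::LLVMContext &Ctx) {
      llvm::SMDiagnostic Err;
      return llvm::ParseAssemblyString(Asm, 0, Err, Ctx);  // null on error
    }
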
diff --git a/unittests/ExecutionEngine/JIT/Makefile b/unittests/ExecutionEngine/JIT/Makefile
index 0069c76..048924a 100644
--- a/unittests/ExecutionEngine/JIT/Makefile
+++ b/unittests/ExecutionEngine/JIT/Makefile
@@ -9,7 +9,7 @@
LEVEL = ../../..
TESTNAME = JIT
-LINK_COMPONENTS := core support jit native
+LINK_COMPONENTS := asmparser core support jit native
include $(LEVEL)/Makefile.config
include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/Makefile.unittest b/unittests/Makefile.unittest
index 76051e4..e417435 100644
--- a/unittests/Makefile.unittest
+++ b/unittests/Makefile.unittest
@@ -19,7 +19,7 @@ include $(LEVEL)/Makefile.common
LLVMUnitTestExe = $(BuildMode)/$(TESTNAME)Tests$(EXEEXT)
CPP.Flags += -I$(LLVM_SRC_ROOT)/utils/unittest/googletest/include/
-CPP.Flags += -Wno-variadic-macros
+CPP.Flags += $(NO_VARIADIC_MACROS)
TESTLIBS = -lGoogleTest -lUnitTestMain
$(LLVMUnitTestExe): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
diff --git a/unittests/Support/ValueHandleTest.cpp b/unittests/Support/ValueHandleTest.cpp
index c89a7af..6a6528f 100644
--- a/unittests/Support/ValueHandleTest.cpp
+++ b/unittests/Support/ValueHandleTest.cpp
@@ -11,6 +11,7 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/OwningPtr.h"
#include "gtest/gtest.h"
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
index b14114a..17047e7 100644
--- a/unittests/Transforms/Utils/Cloning.cpp
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -10,6 +10,7 @@
#include "gtest/gtest.h"
#include "llvm/Argument.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
using namespace llvm;
@@ -21,45 +22,45 @@ TEST(CloneInstruction, OverflowBits) {
BinaryOperator *Sub = BinaryOperator::Create(Instruction::Sub, V, V);
BinaryOperator *Mul = BinaryOperator::Create(Instruction::Mul, V, V);
- EXPECT_FALSE(Add->clone()->hasNoUnsignedWrap());
- EXPECT_FALSE(Add->clone()->hasNoSignedWrap());
- EXPECT_FALSE(Sub->clone()->hasNoUnsignedWrap());
- EXPECT_FALSE(Sub->clone()->hasNoSignedWrap());
- EXPECT_FALSE(Mul->clone()->hasNoUnsignedWrap());
- EXPECT_FALSE(Mul->clone()->hasNoSignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Add->clone())->hasNoUnsignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Add->clone())->hasNoSignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Sub->clone())->hasNoUnsignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Sub->clone())->hasNoSignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Mul->clone())->hasNoUnsignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Mul->clone())->hasNoSignedWrap());
Add->setHasNoUnsignedWrap();
Sub->setHasNoUnsignedWrap();
Mul->setHasNoUnsignedWrap();
- EXPECT_TRUE(Add->clone()->hasNoUnsignedWrap());
- EXPECT_FALSE(Add->clone()->hasNoSignedWrap());
- EXPECT_TRUE(Sub->clone()->hasNoUnsignedWrap());
- EXPECT_FALSE(Sub->clone()->hasNoSignedWrap());
- EXPECT_TRUE(Mul->clone()->hasNoUnsignedWrap());
- EXPECT_FALSE(Mul->clone()->hasNoSignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Add->clone())->hasNoUnsignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Add->clone())->hasNoSignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Sub->clone())->hasNoUnsignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Sub->clone())->hasNoSignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Mul->clone())->hasNoUnsignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Mul->clone())->hasNoSignedWrap());
Add->setHasNoSignedWrap();
Sub->setHasNoSignedWrap();
Mul->setHasNoSignedWrap();
- EXPECT_TRUE(Add->clone()->hasNoUnsignedWrap());
- EXPECT_TRUE(Add->clone()->hasNoSignedWrap());
- EXPECT_TRUE(Sub->clone()->hasNoUnsignedWrap());
- EXPECT_TRUE(Sub->clone()->hasNoSignedWrap());
- EXPECT_TRUE(Mul->clone()->hasNoUnsignedWrap());
- EXPECT_TRUE(Mul->clone()->hasNoSignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Add->clone())->hasNoUnsignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Add->clone())->hasNoSignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Sub->clone())->hasNoUnsignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Sub->clone())->hasNoSignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Mul->clone())->hasNoUnsignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Mul->clone())->hasNoSignedWrap());
Add->setHasNoUnsignedWrap(false);
Sub->setHasNoUnsignedWrap(false);
Mul->setHasNoUnsignedWrap(false);
- EXPECT_FALSE(Add->clone()->hasNoUnsignedWrap());
- EXPECT_TRUE(Add->clone()->hasNoSignedWrap());
- EXPECT_FALSE(Sub->clone()->hasNoUnsignedWrap());
- EXPECT_TRUE(Sub->clone()->hasNoSignedWrap());
- EXPECT_FALSE(Mul->clone()->hasNoUnsignedWrap());
- EXPECT_TRUE(Mul->clone()->hasNoSignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Add->clone())->hasNoUnsignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Add->clone())->hasNoSignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Sub->clone())->hasNoUnsignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Sub->clone())->hasNoSignedWrap());
+ EXPECT_FALSE(cast<BinaryOperator>(Mul->clone())->hasNoUnsignedWrap());
+ EXPECT_TRUE(cast<BinaryOperator>(Mul->clone())->hasNoSignedWrap());
}
TEST(CloneInstruction, Inbounds) {
@@ -69,10 +70,10 @@ TEST(CloneInstruction, Inbounds) {
std::vector<Value *> ops;
ops.push_back(Z);
GetElementPtrInst *GEP = GetElementPtrInst::Create(V, ops.begin(), ops.end());
- EXPECT_FALSE(GEP->clone()->isInBounds());
+ EXPECT_FALSE(cast<GetElementPtrInst>(GEP->clone())->isInBounds());
GEP->setIsInBounds();
- EXPECT_TRUE(GEP->clone()->isInBounds());
+ EXPECT_TRUE(cast<GetElementPtrInst>(GEP->clone())->isInBounds());
}
TEST(CloneInstruction, Exact) {
@@ -80,8 +81,8 @@ TEST(CloneInstruction, Exact) {
Value *V = new Argument(Type::getInt32Ty(context));
BinaryOperator *SDiv = BinaryOperator::Create(Instruction::SDiv, V, V);
- EXPECT_FALSE(SDiv->clone()->isExact());
+ EXPECT_FALSE(cast<BinaryOperator>(SDiv->clone())->isExact());
SDiv->setIsExact(true);
- EXPECT_TRUE(SDiv->clone()->isExact());
+ EXPECT_TRUE(cast<BinaryOperator>(SDiv->clone())->isExact());
}
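
The cast<> churn above reflects that Instruction::clone() now returns a plain
Instruction*, so subclass-specific queries require an explicit cast on the
clone. A minimal sketch of the pattern (the clone is left unlinked, exactly as
in these tests):

    #include "llvm/Instructions.h"
    #include "llvm/Support/Casting.h"

    // Sketch: query a wrap flag on a freshly cloned binary operator.
    static bool cloneKeepsNUW(llvm::BinaryOperator *Add) {
      llvm::Instruction *C = Add->clone();   // generic result type now
      return llvm::cast<llvm::BinaryOperator>(C)->hasNoUnsignedWrap();
    }
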
diff --git a/unittests/VMCore/MetadataTest.cpp b/unittests/VMCore/MetadataTest.cpp
index b92b068..4bd777b 100644
--- a/unittests/VMCore/MetadataTest.cpp
+++ b/unittests/VMCore/MetadataTest.cpp
@@ -10,6 +10,7 @@
#include "gtest/gtest.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 6b8ceae..fab41c5 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -915,7 +915,6 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
bool MadeChange = false;
MadeChange |= getChild(0)->ApplyTypeConstraints(TP, NotRegisters);
MadeChange |= getChild(1)->ApplyTypeConstraints(TP, NotRegisters);
- MadeChange |= UpdateNodeType(getChild(1)->getTypeNum(0), TP);
return MadeChange;
} else if (const CodeGenIntrinsic *Int = getIntrinsicInfo(CDP)) {
bool MadeChange = false;
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index d421fd0..8520d9e 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -94,7 +94,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
isTerminator = R->getValueAsBit("isTerminator");
isReMaterializable = R->getValueAsBit("isReMaterializable");
hasDelaySlot = R->getValueAsBit("hasDelaySlot");
- usesCustomDAGSchedInserter = R->getValueAsBit("usesCustomDAGSchedInserter");
+ usesCustomInserter = R->getValueAsBit("usesCustomInserter");
hasCtrlDep = R->getValueAsBit("hasCtrlDep");
isNotDuplicable = R->getValueAsBit("isNotDuplicable");
hasSideEffects = R->getValueAsBit("hasSideEffects");
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 04506e9..d22ac3e 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -97,7 +97,7 @@ namespace llvm {
bool isTerminator;
bool isReMaterializable;
bool hasDelaySlot;
- bool usesCustomDAGSchedInserter;
+ bool usesCustomInserter;
bool isVariadic;
bool hasCtrlDep;
bool isNotDuplicable;
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index bbb8a18..c3520c1 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -114,7 +114,7 @@ static unsigned getResultPatternCost(TreePatternNode *P,
if (Op->isSubClassOf("Instruction")) {
Cost++;
CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op->getName());
- if (II.usesCustomDAGSchedInserter)
+ if (II.usesCustomInserter)
Cost += 10;
}
for (unsigned i = 0, e = P->getNumChildren(); i != e; ++i)
@@ -1917,40 +1917,6 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
}
}
- // Emit boilerplate.
- OS << "SDNode *Select_INLINEASM(SDValue N) {\n"
- << " std::vector<SDValue> Ops(N.getNode()->op_begin(), N.getNode()->op_end());\n"
- << " SelectInlineAsmMemoryOperands(Ops);\n\n"
-
- << " std::vector<EVT> VTs;\n"
- << " VTs.push_back(MVT::Other);\n"
- << " VTs.push_back(MVT::Flag);\n"
- << " SDValue New = CurDAG->getNode(ISD::INLINEASM, N.getDebugLoc(), "
- "VTs, &Ops[0], Ops.size());\n"
- << " return New.getNode();\n"
- << "}\n\n";
-
- OS << "SDNode *Select_UNDEF(const SDValue &N) {\n"
- << " return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::IMPLICIT_DEF,\n"
- << " N.getValueType());\n"
- << "}\n\n";
-
- OS << "SDNode *Select_DBG_LABEL(const SDValue &N) {\n"
- << " SDValue Chain = N.getOperand(0);\n"
- << " unsigned C = cast<LabelSDNode>(N)->getLabelID();\n"
- << " SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);\n"
- << " return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::DBG_LABEL,\n"
- << " MVT::Other, Tmp, Chain);\n"
- << "}\n\n";
-
- OS << "SDNode *Select_EH_LABEL(const SDValue &N) {\n"
- << " SDValue Chain = N.getOperand(0);\n"
- << " unsigned C = cast<LabelSDNode>(N)->getLabelID();\n"
- << " SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);\n"
- << " return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::EH_LABEL,\n"
- << " MVT::Other, Tmp, Chain);\n"
- << "}\n\n";
-
OS << "// The main instruction selector code.\n"
<< "SDNode *SelectCode(SDValue N) {\n"
<< " MVT::SimpleValueType NVT = N.getNode()->getValueType(0).getSimpleVT().SimpleTy;\n"
@@ -1967,6 +1933,7 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
<< " case ISD::TargetConstantPool:\n"
<< " case ISD::TargetFrameIndex:\n"
<< " case ISD::TargetExternalSymbol:\n"
+ << " case ISD::TargetBlockAddress:\n"
<< " case ISD::TargetJumpTable:\n"
<< " case ISD::TargetGlobalTLSAddress:\n"
<< " case ISD::TargetGlobalAddress:\n"
@@ -2054,30 +2021,6 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
<< " }\n"
<< " return NULL;\n"
<< "}\n\n";
-
- OS << "void CannotYetSelect(SDValue N) DISABLE_INLINE {\n"
- << " std::string msg;\n"
- << " raw_string_ostream Msg(msg);\n"
- << " Msg << \"Cannot yet select: \";\n"
- << " N.getNode()->print(Msg, CurDAG);\n"
- << " llvm_report_error(Msg.str());\n"
- << "}\n\n";
-
- OS << "void CannotYetSelectIntrinsic(SDValue N) DISABLE_INLINE {\n"
- << " errs() << \"Cannot yet select: \";\n"
- << " unsigned iid = cast<ConstantSDNode>(N.getOperand("
- << "N.getOperand(0).getValueType() == MVT::Other))->getZExtValue();\n"
- << " if (iid < Intrinsic::num_intrinsics)\n"
- << " llvm_report_error(\"Cannot yet select: intrinsic %\" + "
- << "Intrinsic::getName((Intrinsic::ID)iid));\n";
- if (CGP.hasTargetIntrinsics()) {
- OS << " else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo())\n"
- << " llvm_report_error(Twine(\"Cannot yet select: target intrinsic "
- << "%\") + tii->getName(iid));\n";
- }
- OS << " else\n"
- << " llvm_report_error(\"Cannot yet select: invalid intrinsic\");\n"
- << "}\n\n";
}
void DAGISelEmitter::run(raw_ostream &OS) {
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 3a104ea..adb98fb9 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -275,8 +275,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.isReMaterializable) OS << "|(1<<TID::Rematerializable)";
if (Inst.isNotDuplicable) OS << "|(1<<TID::NotDuplicable)";
if (Inst.hasOptionalDef) OS << "|(1<<TID::HasOptionalDef)";
- if (Inst.usesCustomDAGSchedInserter)
- OS << "|(1<<TID::UsesCustomDAGSchedInserter)";
+ if (Inst.usesCustomInserter) OS << "|(1<<TID::UsesCustomInserter)";
if (Inst.isVariadic) OS << "|(1<<TID::Variadic)";
if (Inst.hasSideEffects) OS << "|(1<<TID::UnmodeledSideEffects)";
if (Inst.isAsCheapAsAMove) OS << "|(1<<TID::CheapAsAMove)";
diff --git a/utils/TableGen/LLVMCConfigurationEmitter.cpp b/utils/TableGen/LLVMCConfigurationEmitter.cpp
index f5d1139..546988a 100644
--- a/utils/TableGen/LLVMCConfigurationEmitter.cpp
+++ b/utils/TableGen/LLVMCConfigurationEmitter.cpp
@@ -28,7 +28,6 @@
using namespace llvm;
-namespace {
//===----------------------------------------------------------------------===//
/// Typedefs
@@ -40,16 +39,18 @@ typedef std::vector<std::string> StrVector;
/// Constants
// Indentation.
-unsigned TabWidth = 4;
-unsigned Indent1 = TabWidth*1;
-unsigned Indent2 = TabWidth*2;
-unsigned Indent3 = TabWidth*3;
+static const unsigned TabWidth = 4;
+static const unsigned Indent1 = TabWidth*1;
+static const unsigned Indent2 = TabWidth*2;
+static const unsigned Indent3 = TabWidth*3;
// Default help string.
-const char * DefaultHelpString = "NO HELP MESSAGE PROVIDED";
+static const char * const DefaultHelpString = "NO HELP MESSAGE PROVIDED";
// Name for the "sink" option.
-const char * SinkOptionName = "AutoGeneratedSinkOption";
+static const char * const SinkOptionName = "AutoGeneratedSinkOption";
+
+namespace {
//===----------------------------------------------------------------------===//
/// Helper functions
@@ -1901,7 +1902,8 @@ void EmitGenerateActionMethod (const ToolDescription& D,
// For every understood option, emit handling code.
if (D.Actions)
- EmitCaseConstructHandler(D.Actions, Indent2, EmitActionHandlersCallback(OptDescs),
+ EmitCaseConstructHandler(D.Actions, Indent2,
+ EmitActionHandlersCallback(OptDescs),
false, OptDescs, O);
O << '\n';
diff --git a/utils/TableGen/Record.cpp b/utils/TableGen/Record.cpp
index a551166..25fe5c4 100644
--- a/utils/TableGen/Record.cpp
+++ b/utils/TableGen/Record.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Record.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/Format.h"
#include "llvm/ADT/StringExtras.h"
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index 1b33743..752bd0c 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -16,7 +16,7 @@
#define RECORD_H
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
diff --git a/utils/TableGen/TGLexer.h b/utils/TableGen/TGLexer.h
index 80405ac..6790208 100644
--- a/utils/TableGen/TGLexer.h
+++ b/utils/TableGen/TGLexer.h
@@ -14,7 +14,7 @@
#ifndef TGLEXER_H
#define TGLEXER_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/System/DataTypes.h"
#include <vector>
#include <string>
#include <cassert>
diff --git a/utils/UpdateCMakeLists.pl b/utils/UpdateCMakeLists.pl
index 3aa2f88..6d24d90 100755
--- a/utils/UpdateCMakeLists.pl
+++ b/utils/UpdateCMakeLists.pl
@@ -68,7 +68,7 @@ sub UpdateCMake {
while(<IN>) {
if (!$foundLibrary) {
print OUT $_;
- if (/^add_clang_library\(/ || /^add_llvm_library\(/) {
+ if (/^add_clang_library\(/ || /^add_llvm_library\(/ || /^add_llvm_target\(/) {
$foundLibrary = 1;
EmitCMakeList($dir);
}
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index 2bdb208..9168d1a 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -59,6 +59,10 @@ echo DARWIN_VERS = $DARWIN_VERS
if [ "x$RC_ProjectName" = "xllvmCore_Embedded" ]; then
DT_HOME=$DEST_DIR/Developer/Platforms/iPhoneOS.platform/Developer/usr
DEST_ROOT="/Developer/Platforms/iPhoneOS.platform/Developer$DEST_ROOT"
+elif [ "x$RC_ProjectName" = "xllvmCore_EmbeddedHosted" ]; then
+ DT_HOME=$DEST_DIR/usr
+ DEST_ROOT="/Developer$DEST_ROOT"
+ HOST_SDKROOT=$SDKROOT
else
DT_HOME=$DEST_DIR/Developer/usr
DEST_ROOT="/Developer$DEST_ROOT"
@@ -91,24 +95,69 @@ sed -e '/[Aa]pple-style/d' -e '/include.*GNUmakefile/d' $ORIG_SRC_DIR/Makefile >
mkdir -p $DIR/obj-llvm || exit 1
cd $DIR/obj-llvm || exit 1
-# If the user has set CC or CXX, respect their wishes. If not,
-# compile with LLVM-GCC/LLVM-G++ if available; if LLVM is not
-# available, fall back to usual GCC/G++ default.
-savedPATH=$PATH ; PATH="$PATH:/Developer/usr/bin"
-XTMPCC=$(which llvm-gcc)
-if [ x$CC = x -a x$XTMPCC != x ] ; then export CC=$XTMPCC ; fi
-XTMPCC=$(which llvm-g++)
-if [ x$CXX = x -a x$XTMPCC != x ] ; then export CXX=$XTMPCC ; fi
-PATH=$savedPATH
-unset XTMPCC savedPATH
-
-if [ \! -f Makefile.config ]; then
- $SRC_DIR/configure --prefix=$DT_HOME/local \
- --enable-targets=arm,x86,powerpc,cbe \
- --enable-assertions=$LLVM_ASSERTIONS \
- --enable-optimized=$LLVM_OPTIMIZED \
- --disable-bindings \
- || exit 1
+
+if [ "x$RC_ProjectName" = "xllvmCore_EmbeddedHosted" ]; then
+ # The cross-tools' build process expects to find an existing cross toolchain
+ # under names like 'arm-apple-darwin$DARWIN_VERS-as'; so make them.
+ rm -rf $DIR/bin || exit 1
+ mkdir $DIR/bin || exit 1
+ for prog in ar nm ranlib strip lipo ld as ; do
+ P=$DIR/bin/arm-apple-darwin$DARWIN_VERS-${prog}
+ T=`xcrun -sdk $SDKROOT -find ${prog}`
+ echo '#!/bin/sh' > $P || exit 1
+ echo 'exec '$T' "$@"' >> $P || exit 1
+ chmod a+x $P || exit 1
+ done
+ # Try to use the platform llvm-gcc. Fall back to gcc if it's not available.
+ for prog in gcc g++ ; do
+ P=$DIR/bin/arm-apple-darwin$DARWIN_VERS-${prog}
+# FIXME: Uncomment once llvm-gcc works for this
+# T=`xcrun -find llvm-${prog}`
+# if [ "x$T" = "x" ] ; then
+ T=`xcrun -sdk $SDKROOT -find ${prog}`
+# fi
+ echo '#!/bin/sh' > $P || exit 1
+ echo 'exec '$T' -arch armv6 -isysroot '${SDKROOT}' "$@"' >> $P || exit 1
+ chmod a+x $P || exit 1
+ done
+
+ PATH=$DIR/bin:$PATH
+# otherwise, try to use llvm-gcc if it's available
+elif [ $DARWIN_VERS -gt 9 ]; then
+ # If the user has set CC or CXX, respect their wishes. If not,
+ # compile with LLVM-GCC/LLVM-G++ if available; if LLVM is not
+ # available, fall back to usual GCC/G++ default.
+ savedPATH=$PATH ; PATH="/Developer/usr/bin:$PATH"
+ XTMPCC=$(which llvm-gcc)
+ if [ x$CC = x -a x$XTMPCC != x ] ; then export CC=$XTMPCC ; fi
+ XTMPCC=$(which llvm-g++)
+ if [ x$CXX = x -a x$XTMPCC != x ] ; then export CXX=$XTMPCC ; fi
+ PATH=$savedPATH
+ unset XTMPCC savedPATH
+fi
+
+
+if [ "x$RC_ProjectName" = "xllvmCore_EmbeddedHosted" ]; then
+ if [ \! -f Makefile.config ]; then
+ $SRC_DIR/configure --prefix=$DT_HOME \
+ --enable-targets=arm \
+ --host=arm-apple-darwin10 \
+ --target=arm-apple-darwin10 \
+ --build=i686-apple-darwin10 \
+ --enable-assertions=$LLVM_ASSERTIONS \
+ --enable-optimized=$LLVM_OPTIMIZED \
+ --disable-bindings \
+ || exit 1
+ fi
+else
+ if [ \! -f Makefile.config ]; then
+ $SRC_DIR/configure --prefix=$DT_HOME/local \
+ --enable-targets=arm,x86,powerpc,cbe \
+ --enable-assertions=$LLVM_ASSERTIONS \
+ --enable-optimized=$LLVM_OPTIMIZED \
+ --disable-bindings \
+ || exit 1
+ fi
fi
SUBVERSION=`echo $RC_ProjectSourceVersion | sed -e 's/[^.]*\.\([0-9]*\).*/\1/'`
@@ -158,6 +207,7 @@ if [ "x$MAJ_VER" != "x4" -o "x$MIN_VER" != "x0" ]; then
fi
make $JOBS_FLAG $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$TARGETS" \
+ UNIVERSAL_SDK_PATH=$HOST_SDKROOT \
NO_RUNTIME_LIBS=1 \
LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
diff --git a/utils/findoptdiff b/utils/findoptdiff
index 36620d9..4f8d08d 100755
--- a/utils/findoptdiff
+++ b/utils/findoptdiff
@@ -70,7 +70,7 @@ dis2="$llvm2/Debug/bin/llvm-dis"
opt1="$llvm1/Debug/bin/opt"
opt2="$llvm2/Debug/bin/opt"
-all_switches="-verify -lowersetjmp -raiseallocs -simplifycfg -mem2reg -globalopt -globaldce -ipconstprop -deadargelim -instcombine -simplifycfg -prune-eh -inline -simplify-libcalls -argpromotion -tailduplicate -simplifycfg -scalarrepl -instcombine -predsimplify -condprop -tailcallelim -simplifycfg -reassociate -licm -loop-unswitch -instcombine -indvars -loop-unroll -instcombine -load-vn -gcse -sccp -instcombine -condprop -dse -dce -simplifycfg -deadtypeelim -constmerge -internalize -ipsccp -globalopt -constmerge -deadargelim -inline -prune-eh -globalopt -globaldce -argpromotion -instcombine -predsimplify -scalarrepl -globalsmodref-aa -licm -load-vn -gcse -dse -instcombine -simplifycfg -verify"
+all_switches="-verify -lowersetjmp -simplifycfg -mem2reg -globalopt -globaldce -ipconstprop -deadargelim -instcombine -simplifycfg -prune-eh -inline -simplify-libcalls -argpromotion -tailduplicate -simplifycfg -scalarrepl -instcombine -predsimplify -condprop -tailcallelim -simplifycfg -reassociate -licm -loop-unswitch -instcombine -indvars -loop-unroll -instcombine -load-vn -gcse -sccp -instcombine -condprop -dse -dce -simplifycfg -deadtypeelim -constmerge -internalize -ipsccp -globalopt -constmerge -deadargelim -inline -prune-eh -globalopt -globaldce -argpromotion -instcombine -predsimplify -scalarrepl -globalsmodref-aa -licm -load-vn -gcse -dse -instcombine -simplifycfg -verify"
#counter=0
function tryit {
diff --git a/utils/lit/TestRunner.py b/utils/lit/TestRunner.py
index 34e828b..bee1167 100644
--- a/utils/lit/TestRunner.py
+++ b/utils/lit/TestRunner.py
@@ -15,6 +15,10 @@ class InternalShellError(Exception):
# Don't use close_fds on Windows.
kUseCloseFDs = platform.system() != 'Windows'
+
+# Use temporary files to replace /dev/null on Windows.
+kAvoidDevNull = platform.system() == 'Windows'
+
def executeCommand(command, cwd=None, env=None):
p = subprocess.Popen(command, cwd=cwd,
stdin=subprocess.PIPE,
@@ -63,21 +67,30 @@ def executeShCmd(cmd, cfg, cwd, results):
# output. This is null until we have seen some output using
# stderr.
for i,j in enumerate(cmd.commands):
+        # Apply the redirections; we use (N,) as a sentinel to indicate stdin,
+ # stdout, stderr for N equal to 0, 1, or 2 respectively. Redirects to or
+ # from a file are represented with a list [file, mode, file-object]
+ # where file-object is initially None.
redirects = [(0,), (1,), (2,)]
for r in j.redirects:
if r[0] == ('>',2):
redirects[2] = [r[1], 'w', None]
+ elif r[0] == ('>>',2):
+ redirects[2] = [r[1], 'a', None]
elif r[0] == ('>&',2) and r[1] in '012':
redirects[2] = redirects[int(r[1])]
elif r[0] == ('>&',) or r[0] == ('&>',):
redirects[1] = redirects[2] = [r[1], 'w', None]
elif r[0] == ('>',):
redirects[1] = [r[1], 'w', None]
+ elif r[0] == ('>>',):
+ redirects[1] = [r[1], 'a', None]
elif r[0] == ('<',):
redirects[0] = [r[1], 'r', None]
else:
raise NotImplementedError,"Unsupported redirect: %r" % (r,)
+ # Map from the final redirections to something subprocess can handle.
final_redirects = []
for index,r in enumerate(redirects):
if r == (0,):
@@ -95,7 +108,10 @@ def executeShCmd(cmd, cfg, cwd, results):
result = subprocess.PIPE
else:
if r[2] is None:
- r[2] = open(r[0], r[1])
+ if kAvoidDevNull and r[0] == '/dev/null':
+ r[2] = tempfile.TemporaryFile(mode=r[1])
+ else:
+ r[2] = open(r[0], r[1])
result = r[2]
final_redirects.append(result)
@@ -317,7 +333,24 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
return executeCommand(command, cwd=cwd, env=test.config.environment)
-def parseIntegratedTestScript(test, xfailHasColon, requireAndAnd):
+def isExpectedFail(xfails, xtargets, target_triple):
+ # Check if any xfail matches this target.
+ for item in xfails:
+ if item == '*' or item in target_triple:
+ break
+ else:
+ return False
+
+ # If so, see if it is expected to pass on this target.
+ #
+ # FIXME: Rename XTARGET to something that makes sense, like XPASS.
+ for item in xtargets:
+ if item == '*' or item in target_triple:
+ return False
+
+ return True
+
+def parseIntegratedTestScript(test, requireAndAnd):
"""parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
script and extract the lines to 'RUN' as well as 'XFAIL' and 'XTARGET'
information. The RUN lines also will have variable substitution performed.
@@ -361,12 +394,9 @@ def parseIntegratedTestScript(test, xfailHasColon, requireAndAnd):
script[-1] = script[-1][:-1] + ln
else:
script.append(ln)
- elif xfailHasColon and 'XFAIL:' in ln:
+ elif 'XFAIL:' in ln:
items = ln[ln.index('XFAIL:') + 6:].split(',')
xfails.extend([s.strip() for s in items])
- elif not xfailHasColon and 'XFAIL' in ln:
- items = ln[ln.index('XFAIL') + 5:].split(',')
- xfails.extend([s.strip() for s in items])
elif 'XTARGET:' in ln:
items = ln[ln.index('XTARGET:') + 8:].split(',')
xtargets.extend([s.strip() for s in items])
@@ -405,7 +435,8 @@ def parseIntegratedTestScript(test, xfailHasColon, requireAndAnd):
# Strip off '&&'
script[i] = ln[:-2]
- return script,xfails,xtargets,tmpBase,execdir
+ isXFail = isExpectedFail(xfails, xtargets, test.suite.config.target_triple)
+ return script,isXFail,tmpBase,execdir
def formatTestOutput(status, out, err, exitCode, script):
output = StringIO.StringIO()
@@ -428,11 +459,11 @@ def executeTclTest(test, litConfig):
if test.config.unsupported:
return (Test.UNSUPPORTED, 'Test is unsupported')
- res = parseIntegratedTestScript(test, True, False)
+ res = parseIntegratedTestScript(test, False)
if len(res) == 2:
return res
- script, xfails, xtargets, tmpBase, execdir = res
+ script, isXFail, tmpBase, execdir = res
if litConfig.noExecute:
return (Test.PASS, '')
@@ -444,19 +475,6 @@ def executeTclTest(test, litConfig):
if len(res) == 2:
return res
- isXFail = False
- for item in xfails:
- if item == '*' or item in test.suite.config.target_triple:
- isXFail = True
- break
-
- # If this is XFAIL, see if it is expected to pass on this target.
- if isXFail:
- for item in xtargets:
- if item == '*' or item in test.suite.config.target_triple:
- isXFail = False
- break
-
out,err,exitCode = res
if isXFail:
ok = exitCode != 0
@@ -474,11 +492,11 @@ def executeShTest(test, litConfig, useExternalSh, requireAndAnd):
if test.config.unsupported:
return (Test.UNSUPPORTED, 'Test is unsupported')
- res = parseIntegratedTestScript(test, False, requireAndAnd)
+ res = parseIntegratedTestScript(test, requireAndAnd)
if len(res) == 2:
return res
- script, xfails, xtargets, tmpBase, execdir = res
+ script, isXFail, tmpBase, execdir = res
if litConfig.noExecute:
return (Test.PASS, '')
@@ -494,7 +512,7 @@ def executeShTest(test, litConfig, useExternalSh, requireAndAnd):
return res
out,err,exitCode = res
- if xfails:
+ if isXFail:
ok = exitCode != 0
status = (Test.XPASS, Test.XFAIL)[ok]
else:
diff --git a/utils/lit/Util.py b/utils/lit/Util.py
index e62a8ed..66c5e46 100644
--- a/utils/lit/Util.py
+++ b/utils/lit/Util.py
@@ -15,7 +15,7 @@ def detectCPUs():
return int(os.popen2("sysctl -n hw.ncpu")[1].read())
# Windows:
if os.environ.has_key("NUMBER_OF_PROCESSORS"):
- ncpus = int(os.environ["NUMBER_OF_PROCESSORS"]);
+ ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
if ncpus > 0:
return ncpus
return 1 # Default
diff --git a/utils/lit/lit.py b/utils/lit/lit.py
index 5b24286..a856473 100755
--- a/utils/lit/lit.py
+++ b/utils/lit/lit.py
@@ -16,9 +16,13 @@ from TestingConfig import TestingConfig
import LitConfig
import Test
+# Configuration files to look for when discovering test suites. These can be
+# overridden with --config-prefix.
+#
# FIXME: Rename to 'config.lit', 'site.lit', and 'local.lit' ?
-kConfigName = 'lit.cfg'
-kSiteConfigName = 'lit.site.cfg'
+gConfigName = 'lit.cfg'
+gSiteConfigName = 'lit.site.cfg'
+
kLocalConfigName = 'lit.local.cfg'
class TestingProgressDisplay:
@@ -134,10 +138,10 @@ class Tester(threading.Thread):
self.display.update(test)
def dirContainsTestSuite(path):
- cfgpath = os.path.join(path, kSiteConfigName)
+ cfgpath = os.path.join(path, gSiteConfigName)
if os.path.exists(cfgpath):
return cfgpath
- cfgpath = os.path.join(path, kConfigName)
+ cfgpath = os.path.join(path, gConfigName)
if os.path.exists(cfgpath):
return cfgpath
@@ -268,7 +272,7 @@ def getTestsInSuite(ts, path_in_suite, litConfig,
file_sourcepath = os.path.join(source_path, filename)
if not os.path.isdir(file_sourcepath):
continue
-
+
# Check for nested test suites, first in the execpath in case there is a
# site configuration and then in the source path.
file_execpath = ts.getExecPath(path_in_suite + (filename,))
@@ -283,7 +287,7 @@ def getTestsInSuite(ts, path_in_suite, litConfig,
subiter = getTestsInSuite(ts, path_in_suite + (filename,),
litConfig, testSuiteCache,
localConfigCache)
-
+
for res in subiter:
yield res
@@ -314,6 +318,9 @@ def main():
parser.add_option("-j", "--threads", dest="numThreads", metavar="N",
help="Number of testing threads",
type=int, action="store", default=None)
+ parser.add_option("", "--config-prefix", dest="configPrefix",
+ metavar="NAME", help="Prefix for 'lit' config files",
+ action="store", default=None)
group = OptionGroup(parser, "Output Format")
# FIXME: I find these names very confusing, although I like the
@@ -379,6 +386,11 @@ def main():
if not args:
parser.error('No inputs specified')
+ if opts.configPrefix is not None:
+ global gConfigName, gSiteConfigName
+ gConfigName = '%s.cfg' % opts.configPrefix
+ gSiteConfigName = '%s.site.cfg' % opts.configPrefix
+
if opts.numThreads is None:
opts.numThreads = Util.detectCPUs()
@@ -413,7 +425,8 @@ def main():
if opts.showSuites:
suitesAndTests = dict([(ts,[])
- for ts,_ in testSuiteCache.values()])
+ for ts,_ in testSuiteCache.values()
+ if ts])
for t in tests:
suitesAndTests[t.suite].append(t)
diff --git a/utils/unittest/UnitTestMain/Makefile b/utils/unittest/UnitTestMain/Makefile
index aadff21..7b49191 100644
--- a/utils/unittest/UnitTestMain/Makefile
+++ b/utils/unittest/UnitTestMain/Makefile
@@ -10,8 +10,6 @@
LEVEL = ../../..
include $(LEVEL)/Makefile.config
-NO_MISSING_FIELD_INITIALIZERS := $(shell $(CXX) -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers)
-NO_VARIADIC_MACROS := $(shell $(CXX) -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros)
LIBRARYNAME = UnitTestMain
BUILD_ARCHIVE = 1
diff --git a/utils/unittest/googletest/Makefile b/utils/unittest/googletest/Makefile
index 29fe679..2d2c282 100644
--- a/utils/unittest/googletest/Makefile
+++ b/utils/unittest/googletest/Makefile
@@ -10,8 +10,6 @@
LEVEL := ../../..
include $(LEVEL)/Makefile.config
-NO_MISSING_FIELD_INITIALIZERS := $(shell $(CXX) -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers)
-NO_VARIADIC_MACROS := $(shell $(CXX) -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros)
LIBRARYNAME = GoogleTest
BUILD_ARCHIVE = 1
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index 2cc266b..451013e 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -34,7 +34,7 @@ syn keyword llvmStatement phi call select shl lshr ashr va_arg
syn keyword llvmStatement trunc zext sext
syn keyword llvmStatement fptrunc fpext fptoui fptosi uitofp sitofp
syn keyword llvmStatement ptrtoint inttoptr bitcast
-syn keyword llvmStatement ret br switch invoke unwind unreachable
+syn keyword llvmStatement ret br indirectbr switch invoke unwind unreachable
syn keyword llvmStatement malloc alloca free load store getelementptr
syn keyword llvmStatement extractelement insertelement shufflevector
syn keyword llvmStatement extractvalue insertvalue
@@ -56,6 +56,7 @@ syn keyword llvmKeyword noredzone noimplicitfloat naked
syn keyword llvmKeyword module asm align tail to
syn keyword llvmKeyword addrspace section alias sideeffect c gc
syn keyword llvmKeyword target datalayout triple
+syn keyword llvmKeyword blockaddress
" Obsolete keywords.
syn keyword llvmError uninitialized implementation