author     ed <ed@FreeBSD.org>    2009-06-06 08:20:29 +0000
committer  ed <ed@FreeBSD.org>    2009-06-06 08:20:29 +0000
commit     2361a5c2bfbaef476824e51fa72712e334219c7b (patch)
tree       8a1bbd1a5b838080d31e5c93a1817006b8c62318
parent     1941b8772a36a33c7b86cb67163cd735b3d57221 (diff)
Import LLVM, at r72995.
We should now have support for #pragma weak.
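
For background, #pragma weak marks a symbol as weak (or defines a weak alias), so a translation unit can ship a default definition that a strong definition in another object file overrides at link time. A minimal C sketch of the idiom this adds support for (a hypothetical example, not taken from the imported sources):

/* weak_default.c -- compile with clang or gcc on an ELF platform. */
#include <stdio.h>

void fallback_init(void) {
    puts("default (weak) initializer");
}

/* Make lib_init a weak alias for fallback_init; a strong lib_init
 * linked in from another object file takes precedence. */
void lib_init(void);
#pragma weak lib_init = fallback_init

int main(void) {
    lib_init();  /* resolves to fallback_init unless overridden */
    return 0;
}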
-rw-r--r--  CMakeLists.txt | 39
-rw-r--r--  Makefile.rules | 2
-rw-r--r--  autoconf/configure.ac | 32
-rwxr-xr-x  cmake/config-ix.cmake | 5
-rwxr-xr-x  cmake/modules/AddLLVM.cmake | 6
-rwxr-xr-x  cmake/modules/AddPartiallyLinkedObject.cmake | 12
-rwxr-xr-x  cmake/modules/LLVMConfig.cmake | 54
-rwxr-xr-x  configure | 83
-rw-r--r--  docs/CMake.html | 4
-rw-r--r--  docs/GoldPlugin.html | 13
-rw-r--r--  docs/LangRef.html | 137
-rw-r--r--  include/llvm-c/lto.h | 10
-rw-r--r--  include/llvm/Analysis/Dominators.h | 9
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpander.h | 7
-rw-r--r--  include/llvm/Attributes.h | 6
-rw-r--r--  include/llvm/CodeGen/JITCodeEmitter.h | 66
-rw-r--r--  include/llvm/CodeGen/MachineCodeEmitter.h | 70
-rw-r--r--  include/llvm/Config/config.h.cmake | 10
-rw-r--r--  include/llvm/Constants.h | 4
-rw-r--r--  include/llvm/ExecutionEngine/JITMemoryManager.h | 26
-rw-r--r--  include/llvm/InstrTypes.h | 13
-rw-r--r--  include/llvm/Instruction.def | 113
-rw-r--r--  include/llvm/Support/ConstantFolder.h | 12
-rw-r--r--  include/llvm/Support/IRBuilder.h | 18
-rw-r--r--  include/llvm/Support/NoFolder.h | 9
-rw-r--r--  include/llvm/Support/PatternMatch.h | 47
-rw-r--r--  include/llvm/Support/StandardPasses.h | 10
-rw-r--r--  include/llvm/Support/TargetFolder.h | 12
-rw-r--r--  include/llvm/Support/raw_ostream.h | 31
-rw-r--r--  include/llvm/System/Process.h | 28
-rw-r--r--  include/llvm/Target/TargetELFWriterInfo.h | 18
-rw-r--r--  include/llvm/Target/TargetLowering.h | 13
-rw-r--r--  include/llvm/Target/TargetOptions.h | 10
-rw-r--r--  lib/Analysis/InstCount.cpp | 1
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 1
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 22
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 13
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 2
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 6
-rw-r--r--  lib/AsmParser/LLParser.cpp | 51
-rw-r--r--  lib/AsmParser/LLToken.h | 5
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 9
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 9
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfPrinter.cpp | 1
-rw-r--r--  lib/CodeGen/ELF.h | 186
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.cpp | 84
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.h | 7
-rw-r--r--  lib/CodeGen/ELFWriter.cpp | 73
-rw-r--r--  lib/CodeGen/ELFWriter.h | 90
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 1
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 51
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 22
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 24
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 197
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 27
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 19
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp | 46
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuild.h | 9
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 17
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 84
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 21
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 56
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 17
-rw-r--r--  lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 54
-rw-r--r--  lib/Support/raw_ostream.cpp | 30
-rw-r--r--  lib/System/Unix/Process.inc | 59
-rw-r--r--  lib/System/Win32/Process.inc | 67
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp | 1
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 84
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 10
-rw-r--r--  lib/Target/Alpha/AlphaMachineFunctionInfo.h | 4
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 33
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 9
-rw-r--r--  lib/Target/IA64/IA64MachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 8
-rw-r--r--  lib/Target/MSP430/MSP430MachineFunctionInfo.h | 3
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.cpp | 35
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 20
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCMachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 1
-rw-r--r--  lib/Target/TargetMachine.cpp | 11
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.cpp | 3
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.h | 2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 192
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 3
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 21
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 24
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 2
-rw-r--r--  lib/Target/X86/X86MachineFunctionInfo.h | 17
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 35
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 10
-rw-r--r--  lib/Target/XCore/XCoreMachineFunctionInfo.h | 2
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/GVNPRE.cpp | 9
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp | 224
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 23
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 2
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 8
-rw-r--r--  lib/VMCore/Attributes.cpp | 4
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 23
-rw-r--r--  lib/VMCore/Constants.cpp | 49
-rw-r--r--  lib/VMCore/Instruction.cpp | 19
-rw-r--r--  lib/VMCore/Instructions.cpp | 70
-rw-r--r--  lib/VMCore/Verifier.cpp | 48
-rw-r--r--  test/Analysis/ScalarEvolution/sext-iv-0.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/sext-iv-1.ll | 8
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count4.ll | 2
-rw-r--r--  test/Assembler/2002-04-07-HexFloatConstants.ll | 2
-rw-r--r--  test/Assembler/2002-04-07-InfConstant.ll | 2
-rw-r--r--  test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll | 8
-rw-r--r--  test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll | 2
-rw-r--r--  test/CodeGen/ARM/2009-02-27-SpillerBug.ll | 280
-rw-r--r--  test/CodeGen/ARM/2009-03-07-SpillerBug.ll | 56
-rw-r--r--  test/CodeGen/ARM/2009-04-08-FloatUndef.ll | 4
-rw-r--r--  test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll | 263
-rw-r--r--  test/CodeGen/ARM/cse-libcalls.ll | 2
-rw-r--r--  test/CodeGen/ARM/fixunsdfdi.ll | 2
-rw-r--r--  test/CodeGen/ARM/fnmul.ll | 4
-rw-r--r--  test/CodeGen/ARM/fparith.ll | 16
-rw-r--r--  test/CodeGen/ARM/fpmem.ll | 4
-rw-r--r--  test/CodeGen/ARM/illegal-vector-bitcast.ll | 2
-rw-r--r--  test/CodeGen/ARM/lsr-scale-addr-mode.ll | 2
-rw-r--r--  test/CodeGen/ARM/memcpy-inline.ll | 6
-rw-r--r--  test/CodeGen/ARM/str_pre-2.ll | 1
-rw-r--r--  test/CodeGen/ARM/vfp.ll | 8
-rw-r--r--  test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll | 4
-rw-r--r--  test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll | 2
-rw-r--r--  test/CodeGen/CBackend/fneg.ll | 7
-rw-r--r--  test/CodeGen/CBackend/vectors.ll | 2
-rw-r--r--  test/CodeGen/CellSPU/dp_farith.ll | 48
-rw-r--r--  test/CodeGen/CellSPU/fneg-fabs.ll | 8
-rw-r--r--  test/CodeGen/CellSPU/sp_farith.ll | 44
-rw-r--r--  test/CodeGen/Generic/2006-07-03-schedulers.ll | 14
-rw-r--r--  test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll | 6
-rw-r--r--  test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll | 2
-rw-r--r--  test/CodeGen/Generic/2008-02-25-NegateZero.ll | 4
-rw-r--r--  test/CodeGen/Generic/2008-02-26-NegatableCrash.ll | 8
-rw-r--r--  test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll | 15
-rw-r--r--  test/CodeGen/Generic/fneg-fabs.ll | 4
-rw-r--r--  test/CodeGen/Generic/print-arith-fp.ll | 6
-rw-r--r--  test/CodeGen/Generic/select.ll | 8
-rw-r--r--  test/CodeGen/Generic/storetrunc-fp.ll | 2
-rw-r--r--  test/CodeGen/Generic/v-split.ll | 2
-rw-r--r--  test/CodeGen/Generic/vector.ll | 20
-rw-r--r--  test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll | 2
-rw-r--r--  test/CodeGen/Mips/2008-07-06-fadd64.ll | 2
-rw-r--r--  test/CodeGen/Mips/2008-07-22-Cstpool.ll | 4
-rw-r--r--  test/CodeGen/Mips/2008-07-23-fpcmp.ll | 2
-rw-r--r--  test/CodeGen/Mips/2008-08-03-fabs64.ll | 2
-rw-r--r--  test/CodeGen/Mips/2008-11-10-xint_to_fp.ll | 8
-rw-r--r--  test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll | 6
-rw-r--r--  test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll | 88
-rw-r--r--  test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll | 4
-rw-r--r--  test/CodeGen/PowerPC/2008-07-15-Fabs.ll | 8
-rw-r--r--  test/CodeGen/PowerPC/2008-07-17-Fneg.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll | 40
-rw-r--r--  test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll | 6
-rw-r--r--  test/CodeGen/PowerPC/2008-10-28-f128-i32.ll | 6
-rw-r--r--  test/CodeGen/PowerPC/buildvec_canonicalize.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/fma.ll | 40
-rw-r--r--  test/CodeGen/PowerPC/fnabs.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/fneg.ll | 8
-rw-r--r--  test/CodeGen/PowerPC/int-fp-conv-1.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/itofp128.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/mem-rr-addr-mode.ll | 4
-rw-r--r--  test/CodeGen/PowerPC/multiple-return-values.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/ppcf128-1-opt.ll | 6
-rw-r--r--  test/CodeGen/PowerPC/ppcf128-1.ll | 6
-rw-r--r--  test/CodeGen/PowerPC/ppcf128-2.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/ppcf128-4.ll | 4
-rw-r--r--  test/CodeGen/PowerPC/return-val-i128.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/unsafe-math.ll | 4
-rw-r--r--  test/CodeGen/PowerPC/vec_fneg.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/vec_splat.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/vec_zero.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/vector.ll | 20
-rw-r--r--  test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll | 4
-rw-r--r--  test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll | 8
-rw-r--r--  test/CodeGen/X86/2006-05-25-CycleInDAG.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-01-08-InstrSched.ll | 14
-rw-r--r--  test/CodeGen/X86/2007-01-13-StackPtrIndex.ll | 50
-rw-r--r--  test/CodeGen/X86/2007-03-01-SpillerCrash.ll | 22
-rw-r--r--  test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-04-24-VectorCrash.ll | 18
-rw-r--r--  test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll | 4
-rw-r--r--  test/CodeGen/X86/2007-07-10-StackerAssert.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll | 42
-rw-r--r--  test/CodeGen/X86/2007-11-02-BadAsm.ll | 4
-rw-r--r--  test/CodeGen/X86/2007-11-06-InstrSched.ll | 4
-rw-r--r--  test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll | 4
-rw-r--r--  test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll | 2
-rw-r--r--  test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll | 14
-rw-r--r--  test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll | 4
-rw-r--r--  test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll | 16
-rw-r--r--  test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll | 12
-rw-r--r--  test/CodeGen/X86/2008-02-27-PEICrash.ll | 12
-rw-r--r--  test/CodeGen/X86/2008-03-18-CoalescerBug.ll | 4
-rw-r--r--  test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll | 10
-rw-r--r--  test/CodeGen/X86/2008-07-19-movups-spills.ll | 1122
-rw-r--r--  test/CodeGen/X86/2008-07-23-VSetCC.ll | 6
-rw-r--r--  test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2008-10-27-CoalescerBug.ll | 4
-rw-r--r--  test/CodeGen/X86/2008-11-03-F80VAARG.ll | 2
-rw-r--r--  test/CodeGen/X86/2008-12-05-SpillerCrash.ll | 24
-rw-r--r--  test/CodeGen/X86/2009-01-16-UIntToFP.ll | 4
-rw-r--r--  test/CodeGen/X86/2009-02-12-SpillerBug.ll | 6
-rw-r--r--  test/CodeGen/X86/2009-02-25-CommuteBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-03-09-SpillerBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-03-12-CPAlignBug.ll | 8
-rw-r--r--  test/CodeGen/X86/2009-03-23-MultiUseSched.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll | 9
-rw-r--r--  test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll | 12
-rw-r--r--  test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll | 48
-rw-r--r--  test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll | 7
-rw-r--r--  test/CodeGen/X86/2009-06-05-VZextByteShort.ll | 37
-rw-r--r--  test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll | 11
-rw-r--r--  test/CodeGen/X86/2009-06-05-sitofpCrash.ll | 13
-rw-r--r--  test/CodeGen/X86/2009-06-06-ConcatVectors.ll | 8
-rw-r--r--  test/CodeGen/X86/abi-isel.ll | 20
-rw-r--r--  test/CodeGen/X86/break-anti-dependencies.ll | 20
-rw-r--r--  test/CodeGen/X86/coalescer-commute1.ll | 2
-rw-r--r--  test/CodeGen/X86/coalescer-commute2.ll | 2
-rw-r--r--  test/CodeGen/X86/coalescer-commute4.ll | 4
-rw-r--r--  test/CodeGen/X86/complex-fca.ll | 2
-rw-r--r--  test/CodeGen/X86/constant-pool-remat-0.ll | 4
-rw-r--r--  test/CodeGen/X86/dagcombine-buildvector.ll | 16
-rw-r--r--  test/CodeGen/X86/extract-combine.ll | 6
-rw-r--r--  test/CodeGen/X86/fabs.ll | 2
-rw-r--r--  test/CodeGen/X86/fast-isel.ll | 8
-rw-r--r--  test/CodeGen/X86/fmul-zero.ll | 9
-rw-r--r--  test/CodeGen/X86/fold-pcmpeqd-0.ll | 32
-rw-r--r--  test/CodeGen/X86/fold-pcmpeqd-2.ll | 16
-rw-r--r--  test/CodeGen/X86/fp-in-intregs.ll | 2
-rw-r--r--  test/CodeGen/X86/fp-stack-compare.ll | 2
-rw-r--r--  test/CodeGen/X86/fp_constant_op.ll | 8
-rw-r--r--  test/CodeGen/X86/fp_load_fold.ll | 8
-rw-r--r--  test/CodeGen/X86/fsxor-alignment.ll | 4
-rw-r--r--  test/CodeGen/X86/full-lsr.ll | 4
-rw-r--r--  test/CodeGen/X86/ga-offset.ll | 2
-rw-r--r--  test/CodeGen/X86/illegal-vector-args-return.ll | 4
-rw-r--r--  test/CodeGen/X86/inline-asm-fpstack.ll | 2
-rw-r--r--  test/CodeGen/X86/inline-asm-mrv.ll | 2
-rw-r--r--  test/CodeGen/X86/inline-asm-x-scalar.ll | 2
-rw-r--r--  test/CodeGen/X86/iv-users-in-other-loops.ll | 122
-rw-r--r--  test/CodeGen/X86/masked-iv-safe.ll | 48
-rw-r--r--  test/CodeGen/X86/masked-iv-unsafe.ll | 78
-rw-r--r--  test/CodeGen/X86/multiple-return-values.ll | 2
-rw-r--r--  test/CodeGen/X86/neg_fp.ll | 4
-rw-r--r--  test/CodeGen/X86/negate-add-zero.ll | 46
-rw-r--r--  test/CodeGen/X86/negative-sin.ll | 4
-rw-r--r--  test/CodeGen/X86/peep-test-0.ll | 2
-rw-r--r--  test/CodeGen/X86/peep-test-1.ll | 2
-rw-r--r--  test/CodeGen/X86/phys_subreg_coalesce.ll | 8
-rw-r--r--  test/CodeGen/X86/pr2656.ll | 4
-rw-r--r--  test/CodeGen/X86/pr3154.ll | 2
-rw-r--r--  test/CodeGen/X86/pr3457.ll | 6
-rw-r--r--  test/CodeGen/X86/pre-split1.ll | 6
-rw-r--r--  test/CodeGen/X86/pre-split10.ll | 14
-rw-r--r--  test/CodeGen/X86/pre-split4.ll | 8
-rw-r--r--  test/CodeGen/X86/pre-split5.ll | 2
-rw-r--r--  test/CodeGen/X86/pre-split6.ll | 4
-rw-r--r--  test/CodeGen/X86/pre-split7.ll | 8
-rw-r--r--  test/CodeGen/X86/pre-split8.ll | 8
-rw-r--r--  test/CodeGen/X86/pre-split9.ll | 8
-rw-r--r--  test/CodeGen/X86/red-zone2.ll | 9
-rw-r--r--  test/CodeGen/X86/remat-constant.ll | 2
-rw-r--r--  test/CodeGen/X86/shrink-fp-const1.ll | 2
-rw-r--r--  test/CodeGen/X86/small-byval-memcpy.ll | 2
-rw-r--r--  test/CodeGen/X86/soft-fp.ll | 2
-rw-r--r--  test/CodeGen/X86/sse-align-0.ll | 4
-rw-r--r--  test/CodeGen/X86/sse-align-2.ll | 4
-rw-r--r--  test/CodeGen/X86/sse-fcopysign.ll | 2
-rw-r--r--  test/CodeGen/X86/sse41-extractps-bitcast-1.ll | 2
-rw-r--r--  test/CodeGen/X86/sse41-pmovx.ll | 2
-rw-r--r--  test/CodeGen/X86/stack-align.ll | 2
-rw-r--r--  test/CodeGen/X86/storetrunc-fp.ll | 2
-rw-r--r--  test/CodeGen/X86/stride-reuse.ll | 2
-rw-r--r--  test/CodeGen/X86/twoaddr-coalesce-2.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_extract.ll | 6
-rw-r--r--  test/CodeGen/X86/vec_fneg.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_ins_extract.ll | 12
-rw-r--r--  test/CodeGen/X86/vec_insert.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_logical.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_select.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-27.ll | 6
-rw-r--r--  test/CodeGen/X86/vec_shuffle-3.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_shuffle-5.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_splat.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_ss_load_fold.ll | 4
-rw-r--r--  test/CodeGen/X86/vec_zero.ll | 2
-rw-r--r--  test/CodeGen/X86/vector.ll | 20
-rw-r--r--  test/CodeGen/X86/widen_arith-6.ll | 4
-rw-r--r--  test/CodeGen/X86/widen_shuffle-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_shuffle-2.ll | 2
-rw-r--r--  test/CodeGen/XCore/2009-01-14-Remat-Crash.ll | 12
-rw-r--r--  test/CodeGen/XCore/fneg.ll | 2
-rw-r--r--  test/ExecutionEngine/2003-01-10-FUCOM.ll | 6
-rw-r--r--  test/ExecutionEngine/test-fp.ll | 8
-rw-r--r--  test/ExecutionEngine/test-setcond-fp.ll | 8
-rw-r--r--  test/Feature/ppcld.ll | 2
-rw-r--r--  test/Feature/sparcld.ll | 2
-rw-r--r--  test/Feature/x86ld.ll | 2
-rw-r--r--  test/FrontendC/2009-01-20-k8.c | 3
-rw-r--r--  test/FrontendC/2009-05-04-EnumInreg.c | 3
-rw-r--r--  test/Other/2004-08-16-PackedSelect.ll | 2
-rw-r--r--  test/Other/2004-08-16-PackedSimple.ll | 2
-rw-r--r--  test/Other/2004-08-20-PackedControlFlow.ll | 2
-rw-r--r--  test/Other/2009-06-05-no-implicit-float.ll | 4
-rw-r--r--  test/Transforms/ConstProp/calls.ll | 6
-rw-r--r--  test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll | 2
-rw-r--r--  test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll | 4
-rw-r--r--  test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll | 4
-rw-r--r--  test/Transforms/GlobalOpt/constantexpr-dangle.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2008-11-03-Floating.ll | 8
-rw-r--r--  test/Transforms/IndVarSimplify/2008-11-17-Floating.ll | 4
-rw-r--r--  test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/2009-04-27-Floating.ll | 2
-rw-r--r--  test/Transforms/IndVarSimplify/iv-zext.ll | 6
-rw-r--r--  test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll | 4
-rw-r--r--  test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll | 4
-rw-r--r--  test/Transforms/InstCombine/2008-07-16-fsub.ll | 2
-rw-r--r--  test/Transforms/InstCombine/add-sitofp.ll | 2
-rw-r--r--  test/Transforms/InstCombine/dce-iterate.ll | 2
-rw-r--r--  test/Transforms/InstCombine/fpextend.ll | 4
-rw-r--r--  test/Transforms/InstCombine/mul.ll | 12
-rw-r--r--  test/Transforms/InstCombine/multi-use-or.ll | 2
-rw-r--r--  test/Transforms/InstCombine/shufflemask-undef.ll | 10
-rw-r--r--  test/Transforms/InstCombine/signed-comparison.ll | 2
-rw-r--r--  test/Transforms/InstCombine/sitofp.ll | 2
-rw-r--r--  test/Transforms/InstCombine/vec_demanded_elts.ll | 6
-rw-r--r--  test/Transforms/InstCombine/vec_narrow.ll | 2
-rw-r--r--  test/Transforms/InstCombine/zero-point-zero-add.ll | 6
-rw-r--r--  test/Transforms/LCSSA/2007-07-12-LICM-2.ll | 8
-rw-r--r--  test/Transforms/LCSSA/2007-07-12-LICM-3.ll | 8
-rw-r--r--  test/Transforms/LCSSA/2007-07-12-LICM.ll | 2
-rw-r--r--  test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll | 10
-rw-r--r--  test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll | 10
-rw-r--r--  test/Transforms/Mem2Reg/PromoteMemToRegister.ll | 2
-rw-r--r--  test/Transforms/MemCpyOpt/memcpy.ll | 2
-rw-r--r--  test/Transforms/MemCpyOpt/sret.ll | 2
-rw-r--r--  test/Transforms/PruneEH/2008-09-05-CGUpdate.ll | 10
-rw-r--r--  test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll | 4
-rw-r--r--  test/Transforms/SCCP/2006-12-04-PackedType.ll | 4
-rw-r--r--  test/Transforms/SCCP/apint-ipsccp4.ll | 4
-rw-r--r--  test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll | 64
-rw-r--r--  test/Transforms/ScalarRepl/copy-aggregate.ll | 2
-rw-r--r--  test/Transforms/ScalarRepl/memcpy-from-global.ll | 16
-rw-r--r--  test/Transforms/ScalarRepl/vector_promote.ll | 12
-rw-r--r--  test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll | 14
-rw-r--r--  test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll | 2
-rw-r--r--  test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll | 2
-rw-r--r--  test/Transforms/SimplifyCFG/2009-05-12-externweak.ll | 2
-rw-r--r--  test/Transforms/SimplifyLibCalls/half-powr.ll | 2
-rw-r--r--  tools/CMakeLists.txt | 6
-rw-r--r--  tools/llc/llc.cpp | 17
-rw-r--r--  tools/llvm-ld/Optimize.cpp | 2
-rw-r--r--  tools/lto/LTOCodeGenerator.cpp | 68
-rw-r--r--  tools/lto/LTOCodeGenerator.h | 2
-rw-r--r--  tools/lto/lto.cpp | 8
-rw-r--r--  tools/lto/lto.exports | 1
-rw-r--r--  utils/llvm.grm | 3
-rw-r--r--  utils/vim/llvm.vim | 3
372 files changed, 4347 insertions, 2884 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ba63484..8133398 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -58,13 +58,22 @@ endif( MSVC )
option(LLVM_ENABLE_THREADS "Use threads if available." ON)
if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
- option(LLVM_ENABLE_ASSERTS "Enable asserts" OFF)
+ option(LLVM_ENABLE_ASSERTIONS "Enable assertions" OFF)
else()
- option(LLVM_ENABLE_ASSERTS "Enable asserts" ON)
+ option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON)
endif()
-if( LLVM_ENABLE_ASSERTS )
- add_definitions( -D_DEBUG -UNDEBUG )
+if( LLVM_ENABLE_ASSERTIONS )
+ add_definitions( -D_DEBUG )
+ # On Release builds cmake automatically defines NDEBUG, so we
+ # explicitly undefine it:
+ if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
+ add_definitions( -UNDEBUG )
+ endif()
+else()
+ if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
+ add_definitions( -DNDEBUG )
+ endif()
endif()
if( LLVM_TARGETS_TO_BUILD STREQUAL "all" )
@@ -81,6 +90,24 @@ endforeach(c)
set(llvm_builded_incs_dir ${LLVM_BINARY_DIR}/include/llvm)
+# The USE_EXPLICIT_DEPENDENCIES variable will be TRUE to indicate that
+# we should use the library dependencies explicitly specified in the
+# CMakeLists.txt files rather than those determined by
+# llvm-config. This value must be true for non-make and IDE
+# generators.
+if (MSVC_IDE)
+ set(DEFAULT_USE_EXPLICIT_DEPENDENCIES ON)
+elseif (XCODE)
+ set(DEFAULT_USE_EXPLICIT_DEPENDENCIES ON)
+else ()
+ set(DEFAULT_USE_EXPLICIT_DEPENDENCIES OFF)
+endif ()
+
+option(USE_EXPLICIT_DEPENDENCIES
+ "Use explicit dependencies instead of llvm-config"
+ ${DEFAULT_USE_EXPLICIT_DEPENDENCIES})
+mark_as_advanced(USE_EXPLICIT_DEPENDENCIES)
+
# Add path for custom modules
set(CMAKE_MODULE_PATH
${CMAKE_MODULE_PATH}
@@ -123,11 +150,13 @@ include(config-ix)
option(LLVM_ENABLE_PIC "Build Position-Independent Code" OFF)
+set(ENABLE_PIC 0)
if( LLVM_ENABLE_PIC )
if( SUPPORTS_FPIC_FLAG )
message(STATUS "Building with -fPIC")
add_llvm_definitions(-fPIC)
- else( SUPPORTS_FPIC_FLAG )
+ set(ENABLE_PIC 1)
+ else( SUPPORTS_FPIC_FLAG )
message(STATUS "Warning: -fPIC not supported.")
endif()
endif()
diff --git a/Makefile.rules b/Makefile.rules
index caa3335..4a77bf5 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -1066,7 +1066,7 @@ all-local:: $(LibName.O)
$(LibName.O): $(ObjectsO) $(LibDir)/.dir
$(Echo) Linking $(BuildMode) Object Library $(notdir $@)
- $(Verb) $(Relink) -Wl,-r -nodefaultlibs -nostdlib -nostartfiles -o $@ $(ObjectsO)
+ $(Verb) $(Relink) -r -nodefaultlibs -nostdlib -nostartfiles -o $@ $(ObjectsO)
clean-local::
ifneq ($(strip $(LibName.O)),)
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 7f625d4..64b78a4 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -279,7 +279,7 @@ dnl===-----------------------------------------------------------------------===
dnl --enable-optimized : check whether they want to do an optimized build:
AC_ARG_ENABLE(optimized, AS_HELP_STRING(
- [--enable-optimized,Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
+ --enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
if test ${enableval} = "no" ; then
AC_SUBST(ENABLE_OPTIMIZED,[[]])
else
@@ -288,7 +288,7 @@ fi
dnl --enable-profiling : check whether they want to do a profile build:
AC_ARG_ENABLE(profiling, AS_HELP_STRING(
- [--enable-profiling,Compile with profiling enabled (default is NO)]),,enableval="no")
+ --enable-profiling,[Compile with profiling enabled (default is NO)]),,enableval="no")
if test ${enableval} = "no" ; then
AC_SUBST(ENABLE_PROFILING,[[]])
else
@@ -297,7 +297,7 @@ fi
dnl --enable-assertions : check whether they want to turn on assertions or not:
AC_ARG_ENABLE(assertions,AS_HELP_STRING(
- [--enable-assertions,Compile with assertion checks enabled (default is YES)]),, enableval="yes")
+ --enable-assertions,[Compile with assertion checks enabled (default is YES)]),, enableval="yes")
if test ${enableval} = "yes" ; then
AC_SUBST(DISABLE_ASSERTIONS,[[]])
else
@@ -306,7 +306,7 @@ fi
dnl --enable-expensive-checks : check whether they want to turn on expensive debug checks:
AC_ARG_ENABLE(expensive-checks,AS_HELP_STRING(
- [--enable-expensive-checks,Compile with expensive debug checks enabled (default is NO)]),, enableval="no")
+ --enable-expensive-checks,[Compile with expensive debug checks enabled (default is NO)]),, enableval="no")
if test ${enableval} = "yes" ; then
AC_SUBST(ENABLE_EXPENSIVE_CHECKS,[[ENABLE_EXPENSIVE_CHECKS=1]])
AC_SUBST(EXPENSIVE_CHECKS,[[yes]])
@@ -317,7 +317,7 @@ fi
dnl --enable-debug-runtime : should runtime libraries have debug symbols?
AC_ARG_ENABLE(debug-runtime,
- AS_HELP_STRING([--enable-debug-runtime,Build runtime libs with debug symbols (default is NO)]),,enableval=no)
+ AS_HELP_STRING(--enable-debug-runtime,[Build runtime libs with debug symbols (default is NO)]),,enableval=no)
if test ${enableval} = "no" ; then
AC_SUBST(DEBUG_RUNTIME,[[]])
else
@@ -553,6 +553,16 @@ if test "x$WITH_BINUTILS_INCDIR" != xdefault ; then
fi
fi
+dnl --enable-libffi : check whether the user wants to turn off libffi:
+AC_ARG_ENABLE(libffi,AS_HELP_STRING(
+ --enable-libffi,[Check for the presence of libffi (default is YES)]),,
+ enableval=yes)
+case "$enableval" in
+ yes) llvm_cv_enable_libffi="yes" ;;
+ no) llvm_cv_enable_libffi="no" ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-libffi. Use "yes" or "no"]) ;;
+esac
+
dnl===-----------------------------------------------------------------------===
dnl===
dnl=== SECTION 4: Check for programs we need and that they are the right version
@@ -769,9 +779,11 @@ AC_SEARCH_LIBS(dlopen,dl,AC_DEFINE([HAVE_DLOPEN],[1],
AC_MSG_WARN([dlopen() not found - disabling plugin support]))
dnl libffi is optional; used to call external functions from the interpreter
-AC_SEARCH_LIBS(ffi_call,ffi,AC_DEFINE([HAVE_FFI_CALL],[1],
- [Define if libffi is available on this platform.]),
- AC_MSG_WARN([libffi not found - disabling external calls from interpreter]))
+if test "$llvm_cv_enable_libffi" = "yes" ; then
+ AC_SEARCH_LIBS(ffi_call,ffi,AC_DEFINE([HAVE_FFI_CALL],[1],
+ [Define if libffi is available on this platform.]),
+ AC_MSG_WARN([libffi not found - disabling external calls from interpreter]))
+fi
dnl mallinfo is optional; the code can compile (minus features) without it
AC_SEARCH_LIBS(mallinfo,malloc,AC_DEFINE([HAVE_MALLINFO],[1],
@@ -836,7 +848,9 @@ else
fi
dnl Try to find ffi.h.
-AC_CHECK_HEADERS([ffi.h ffi/ffi.h])
+if test "$llvm_cv_enable_libffi" = "yes" ; then
+ AC_CHECK_HEADERS([ffi.h ffi/ffi.h])
+fi
dnl===-----------------------------------------------------------------------===
dnl===
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 0c903a1..c18ac44 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -1,6 +1,7 @@
# include checks
include(CheckIncludeFile)
+check_include_file(alloca.h HAVE_ALLOCA_H)
check_include_file(argz.h HAVE_ARGZ_H)
check_include_file(assert.h HAVE_ASSERT_H)
check_include_file(dirent.h HAVE_DIRENT_H)
@@ -34,6 +35,7 @@ check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H)
check_include_file(sys/stat.h HAVE_SYS_STAT_H)
check_include_file(sys/time.h HAVE_SYS_TIME_H)
check_include_file(sys/types.h HAVE_SYS_TYPES_H)
+check_include_file(sys/wait.h HAVE_SYS_WAIT_H)
check_include_file(termios.h HAVE_TERMIOS_H)
check_include_file(unistd.h HAVE_UNISTD_H)
check_include_file(utime.h HAVE_UTIME_H)
@@ -47,6 +49,7 @@ check_library_exists(dl dlopen "" HAVE_LIBDL)
# function checks
include(CheckSymbolExists)
include(CheckFunctionExists)
+check_symbol_exists(alloca alloca.h HAVE_ALLOCA)
check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE)
check_symbol_exists(getrusage sys/resource.h HAVE_GETRUSAGE)
check_symbol_exists(setrlimit sys/resource.h HAVE_SETRLIMIT)
@@ -58,6 +61,8 @@ check_symbol_exists(isnan math.h HAVE_ISNAN_IN_MATH_H)
check_symbol_exists(ceilf math.h HAVE_CEILF)
check_symbol_exists(floorf math.h HAVE_FLOORF)
check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO)
+check_symbol_exists(malloc_zone_statistics malloc/malloc.h
+ HAVE_MALLOC_ZONE_STATISTICS)
check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK)
check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL)
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index c531298..a21ed20 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -26,11 +26,11 @@ macro(add_llvm_executable name)
if( LLVM_LINK_COMPONENTS )
llvm_config(${name} ${LLVM_LINK_COMPONENTS})
endif( LLVM_LINK_COMPONENTS )
- if( MSVC )
+ if( USE_EXPLICIT_DEPENDENCIES )
target_link_libraries(${name} ${llvm_libs})
- else( MSVC )
+ else( )
add_dependencies(${name} llvm-config.target)
- endif( MSVC )
+ endif( )
get_system_libs(llvm_system_libs)
if( llvm_system_libs )
target_link_libraries(${name} ${llvm_system_libs})
diff --git a/cmake/modules/AddPartiallyLinkedObject.cmake b/cmake/modules/AddPartiallyLinkedObject.cmake
index c9d96de..d20666d 100755
--- a/cmake/modules/AddPartiallyLinkedObject.cmake
+++ b/cmake/modules/AddPartiallyLinkedObject.cmake
@@ -1,18 +1,18 @@
include(LLVMProcessSources)
macro(target_name_of_partially_linked_object lib var)
- if( MSVC )
+ if( USE_EXPLICIT_DEPENDENCIES )
set(${var} ${lib})
- else( MSVC )
+ else( )
set(${var} ${lib}_pll)
- endif( MSVC )
+ endif( )
endmacro(target_name_of_partially_linked_object lib var)
macro(add_partially_linked_object lib)
- if( MSVC )
+ if( USE_EXPLICIT_DEPENDENCIES )
add_llvm_library( ${lib} ${ARGN})
- else( MSVC )
+ else( )
set(pll ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}/${lib}.o)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/temp_lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/temp_lib)
@@ -36,7 +36,7 @@ macro(add_partially_linked_object lib)
add_custom_target(${tnplo} ALL DEPENDS ${pll})
set( llvm_libs ${llvm_libs} ${pll} PARENT_SCOPE)
set( llvm_lib_targets ${llvm_lib_targets} ${tnplo} PARENT_SCOPE )
- endif( MSVC )
+ endif( )
install(FILES ${pll}
DESTINATION lib)
endmacro(add_partially_linked_object lib)
diff --git a/cmake/modules/LLVMConfig.cmake b/cmake/modules/LLVMConfig.cmake
index 9776fb0..fd92495 100755
--- a/cmake/modules/LLVMConfig.cmake
+++ b/cmake/modules/LLVMConfig.cmake
@@ -21,36 +21,40 @@ endfunction(get_system_libs)
macro(llvm_config executable)
# extra args is the list of link components.
- if( MSVC )
- msvc_llvm_config(${executable} ${ARGN})
- else( MSVC )
+ if( USE_EXPLICIT_DEPENDENCIES )
+ explicit_llvm_config(${executable} ${ARGN})
+ else( )
nix_llvm_config(${executable} ${ARGN})
- endif( MSVC )
+ endif( )
endmacro(llvm_config)
-function(msvc_llvm_config executable)
+function(explicit_llvm_config executable)
set( link_components ${ARGN} )
- if( CMAKE_CL_64 )
- set(include_lflag "/INCLUDE:")
- else( CMAKE_CL_64 )
- set(include_lflag "/INCLUDE:_")
- endif()
- foreach(c ${link_components})
- if( c STREQUAL "jit" )
- set(lfgs "${lfgs} ${include_lflag}X86TargetMachineModule")
- endif( c STREQUAL "jit" )
- list(FIND LLVM_TARGETS_TO_BUILD ${c} idx)
- if( NOT idx LESS 0 )
- set(lfgs "${lfgs} ${include_lflag}${c}TargetMachineModule")
- list(FIND LLVM_ASMPRINTERS_FORCE_LINK ${c} idx)
+
+ set(lfgs)
+ if (MSVC)
+ if( CMAKE_CL_64 )
+ set(include_lflag "/INCLUDE:")
+ else( CMAKE_CL_64 )
+ set(include_lflag "/INCLUDE:_")
+ endif()
+ foreach(c ${link_components})
+ if( c STREQUAL "jit" )
+ set(lfgs "${lfgs} ${include_lflag}X86TargetMachineModule")
+ endif( c STREQUAL "jit" )
+ list(FIND LLVM_TARGETS_TO_BUILD ${c} idx)
if( NOT idx LESS 0 )
- set(lfgs "${lfgs} ${include_lflag}${c}AsmPrinterForceLink")
+ set(lfgs "${lfgs} ${include_lflag}${c}TargetMachineModule")
+ list(FIND LLVM_ASMPRINTERS_FORCE_LINK ${c} idx)
+ if( NOT idx LESS 0 )
+ set(lfgs "${lfgs} ${include_lflag}${c}AsmPrinterForceLink")
+ endif()
endif()
- endif()
- endforeach(c)
+ endforeach(c)
+ endif ()
- msvc_map_components_to_libraries(LIBRARIES ${link_components})
+ explicit_map_components_to_libraries(LIBRARIES ${link_components})
target_link_libraries(${executable} ${LIBRARIES})
if( lfgs )
@@ -58,10 +62,10 @@ function(msvc_llvm_config executable)
PROPERTIES
LINK_FLAGS ${lfgs})
endif()
-endfunction(msvc_llvm_config)
+endfunction(explicit_llvm_config)
-function(msvc_map_components_to_libraries out_libs)
+function(explicit_map_components_to_libraries out_libs)
set( link_components ${ARGN} )
foreach(c ${link_components})
# add codegen/asmprinter
@@ -121,7 +125,7 @@ function(msvc_map_components_to_libraries out_libs)
endwhile( ${curr_idx} LESS ${lst_size} )
list(REMOVE_DUPLICATES result)
set(${out_libs} ${result} PARENT_SCOPE)
-endfunction(msvc_map_components_to_libraries)
+endfunction(explicit_map_components_to_libraries)
macro(nix_llvm_config executable)
diff --git a/configure b/configure
index 1179524..9a30c29 100755
--- a/configure
+++ b/configure
@@ -1533,12 +1533,15 @@ if test -n "$ac_init_help"; then
Optional Features:
--disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
- --enable-optimized
- --enable-profiling
- --enable-assertions
+ --enable-optimized Compile with optimizations enabled (default is NO)
+ --enable-profiling Compile with profiling enabled (default is NO)
+ --enable-assertions Compile with assertion checks enabled (default is
+ YES)
--enable-expensive-checks
-
- --enable-debug-runtime
+ Compile with expensive debug checks enabled (default
+ is NO)
+ --enable-debug-runtime Build runtime libs with debug symbols (default is
+ NO)
--enable-jit Enable Just In Time Compiling (default is YES)
--enable-doxygen Build doxygen documentation (default is NO)
--enable-threads Use threads if available (default is YES)
@@ -1550,6 +1553,7 @@ Optional Features:
%a (default is YES)
--enable-bindings Build specific language bindings:
all,auto,none,{binding-name} (default=auto)
+ --enable-libffi Check for the presence of libffi (default is YES)
--enable-ltdl-install install libltdl
--enable-shared[=PKGS] build shared libraries
[default=yes]
@@ -5111,6 +5115,21 @@ echo "$as_me: error: Invalid path to directory containing plugin-api.h." >&2;}
fi
fi
+# Check whether --enable-libffi was given.
+if test "${enable_libffi+set}" = set; then
+ enableval=$enable_libffi;
+else
+ enableval=yes
+fi
+
+case "$enableval" in
+ yes) llvm_cv_enable_libffi="yes" ;;
+ no) llvm_cv_enable_libffi="no" ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-libffi. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-libffi. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
@@ -10575,7 +10594,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10578 "configure"
+#line 10597 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -12719,7 +12738,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
- echo '#line 12722 "configure"' > conftest.$ac_ext
+ echo '#line 12741 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -14437,11 +14456,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14440: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14459: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:14444: \$? = $ac_status" >&5
+ echo "$as_me:14463: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -14705,11 +14724,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14708: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14727: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:14712: \$? = $ac_status" >&5
+ echo "$as_me:14731: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -14809,11 +14828,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14812: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14831: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:14816: \$? = $ac_status" >&5
+ echo "$as_me:14835: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -17261,7 +17280,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 17264 "configure"
+#line 17283 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17361,7 +17380,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 17364 "configure"
+#line 17383 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -19729,11 +19748,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:19732: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:19751: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:19736: \$? = $ac_status" >&5
+ echo "$as_me:19755: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -19833,11 +19852,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:19836: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:19855: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:19840: \$? = $ac_status" >&5
+ echo "$as_me:19859: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -21403,11 +21422,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:21406: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:21425: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:21410: \$? = $ac_status" >&5
+ echo "$as_me:21429: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -21507,11 +21526,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:21510: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:21529: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:21514: \$? = $ac_status" >&5
+ echo "$as_me:21533: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -23742,11 +23761,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:23745: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:23764: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:23749: \$? = $ac_status" >&5
+ echo "$as_me:23768: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -24010,11 +24029,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24013: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24032: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:24017: \$? = $ac_status" >&5
+ echo "$as_me:24036: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -24114,11 +24133,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24117: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24136: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:24121: \$? = $ac_status" >&5
+ echo "$as_me:24140: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -27473,7 +27492,8 @@ echo "$as_me: WARNING: dlopen() not found - disabling plugin support" >&2;}
fi
-{ echo "$as_me:$LINENO: checking for library containing ffi_call" >&5
+if test "$llvm_cv_enable_libffi" = "yes" ; then
+ { echo "$as_me:$LINENO: checking for library containing ffi_call" >&5
echo $ECHO_N "checking for library containing ffi_call... $ECHO_C" >&6; }
if test "${ac_cv_search_ffi_call+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
@@ -27579,6 +27599,7 @@ else
echo "$as_me: WARNING: libffi not found - disabling external calls from interpreter" >&2;}
fi
+fi
{ echo "$as_me:$LINENO: checking for library containing mallinfo" >&5
echo $ECHO_N "checking for library containing mallinfo... $ECHO_C" >&6; }
@@ -29824,6 +29845,7 @@ else
fi
+if test "$llvm_cv_enable_libffi" = "yes" ; then
for ac_header in ffi.h ffi/ffi.h
@@ -29994,6 +30016,7 @@ fi
done
+fi
diff --git a/docs/CMake.html b/docs/CMake.html
index b329ca4..ac3b57a 100644
--- a/docs/CMake.html
+++ b/docs/CMake.html
@@ -248,8 +248,8 @@
<dt><b>LLVM_ENABLE_THREADS</b>:BOOL</dt>
<dd>Build with threads support, if available. Defaults to ON.</dd>
- <dt><b>LLVM_ENABLE_ASSERTS</b>:BOOL</dt>
- <dd>Enables code asserts. Defaults to ON if and only if
+ <dt><b>LLVM_ENABLE_ASSERTIONS</b>:BOOL</dt>
+ <dd>Enables code assertions. Defaults to ON if and only if
CMAKE_BUILD_TYPE is <i>Release</i>.</dd>
<dt><b>LLVM_ENABLE_PIC</b>:BOOL</dt>
diff --git a/docs/GoldPlugin.html b/docs/GoldPlugin.html
index 6be5277..17a50ac 100644
--- a/docs/GoldPlugin.html
+++ b/docs/GoldPlugin.html
@@ -153,12 +153,21 @@ $ llvm-gcc -use-gold-plugin a.o b.o -o main # &lt;-- link with LLVMgold plugin
<pre class="doc_code">
export CC="$PREFIX/bin/llvm-gcc -use-gold-plugin"
export CXX="$PREFIX/bin/llvm-g++ -use-gold-plugin"
-export AR="$PREFIX/bin/ar --plugin libLLVMgold.so"
-export NM="$PREFIX/bin/nm --plugin libLLVMgold.so"
+export AR="$PREFIX/bin/ar"
+export NM="$PREFIX/bin/nm"
export RANLIB=/bin/true #ranlib is not needed, and doesn't support .bc files in .a
export CFLAGS="-O4"
</pre>
</li>
+ <li>Or you can just set your path:
+ <pre class="doc_code">
+export PATH="$PREFIX/bin:$PATH"
+export CC="llvm-gcc -use-gold-plugin"
+export CXX="llvm-g++ -use-gold-plugin"
+export RANLIB=/bin/true
+export CFLAGS="-O4"
+ </pre>
+ </li>
<li>Configure &amp; build the project as usual: <tt>./configure &amp;&amp; make &amp;&amp; make check</tt> </li>
</ul>
<p> The environment variable settings may work for non-autotooled projects
diff --git a/docs/LangRef.html b/docs/LangRef.html
index f0a171be..89d4f93 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -89,8 +89,11 @@
<li><a href="#binaryops">Binary Operations</a>
<ol>
<li><a href="#i_add">'<tt>add</tt>' Instruction</a></li>
+ <li><a href="#i_fadd">'<tt>fadd</tt>' Instruction</a></li>
<li><a href="#i_sub">'<tt>sub</tt>' Instruction</a></li>
+ <li><a href="#i_fsub">'<tt>fsub</tt>' Instruction</a></li>
<li><a href="#i_mul">'<tt>mul</tt>' Instruction</a></li>
+ <li><a href="#i_fmul">'<tt>fmul</tt>' Instruction</a></li>
<li><a href="#i_udiv">'<tt>udiv</tt>' Instruction</a></li>
<li><a href="#i_sdiv">'<tt>sdiv</tt>' Instruction</a></li>
<li><a href="#i_fdiv">'<tt>fdiv</tt>' Instruction</a></li>
@@ -2503,16 +2506,15 @@ The result value has the same type as its operands.</p>
<h5>Arguments:</h5>
<p>The two arguments to the '<tt>add</tt>' instruction must be <a
- href="#t_integer">integer</a>, <a href="#t_floating">floating point</a>, or
- <a href="#t_vector">vector</a> values. Both arguments must have identical
- types.</p>
+ href="#t_integer">integer</a> or
+ <a href="#t_vector">vector</a> of integer values. Both arguments must
+ have identical types.</p>
<h5>Semantics:</h5>
-<p>The value produced is the integer or floating point sum of the two
-operands.</p>
+<p>The value produced is the integer sum of the two operands.</p>
-<p>If an integer sum has unsigned overflow, the result returned is the
+<p>If the sum has unsigned overflow, the result returned is the
mathematical result modulo 2<sup>n</sup>, where n is the bit width of
the result.</p>
@@ -2527,6 +2529,39 @@ instruction is appropriate for both signed and unsigned integers.</p>
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
+ <a name="i_fadd">'<tt>fadd</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;result&gt; = fadd &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>fadd</tt>' instruction returns the sum of its two operands.</p>
+
+<h5>Arguments:</h5>
+
+<p>The two arguments to the '<tt>fadd</tt>' instruction must be
+<a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
+floating point values. Both arguments must have identical types.</p>
+
+<h5>Semantics:</h5>
+
+<p>The value produced is the floating point sum of the two operands.</p>
+
+<h5>Example:</h5>
+
+<pre>
+ &lt;result&gt; = fadd float 4.0, %var <i>; yields {float}:result = 4.0 + %var</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
<a name="i_sub">'<tt>sub</tt>' Instruction</a>
</div>
@@ -2550,16 +2585,14 @@ representations.</p>
<h5>Arguments:</h5>
<p>The two arguments to the '<tt>sub</tt>' instruction must be <a
- href="#t_integer">integer</a>, <a href="#t_floating">floating point</a>,
- or <a href="#t_vector">vector</a> values. Both arguments must have identical
- types.</p>
+ href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
+ integer values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-<p>The value produced is the integer or floating point difference of
-the two operands.</p>
+<p>The value produced is the integer difference of the two operands.</p>
-<p>If an integer difference has unsigned overflow, the result returned is the
+<p>If the difference has unsigned overflow, the result returned is the
mathematical result modulo 2<sup>n</sup>, where n is the bit width of
the result.</p>
@@ -2575,6 +2608,45 @@ instruction is appropriate for both signed and unsigned integers.</p>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
+ <a name="i_fsub">'<tt>fsub</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;result&gt; = fsub &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>fsub</tt>' instruction returns the difference of its two
+operands.</p>
+
+<p>Note that the '<tt>fsub</tt>' instruction is used to represent the
+'<tt>fneg</tt>' instruction present in most other intermediate
+representations.</p>
+
+<h5>Arguments:</h5>
+
+<p>The two arguments to the '<tt>fsub</tt>' instruction must be <a
+ <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a>
+ of floating point values. Both arguments must have identical types.</p>
+
+<h5>Semantics:</h5>
+
+<p>The value produced is the floating point difference of the two operands.</p>
+
+<h5>Example:</h5>
+<pre>
+ &lt;result&gt; = fsub float 4.0, %var <i>; yields {float}:result = 4.0 - %var</i>
+ &lt;result&gt; = fsub float -0.0, %val <i>; yields {float}:result = -%var</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
<a name="i_mul">'<tt>mul</tt>' Instruction</a>
</div>
@@ -2590,16 +2662,14 @@ operands.</p>
<h5>Arguments:</h5>
<p>The two arguments to the '<tt>mul</tt>' instruction must be <a
-href="#t_integer">integer</a>, <a href="#t_floating">floating point</a>,
-or <a href="#t_vector">vector</a> values. Both arguments must have identical
-types.</p>
+href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+values. Both arguments must have identical types.</p>
<h5>Semantics:</h5>
-<p>The value produced is the integer or floating point product of the
-two operands.</p>
+<p>The value produced is the integer product of the two operands.</p>
-<p>If the result of an integer multiplication has unsigned overflow,
+<p>If the result of the multiplication has unsigned overflow,
the result returned is the mathematical result modulo
2<sup>n</sup>, where n is the bit width of the result.</p>
<p>Because LLVM integers use a two's complement representation, and the
@@ -2614,6 +2684,35 @@ width of the full product.</p>
</div>
<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_fmul">'<tt>fmul</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = fmul &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>fmul</tt>' instruction returns the product of its two
+operands.</p>
+
+<h5>Arguments:</h5>
+
+<p>The two arguments to the '<tt>fmul</tt>' instruction must be
+<a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a>
+of floating point values. Both arguments must have identical types.</p>
+
+<h5>Semantics:</h5>
+
+<p>The value produced is the floating point product of the two operands.</p>
+
+<h5>Example:</h5>
+<pre> &lt;result&gt; = fmul float 4.0, %var <i>; yields {float}:result = 4.0 * %var</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="i_udiv">'<tt>udiv</tt>' Instruction
</a></div>
<div class="doc_text">
@@ -7110,7 +7209,7 @@ declare void @llvm.stackprotector( i8* &lt;guard&gt;, i8** &lt;slot&gt; )
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-03 12:20:10 +0200 (Wed, 03 Jun 2009) $
+ Last modified: $Date: 2009-06-05 00:49:04 +0200 (Fri, 05 Jun 2009) $
</address>
</body>
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 8198617..5d92fc5 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -19,6 +19,8 @@
#include <stdbool.h>
#include <stddef.h>
+#define LTO_API_VERSION 3
+
typedef enum {
LTO_SYMBOL_ALIGNMENT_MASK = 0x0000001F, /* log2 of alignment */
LTO_SYMBOL_PERMISSIONS_MASK = 0x000000E0,
@@ -208,6 +210,14 @@ lto_codegen_set_gcc_path(lto_code_gen_t cg, const char* path);
/**
+ * Sets the location of the assembler tool to run. If not set, libLTO
+ * will use gcc to invoke the assembler.
+ */
+extern void
+lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path);
+
+
+/**
* Adds to a list of all global symbols that must exist in the final
* generated code. If a function is not listed, it might be
* inlined into every usage and optimized away.
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index b405f5b..347e239 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -270,12 +270,17 @@ protected:
NewBBIDom = PredBlocks[i];
break;
}
- assert(i != PredBlocks.size() && "No reachable preds?");
+
+ // It's possible that none of the predecessors of NewBB are reachable;
+ // in that case, NewBB itself is unreachable, so nothing needs to be
+ // changed.
+ if (!NewBBIDom)
+ return;
+
for (i = i + 1; i < PredBlocks.size(); ++i) {
if (DT.isReachableFromEntry(PredBlocks[i]))
NewBBIDom = DT.findNearestCommonDominator(NewBBIDom, PredBlocks[i]);
}
- assert(NewBBIDom && "No immediate dominator found??");
// Create the new dominator tree node... and set the idom of NewBB.
DomTreeNodeBase<NodeT> *NewBBNode = DT.addNewBlock(NewBB, NewBBIDom);
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 7e0de47..b40fbf0 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -60,12 +60,7 @@ namespace llvm {
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
/// starts at zero and steps by one on each iteration.
- Value *getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty){
- assert(Ty->isInteger() && "Can only insert integer induction variables!");
- SCEVHandle H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
- SE.getIntegerSCEV(1, Ty), L);
- return expand(H);
- }
+ Value *getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty);
/// addInsertedValue - Remember the specified instruction as being the
/// canonical form for the specified SCEV.
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 972dbfa..a594e32 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -54,13 +54,17 @@ const Attributes Alignment = 31<<16; ///< Alignment of parameter (5 bits)
// stored as log2 of alignment with +1 bias
// 0 means unaligned different from align 1
const Attributes NoCapture = 1<<21; ///< Function creates no aliases of pointer
+const Attributes NoRedZone = 1<<22; /// disable redzone
+const Attributes NoImplicitFloat = 1<<23; /// disable implicit floating point
+ /// instructions.
/// @brief Attributes that only apply to function parameters.
const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
/// @brief Attributes that only apply to function.
const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
- NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq;
+ NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
+ NoRedZone | NoImplicitFloat;
/// @brief Parameter attributes that do not apply to vararg call arguments.
const Attributes VarArgsIncompatible = StructRet;
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index 81a7c60..bf6b76e 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -89,7 +89,7 @@ public:
/// emitByte - This callback is invoked when a byte needs to be written to the
/// output stream.
///
- void emitByte(unsigned char B) {
+ void emitByte(uint8_t B) {
if (CurBufferPtr != BufferEnd)
*CurBufferPtr++ = B;
}
@@ -99,10 +99,10 @@ public:
///
void emitWordLE(unsigned W) {
if (4 <= BufferEnd-CurBufferPtr) {
- *CurBufferPtr++ = (unsigned char)(W >> 0);
- *CurBufferPtr++ = (unsigned char)(W >> 8);
- *CurBufferPtr++ = (unsigned char)(W >> 16);
- *CurBufferPtr++ = (unsigned char)(W >> 24);
+ *CurBufferPtr++ = (uint8_t)(W >> 0);
+ *CurBufferPtr++ = (uint8_t)(W >> 8);
+ *CurBufferPtr++ = (uint8_t)(W >> 16);
+ *CurBufferPtr++ = (uint8_t)(W >> 24);
} else {
CurBufferPtr = BufferEnd;
}
@@ -113,10 +113,10 @@ public:
///
void emitWordBE(unsigned W) {
if (4 <= BufferEnd-CurBufferPtr) {
- *CurBufferPtr++ = (unsigned char)(W >> 24);
- *CurBufferPtr++ = (unsigned char)(W >> 16);
- *CurBufferPtr++ = (unsigned char)(W >> 8);
- *CurBufferPtr++ = (unsigned char)(W >> 0);
+ *CurBufferPtr++ = (uint8_t)(W >> 24);
+ *CurBufferPtr++ = (uint8_t)(W >> 16);
+ *CurBufferPtr++ = (uint8_t)(W >> 8);
+ *CurBufferPtr++ = (uint8_t)(W >> 0);
} else {
CurBufferPtr = BufferEnd;
}
@@ -127,14 +127,14 @@ public:
///
void emitDWordLE(uint64_t W) {
if (8 <= BufferEnd-CurBufferPtr) {
- *CurBufferPtr++ = (unsigned char)(W >> 0);
- *CurBufferPtr++ = (unsigned char)(W >> 8);
- *CurBufferPtr++ = (unsigned char)(W >> 16);
- *CurBufferPtr++ = (unsigned char)(W >> 24);
- *CurBufferPtr++ = (unsigned char)(W >> 32);
- *CurBufferPtr++ = (unsigned char)(W >> 40);
- *CurBufferPtr++ = (unsigned char)(W >> 48);
- *CurBufferPtr++ = (unsigned char)(W >> 56);
+ *CurBufferPtr++ = (uint8_t)(W >> 0);
+ *CurBufferPtr++ = (uint8_t)(W >> 8);
+ *CurBufferPtr++ = (uint8_t)(W >> 16);
+ *CurBufferPtr++ = (uint8_t)(W >> 24);
+ *CurBufferPtr++ = (uint8_t)(W >> 32);
+ *CurBufferPtr++ = (uint8_t)(W >> 40);
+ *CurBufferPtr++ = (uint8_t)(W >> 48);
+ *CurBufferPtr++ = (uint8_t)(W >> 56);
} else {
CurBufferPtr = BufferEnd;
}
@@ -145,14 +145,14 @@ public:
///
void emitDWordBE(uint64_t W) {
if (8 <= BufferEnd-CurBufferPtr) {
- *CurBufferPtr++ = (unsigned char)(W >> 56);
- *CurBufferPtr++ = (unsigned char)(W >> 48);
- *CurBufferPtr++ = (unsigned char)(W >> 40);
- *CurBufferPtr++ = (unsigned char)(W >> 32);
- *CurBufferPtr++ = (unsigned char)(W >> 24);
- *CurBufferPtr++ = (unsigned char)(W >> 16);
- *CurBufferPtr++ = (unsigned char)(W >> 8);
- *CurBufferPtr++ = (unsigned char)(W >> 0);
+ *CurBufferPtr++ = (uint8_t)(W >> 56);
+ *CurBufferPtr++ = (uint8_t)(W >> 48);
+ *CurBufferPtr++ = (uint8_t)(W >> 40);
+ *CurBufferPtr++ = (uint8_t)(W >> 32);
+ *CurBufferPtr++ = (uint8_t)(W >> 24);
+ *CurBufferPtr++ = (uint8_t)(W >> 16);
+ *CurBufferPtr++ = (uint8_t)(W >> 8);
+ *CurBufferPtr++ = (uint8_t)(W >> 0);
} else {
CurBufferPtr = BufferEnd;
}
@@ -166,8 +166,8 @@ public:
if (Alignment <= (uintptr_t)(BufferEnd-CurBufferPtr)) {
// Move the current buffer ptr up to the specified alignment.
CurBufferPtr =
- (unsigned char*)(((uintptr_t)CurBufferPtr+Alignment-1) &
- ~(uintptr_t)(Alignment-1));
+ (uint8_t*)(((uintptr_t)CurBufferPtr+Alignment-1) &
+ ~(uintptr_t)(Alignment-1));
} else {
CurBufferPtr = BufferEnd;
}
@@ -178,7 +178,7 @@ public:
/// written to the output stream.
void emitULEB128Bytes(unsigned Value) {
do {
- unsigned char Byte = Value & 0x7f;
+ uint8_t Byte = Value & 0x7f;
Value >>= 7;
if (Value) Byte |= 0x80;
emitByte(Byte);
@@ -187,12 +187,12 @@ public:
/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
/// written to the output stream.
- void emitSLEB128Bytes(int Value) {
- int Sign = Value >> (8 * sizeof(Value) - 1);
+ void emitSLEB128Bytes(int32_t Value) {
+ int32_t Sign = Value >> (8 * sizeof(Value) - 1);
bool IsMore;
do {
- unsigned char Byte = Value & 0x7f;
+ uint8_t Byte = Value & 0x7f;
Value >>= 7;
IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
if (IsMore) Byte |= 0x80;
@@ -205,14 +205,14 @@ public:
void emitString(const std::string &String) {
for (unsigned i = 0, N = static_cast<unsigned>(String.size());
i < N; ++i) {
- unsigned char C = String[i];
+ uint8_t C = String[i];
emitByte(C);
}
emitByte(0);
}
/// emitInt32 - Emit an int32 directive.
- void emitInt32(int Value) {
+ void emitInt32(int32_t Value) {
if (4 <= BufferEnd-CurBufferPtr) {
*((uint32_t*)CurBufferPtr) = Value;
CurBufferPtr += 4;
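The two LEB128 emitters above implement the standard DWARF variable-length encoding. A self-contained sketch of the unsigned case, equivalent to emitULEB128Bytes but writing into a vector:

#include <stdint.h>
#include <vector>

// Standalone ULEB128 encoder mirroring emitULEB128Bytes: seven payload bits
// per byte, high bit set on every byte except the last.
static void encodeULEB128(unsigned Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value) Byte |= 0x80;   // more bytes follow
    Out.push_back(Byte);
  } while (Value);
}
// e.g. 624485 encodes as 0xE5 0x8E 0x26.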
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
index 226c4c2..aaa41a4 100644
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ b/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -50,14 +50,14 @@ class MachineCodeEmitter {
protected:
/// BufferBegin/BufferEnd - Pointers to the start and end of the memory
/// allocated for this code buffer.
- unsigned char *BufferBegin, *BufferEnd;
+ uint8_t *BufferBegin, *BufferEnd;
/// CurBufferPtr - Pointer to the next byte of memory to fill when emitting
/// code. This is guaranteed to be in the range [BufferBegin,BufferEnd]. If
/// this pointer is at BufferEnd, it will never move due to code emission, and
/// all code emission requests will be ignored (this is the buffer overflow
/// condition).
- unsigned char *CurBufferPtr;
+ uint8_t *CurBufferPtr;
public:
virtual ~MachineCodeEmitter() {}
@@ -96,7 +96,7 @@ public:
/// emitByte - This callback is invoked when a byte needs to be written to the
/// output stream.
///
- void emitByte(unsigned char B) {
+ void emitByte(uint8_t B) {
if (CurBufferPtr != BufferEnd)
*CurBufferPtr++ = B;
}
@@ -106,10 +106,10 @@ public:
///
void emitWordLE(unsigned W) {
if (4 <= BufferEnd-CurBufferPtr) {
- *CurBufferPtr++ = (unsigned char)(W >> 0);
- *CurBufferPtr++ = (unsigned char)(W >> 8);
- *CurBufferPtr++ = (unsigned char)(W >> 16);
- *CurBufferPtr++ = (unsigned char)(W >> 24);
+ *CurBufferPtr++ = (uint8_t)(W >> 0);
+ *CurBufferPtr++ = (uint8_t)(W >> 8);
+ *CurBufferPtr++ = (uint8_t)(W >> 16);
+ *CurBufferPtr++ = (uint8_t)(W >> 24);
} else {
CurBufferPtr = BufferEnd;
}
@@ -120,10 +120,10 @@ public:
///
void emitWordBE(unsigned W) {
if (4 <= BufferEnd-CurBufferPtr) {
- *CurBufferPtr++ = (unsigned char)(W >> 24);
- *CurBufferPtr++ = (unsigned char)(W >> 16);
- *CurBufferPtr++ = (unsigned char)(W >> 8);
- *CurBufferPtr++ = (unsigned char)(W >> 0);
+ *CurBufferPtr++ = (uint8_t)(W >> 24);
+ *CurBufferPtr++ = (uint8_t)(W >> 16);
+ *CurBufferPtr++ = (uint8_t)(W >> 8);
+ *CurBufferPtr++ = (uint8_t)(W >> 0);
} else {
CurBufferPtr = BufferEnd;
}
@@ -134,14 +134,14 @@ public:
///
void emitDWordLE(uint64_t W) {
if (8 <= BufferEnd-CurBufferPtr) {
- *CurBufferPtr++ = (unsigned char)(W >> 0);
- *CurBufferPtr++ = (unsigned char)(W >> 8);
- *CurBufferPtr++ = (unsigned char)(W >> 16);
- *CurBufferPtr++ = (unsigned char)(W >> 24);
- *CurBufferPtr++ = (unsigned char)(W >> 32);
- *CurBufferPtr++ = (unsigned char)(W >> 40);
- *CurBufferPtr++ = (unsigned char)(W >> 48);
- *CurBufferPtr++ = (unsigned char)(W >> 56);
+ *CurBufferPtr++ = (uint8_t)(W >> 0);
+ *CurBufferPtr++ = (uint8_t)(W >> 8);
+ *CurBufferPtr++ = (uint8_t)(W >> 16);
+ *CurBufferPtr++ = (uint8_t)(W >> 24);
+ *CurBufferPtr++ = (uint8_t)(W >> 32);
+ *CurBufferPtr++ = (uint8_t)(W >> 40);
+ *CurBufferPtr++ = (uint8_t)(W >> 48);
+ *CurBufferPtr++ = (uint8_t)(W >> 56);
} else {
CurBufferPtr = BufferEnd;
}
@@ -152,14 +152,14 @@ public:
///
void emitDWordBE(uint64_t W) {
if (8 <= BufferEnd-CurBufferPtr) {
- *CurBufferPtr++ = (unsigned char)(W >> 56);
- *CurBufferPtr++ = (unsigned char)(W >> 48);
- *CurBufferPtr++ = (unsigned char)(W >> 40);
- *CurBufferPtr++ = (unsigned char)(W >> 32);
- *CurBufferPtr++ = (unsigned char)(W >> 24);
- *CurBufferPtr++ = (unsigned char)(W >> 16);
- *CurBufferPtr++ = (unsigned char)(W >> 8);
- *CurBufferPtr++ = (unsigned char)(W >> 0);
+ *CurBufferPtr++ = (uint8_t)(W >> 56);
+ *CurBufferPtr++ = (uint8_t)(W >> 48);
+ *CurBufferPtr++ = (uint8_t)(W >> 40);
+ *CurBufferPtr++ = (uint8_t)(W >> 32);
+ *CurBufferPtr++ = (uint8_t)(W >> 24);
+ *CurBufferPtr++ = (uint8_t)(W >> 16);
+ *CurBufferPtr++ = (uint8_t)(W >> 8);
+ *CurBufferPtr++ = (uint8_t)(W >> 0);
} else {
CurBufferPtr = BufferEnd;
}
@@ -173,8 +173,8 @@ public:
if (Alignment <= (uintptr_t)(BufferEnd-CurBufferPtr)) {
// Move the current buffer ptr up to the specified alignment.
CurBufferPtr =
- (unsigned char*)(((uintptr_t)CurBufferPtr+Alignment-1) &
- ~(uintptr_t)(Alignment-1));
+ (uint8_t*)(((uintptr_t)CurBufferPtr+Alignment-1) &
+ ~(uintptr_t)(Alignment-1));
} else {
CurBufferPtr = BufferEnd;
}
@@ -185,7 +185,7 @@ public:
/// written to the output stream.
void emitULEB128Bytes(unsigned Value) {
do {
- unsigned char Byte = Value & 0x7f;
+ uint8_t Byte = Value & 0x7f;
Value >>= 7;
if (Value) Byte |= 0x80;
emitByte(Byte);
@@ -194,12 +194,12 @@ public:
/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
/// written to the output stream.
- void emitSLEB128Bytes(int Value) {
- int Sign = Value >> (8 * sizeof(Value) - 1);
+ void emitSLEB128Bytes(int32_t Value) {
+ int32_t Sign = Value >> (8 * sizeof(Value) - 1);
bool IsMore;
do {
- unsigned char Byte = Value & 0x7f;
+ uint8_t Byte = Value & 0x7f;
Value >>= 7;
IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
if (IsMore) Byte |= 0x80;
@@ -212,14 +212,14 @@ public:
void emitString(const std::string &String) {
for (unsigned i = 0, N = static_cast<unsigned>(String.size());
i < N; ++i) {
- unsigned char C = String[i];
+ uint8_t C = String[i];
emitByte(C);
}
emitByte(0);
}
/// emitInt32 - Emit an int32 directive.
- void emitInt32(int Value) {
+ void emitInt32(int32_t Value) {
if (4 <= BufferEnd-CurBufferPtr) {
*((uint32_t*)CurBufferPtr) = Value;
CurBufferPtr += 4;
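Both emitters share the same overflow convention: once CurBufferPtr reaches BufferEnd every write is silently dropped, and the caller detects this afterwards and retries with a larger buffer. A sketch of that protocol, not the actual JIT driver code:

// Sketch: retry-on-overflow loop implied by the CurBufferPtr == BufferEnd rule.
void emitWithRetry(MachineCodeEmitter &MCE, MachineFunction &MF) {
  do {
    MCE.startFunction(MF);          // allocates and sets BufferBegin/BufferEnd
  } while (MCE.finishFunction(MF)); // returns true if the buffer overflowed
}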
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index caabf07..33e2e00 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -18,17 +18,17 @@
#undef ENABLE_CBE_PRINTF_A
/* Define if position independent code is enabled */
-#undef ENABLE_PIC
+#cmakedefine ENABLE_PIC ${ENABLE_PIC}
/* Define if threads enabled */
#cmakedefine ENABLE_THREADS ${ENABLE_THREADS}
/* Define to 1 if you have `alloca', as a function or macro. */
-#undef HAVE_ALLOCA
+#cmakedefine HAVE_ALLOCA ${HAVE_ALLOCA}
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
*/
-#undef HAVE_ALLOCA_H
+#cmakedefine HAVE_ALLOCA_H ${HAVE_ALLOCA_H}
/* Define to 1 if you have the `argz_append' function. */
#undef HAVE_ARGZ_APPEND
@@ -228,7 +228,7 @@
#cmakedefine HAVE_MALLOC_MALLOC_H ${HAVE_MALLOC_MALLOC_H}
/* Define to 1 if you have the `malloc_zone_statistics' function. */
-#undef HAVE_MALLOC_ZONE_STATISTICS
+#cmakedefine HAVE_MALLOC_ZONE_STATISTICS ${HAVE_MALLOC_ZONE_STATISTICS}
/* Define to 1 if you have the `memcpy' function. */
#undef HAVE_MEMCPY
@@ -414,7 +414,7 @@
#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
/* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
-#undef HAVE_SYS_WAIT_H
+#cmakedefine HAVE_SYS_WAIT_H ${HAVE_SYS_WAIT_H}
/* Define to 1 if the system has the type `uint64_t'. */
#undef HAVE_UINT64_T
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 9e95a08..ed0fe27 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -704,10 +704,14 @@ public:
/// specify the full Instruction::OPCODE identifier.
///
static Constant *getNeg(Constant *C);
+ static Constant *getFNeg(Constant *C);
static Constant *getNot(Constant *C);
static Constant *getAdd(Constant *C1, Constant *C2);
+ static Constant *getFAdd(Constant *C1, Constant *C2);
static Constant *getSub(Constant *C1, Constant *C2);
+ static Constant *getFSub(Constant *C1, Constant *C2);
static Constant *getMul(Constant *C1, Constant *C2);
+ static Constant *getFMul(Constant *C1, Constant *C2);
static Constant *getUDiv(Constant *C1, Constant *C2);
static Constant *getSDiv(Constant *C1, Constant *C2);
static Constant *getFDiv(Constant *C1, Constant *C2);
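A quick sketch of the new floating-point entry points, using this era's global Type::DoubleTy; the values are illustrative:

// Sketch: fold 1.0 + 2.0 at IR-construction time with the new getFAdd.
Constant *One = ConstantFP::get(Type::DoubleTy, 1.0);
Constant *Two = ConstantFP::get(Type::DoubleTy, 2.0);
Constant *Sum = ConstantExpr::getFAdd(One, Two); // constant 3.0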
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 581300e..688a162 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -60,7 +60,7 @@ public:
/// getGOTBase - If this is managing a Global Offset Table, this method should
/// return a pointer to its base.
- virtual unsigned char *getGOTBase() const = 0;
+ virtual uint8_t *getGOTBase() const = 0;
/// SetDlsymTable - If the JIT must be able to relocate stubs after they have
/// been emitted, potentially because they are being copied to a process
@@ -89,8 +89,8 @@ public:
/// emit the function, so it doesn't pass in the size. Instead, this method
/// is required to pass back a "valid size". The JIT will be careful to not
/// write more than the returned ActualSize bytes of memory.
- virtual unsigned char *startFunctionBody(const Function *F,
- uintptr_t &ActualSize) = 0;
+ virtual uint8_t *startFunctionBody(const Function *F,
+ uintptr_t &ActualSize) = 0;
/// allocateStub - This method is called by the JIT to allocate space for a
/// function stub (used to handle limited branch displacements) while it is
@@ -100,9 +100,8 @@ public:
/// thunk for it. The stub should be "close" to the current function body,
/// but should not be included in the 'actualsize' returned by
/// startFunctionBody.
- virtual unsigned char *allocateStub(const GlobalValue* F, unsigned StubSize,
- unsigned Alignment) =0;
-
+ virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment) = 0;
/// endFunctionBody - This method is called when the JIT is done codegen'ing
/// the specified function. At this point we know the size of the JIT
@@ -110,11 +109,11 @@ public:
/// the startFunctionBody method) and FunctionEnd which is a pointer to the
/// actual end of the function. This method should mark the space allocated
/// and remember where it is in case the client wants to deallocate it.
- virtual void endFunctionBody(const Function *F, unsigned char *FunctionStart,
- unsigned char *FunctionEnd) = 0;
+ virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) = 0;
/// allocateSpace - Allocate a memory block of the given size.
- virtual unsigned char *allocateSpace(intptr_t Size, unsigned Alignment) = 0;
+ virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) = 0;
/// deallocateMemForFunction - Free JIT memory for the specified function.
/// This is never called when the JIT is currently emitting a function.
@@ -122,14 +121,13 @@ public:
/// startExceptionTable - When we finished JITing the function, if exception
/// handling is set, we emit the exception table.
- virtual unsigned char* startExceptionTable(const Function* F,
- uintptr_t &ActualSize) = 0;
+ virtual uint8_t* startExceptionTable(const Function* F,
+ uintptr_t &ActualSize) = 0;
/// endExceptionTable - This method is called when the JIT is done emitting
/// the exception table.
- virtual void endExceptionTable(const Function *F, unsigned char *TableStart,
- unsigned char *TableEnd,
- unsigned char* FrameRegister) = 0;
+ virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister) = 0;
};
} // end namespace llvm.
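The start/end pairs above bracket each emission; a sketch of how the JIT is expected to drive them (MemMgr, F, and EmittedSize are assumed to be in scope):

// Sketch: how the JIT brackets one function's emission.
uintptr_t ActualSize = 0;
uint8_t *Start = MemMgr->startFunctionBody(F, ActualSize);
// ... emit at most ActualSize bytes starting at Start ...
MemMgr->endFunctionBody(F, Start, Start + EmittedSize);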
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index 3a774d4..1eab983 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -204,21 +204,30 @@ public:
Instruction *InsertBefore = 0);
static BinaryOperator *CreateNeg(Value *Op, const std::string &Name,
BasicBlock *InsertAtEnd);
+ static BinaryOperator *CreateFNeg(Value *Op, const std::string &Name = "",
+ Instruction *InsertBefore = 0);
+ static BinaryOperator *CreateFNeg(Value *Op, const std::string &Name,
+ BasicBlock *InsertAtEnd);
static BinaryOperator *CreateNot(Value *Op, const std::string &Name = "",
Instruction *InsertBefore = 0);
static BinaryOperator *CreateNot(Value *Op, const std::string &Name,
BasicBlock *InsertAtEnd);
- /// isNeg, isNot - Check if the given Value is a NEG or NOT instruction.
+ /// isNeg, isFNeg, isNot - Check if the given Value is a
+ /// NEG, FNEG, or NOT instruction.
///
static bool isNeg(const Value *V);
+ static bool isFNeg(const Value *V);
static bool isNot(const Value *V);
/// getNegArgument, getNotArgument - Helper functions to extract the
- /// unary argument of a NEG or NOT operation implemented via Sub or Xor.
+ /// unary argument of a NEG, FNEG, or NOT operation implemented via
+ /// Sub, FSub, or Xor.
///
static const Value *getNegArgument(const Value *BinOp);
static Value *getNegArgument( Value *BinOp);
+ static const Value *getFNegArgument(const Value *BinOp);
+ static Value *getFNegArgument( Value *BinOp);
static const Value *getNotArgument(const Value *BinOp);
static Value *getNotArgument( Value *BinOp);
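Sketch of the new helpers in use; X and InsertPt are assumed to be in scope:

// Build -X as the canonical (fsub -0.0, X), then recognize it again.
Value *NegX = BinaryOperator::CreateFNeg(X, "neg", InsertPt);
if (BinaryOperator::isFNeg(NegX)) {
  Value *Orig = BinaryOperator::getFNegArgument(NegX); // yields X
  (void)Orig;
}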
diff --git a/include/llvm/Instruction.def b/include/llvm/Instruction.def
index 66bf2ce..98fda77 100644
--- a/include/llvm/Instruction.def
+++ b/include/llvm/Instruction.def
@@ -105,71 +105,74 @@ HANDLE_TERM_INST ( 6, Unreachable, UnreachableInst)
// Standard binary operators...
FIRST_BINARY_INST( 7)
HANDLE_BINARY_INST( 7, Add , BinaryOperator)
-HANDLE_BINARY_INST( 8, Sub , BinaryOperator)
-HANDLE_BINARY_INST( 9, Mul , BinaryOperator)
-HANDLE_BINARY_INST(10, UDiv , BinaryOperator)
-HANDLE_BINARY_INST(11, SDiv , BinaryOperator)
-HANDLE_BINARY_INST(12, FDiv , BinaryOperator)
-HANDLE_BINARY_INST(13, URem , BinaryOperator)
-HANDLE_BINARY_INST(14, SRem , BinaryOperator)
-HANDLE_BINARY_INST(15, FRem , BinaryOperator)
+HANDLE_BINARY_INST( 8, FAdd , BinaryOperator)
+HANDLE_BINARY_INST( 9, Sub , BinaryOperator)
+HANDLE_BINARY_INST(10, FSub , BinaryOperator)
+HANDLE_BINARY_INST(11, Mul , BinaryOperator)
+HANDLE_BINARY_INST(12, FMul , BinaryOperator)
+HANDLE_BINARY_INST(13, UDiv , BinaryOperator)
+HANDLE_BINARY_INST(14, SDiv , BinaryOperator)
+HANDLE_BINARY_INST(15, FDiv , BinaryOperator)
+HANDLE_BINARY_INST(16, URem , BinaryOperator)
+HANDLE_BINARY_INST(17, SRem , BinaryOperator)
+HANDLE_BINARY_INST(18, FRem , BinaryOperator)
// Logical operators (integer operands)
-HANDLE_BINARY_INST(16, Shl , BinaryOperator) // Shift left (logical)
-HANDLE_BINARY_INST(17, LShr , BinaryOperator) // Shift right (logical)
-HANDLE_BINARY_INST(18, AShr , BinaryOperator) // Shift right (arithmetic)
-HANDLE_BINARY_INST(19, And , BinaryOperator)
-HANDLE_BINARY_INST(20, Or , BinaryOperator)
-HANDLE_BINARY_INST(21, Xor , BinaryOperator)
- LAST_BINARY_INST(21)
+HANDLE_BINARY_INST(19, Shl , BinaryOperator) // Shift left (logical)
+HANDLE_BINARY_INST(20, LShr , BinaryOperator) // Shift right (logical)
+HANDLE_BINARY_INST(21, AShr , BinaryOperator) // Shift right (arithmetic)
+HANDLE_BINARY_INST(22, And , BinaryOperator)
+HANDLE_BINARY_INST(23, Or , BinaryOperator)
+HANDLE_BINARY_INST(24, Xor , BinaryOperator)
+ LAST_BINARY_INST(24)
// Memory operators...
- FIRST_MEMORY_INST(22)
-HANDLE_MEMORY_INST(22, Malloc, MallocInst) // Heap management instructions
-HANDLE_MEMORY_INST(23, Free , FreeInst )
-HANDLE_MEMORY_INST(24, Alloca, AllocaInst) // Stack management
-HANDLE_MEMORY_INST(25, Load , LoadInst ) // Memory manipulation instrs
-HANDLE_MEMORY_INST(26, Store , StoreInst )
-HANDLE_MEMORY_INST(27, GetElementPtr, GetElementPtrInst)
- LAST_MEMORY_INST(27)
+ FIRST_MEMORY_INST(25)
+HANDLE_MEMORY_INST(25, Malloc, MallocInst) // Heap management instructions
+HANDLE_MEMORY_INST(26, Free , FreeInst )
+HANDLE_MEMORY_INST(27, Alloca, AllocaInst) // Stack management
+HANDLE_MEMORY_INST(28, Load , LoadInst ) // Memory manipulation instrs
+HANDLE_MEMORY_INST(29, Store , StoreInst )
+HANDLE_MEMORY_INST(30, GetElementPtr, GetElementPtrInst)
+ LAST_MEMORY_INST(30)
// Cast operators ...
// NOTE: The order matters here because CastInst::isEliminableCastPair
// NOTE: (see Instructions.cpp) encodes a table based on this ordering.
- FIRST_CAST_INST(28)
-HANDLE_CAST_INST(28, Trunc , TruncInst ) // Truncate integers
-HANDLE_CAST_INST(29, ZExt , ZExtInst ) // Zero extend integers
-HANDLE_CAST_INST(30, SExt , SExtInst ) // Sign extend integers
-HANDLE_CAST_INST(31, FPToUI , FPToUIInst ) // floating point -> UInt
-HANDLE_CAST_INST(32, FPToSI , FPToSIInst ) // floating point -> SInt
-HANDLE_CAST_INST(33, UIToFP , UIToFPInst ) // UInt -> floating point
-HANDLE_CAST_INST(34, SIToFP , SIToFPInst ) // SInt -> floating point
-HANDLE_CAST_INST(35, FPTrunc , FPTruncInst ) // Truncate floating point
-HANDLE_CAST_INST(36, FPExt , FPExtInst ) // Extend floating point
-HANDLE_CAST_INST(37, PtrToInt, PtrToIntInst) // Pointer -> Integer
-HANDLE_CAST_INST(38, IntToPtr, IntToPtrInst) // Integer -> Pointer
-HANDLE_CAST_INST(39, BitCast , BitCastInst ) // Type cast
- LAST_CAST_INST(39)
+ FIRST_CAST_INST(31)
+HANDLE_CAST_INST(31, Trunc , TruncInst ) // Truncate integers
+HANDLE_CAST_INST(32, ZExt , ZExtInst ) // Zero extend integers
+HANDLE_CAST_INST(33, SExt , SExtInst ) // Sign extend integers
+HANDLE_CAST_INST(34, FPToUI , FPToUIInst ) // floating point -> UInt
+HANDLE_CAST_INST(35, FPToSI , FPToSIInst ) // floating point -> SInt
+HANDLE_CAST_INST(36, UIToFP , UIToFPInst ) // UInt -> floating point
+HANDLE_CAST_INST(37, SIToFP , SIToFPInst ) // SInt -> floating point
+HANDLE_CAST_INST(38, FPTrunc , FPTruncInst ) // Truncate floating point
+HANDLE_CAST_INST(39, FPExt , FPExtInst ) // Extend floating point
+HANDLE_CAST_INST(40, PtrToInt, PtrToIntInst) // Pointer -> Integer
+HANDLE_CAST_INST(41, IntToPtr, IntToPtrInst) // Integer -> Pointer
+HANDLE_CAST_INST(42, BitCast , BitCastInst ) // Type cast
+ LAST_CAST_INST(42)
// Other operators...
- FIRST_OTHER_INST(40)
-HANDLE_OTHER_INST(40, ICmp , ICmpInst ) // Integer comparison instruction
-HANDLE_OTHER_INST(41, FCmp , FCmpInst ) // Floating point comparison instr.
-HANDLE_OTHER_INST(42, PHI , PHINode ) // PHI node instruction
-HANDLE_OTHER_INST(43, Call , CallInst ) // Call a function
-HANDLE_OTHER_INST(44, Select , SelectInst ) // select instruction
-HANDLE_OTHER_INST(45, UserOp1, Instruction) // May be used internally in a pass
-HANDLE_OTHER_INST(46, UserOp2, Instruction) // Internal to passes only
-HANDLE_OTHER_INST(47, VAArg , VAArgInst ) // vaarg instruction
-HANDLE_OTHER_INST(48, ExtractElement, ExtractElementInst)// extract from vector
-HANDLE_OTHER_INST(49, InsertElement, InsertElementInst) // insert into vector
-HANDLE_OTHER_INST(50, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
-HANDLE_OTHER_INST(51, ExtractValue, ExtractValueInst)// extract from aggregate
-HANDLE_OTHER_INST(52, InsertValue, InsertValueInst) // insert into aggregate
-HANDLE_OTHER_INST(53, VICmp , VICmpInst ) // Vec Int comparison instruction.
-HANDLE_OTHER_INST(54, VFCmp , VFCmpInst ) // Vec FP point comparison instr.
-
- LAST_OTHER_INST(55)
+ FIRST_OTHER_INST(43)
+HANDLE_OTHER_INST(43, ICmp , ICmpInst ) // Integer comparison instruction
+HANDLE_OTHER_INST(44, FCmp , FCmpInst ) // Floating point comparison instr.
+HANDLE_OTHER_INST(45, PHI , PHINode ) // PHI node instruction
+HANDLE_OTHER_INST(46, Call , CallInst ) // Call a function
+HANDLE_OTHER_INST(47, Select , SelectInst ) // select instruction
+HANDLE_OTHER_INST(48, UserOp1, Instruction) // May be used internally in a pass
+HANDLE_OTHER_INST(49, UserOp2, Instruction) // Internal to passes only
+HANDLE_OTHER_INST(50, VAArg , VAArgInst ) // vaarg instruction
+HANDLE_OTHER_INST(51, ExtractElement, ExtractElementInst)// extract from vector
+HANDLE_OTHER_INST(52, InsertElement, InsertElementInst) // insert into vector
+HANDLE_OTHER_INST(53, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
+HANDLE_OTHER_INST(54, ExtractValue, ExtractValueInst)// extract from aggregate
+HANDLE_OTHER_INST(55, InsertValue, InsertValueInst) // insert into aggregate
+HANDLE_OTHER_INST(56, VICmp , VICmpInst ) // Vec Int comparison instruction.
+HANDLE_OTHER_INST(57, VFCmp , VFCmpInst ) // Vec FP point comparison instr.
+
+ LAST_OTHER_INST(57)
#undef FIRST_TERM_INST
#undef HANDLE_TERM_INST
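Because Instruction.def is an X-macro file, this renumbering is absorbed automatically by every includer; the usual expansion pattern looks like this (a sketch):

#include <cstdio>

// Sketch: print the renumbered opcode table by expanding the .def file.
void dumpOpcodeTable() {
#define HANDLE_INST(Num, Opcode, Class) \
  std::printf("%2d: %s\n", Num, #Opcode);
#include "llvm/Instruction.def"
}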
diff --git a/include/llvm/Support/ConstantFolder.h b/include/llvm/Support/ConstantFolder.h
index ca8bcae..35065a0 100644
--- a/include/llvm/Support/ConstantFolder.h
+++ b/include/llvm/Support/ConstantFolder.h
@@ -32,12 +32,21 @@ public:
Constant *CreateAdd(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getAdd(LHS, RHS);
}
+ Constant *CreateFAdd(Constant *LHS, Constant *RHS) const {
+ return ConstantExpr::getFAdd(LHS, RHS);
+ }
Constant *CreateSub(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getSub(LHS, RHS);
}
+ Constant *CreateFSub(Constant *LHS, Constant *RHS) const {
+ return ConstantExpr::getFSub(LHS, RHS);
+ }
Constant *CreateMul(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getMul(LHS, RHS);
}
+ Constant *CreateFMul(Constant *LHS, Constant *RHS) const {
+ return ConstantExpr::getFMul(LHS, RHS);
+ }
Constant *CreateUDiv(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getUDiv(LHS, RHS);
}
@@ -87,6 +96,9 @@ public:
Constant *CreateNeg(Constant *C) const {
return ConstantExpr::getNeg(C);
}
+ Constant *CreateFNeg(Constant *C) const {
+ return ConstantExpr::getFNeg(C);
+ }
Constant *CreateNot(Constant *C) const {
return ConstantExpr::getNot(C);
}
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index 9ef14af..7942de7 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -175,18 +175,36 @@ public:
return Folder.CreateAdd(LC, RC);
return Insert(BinaryOperator::CreateAdd(LHS, RHS), Name);
}
+ Value *CreateFAdd(Value *LHS, Value *RHS, const char *Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Folder.CreateFAdd(LC, RC);
+ return Insert(BinaryOperator::CreateFAdd(LHS, RHS), Name);
+ }
Value *CreateSub(Value *LHS, Value *RHS, const char *Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateSub(LC, RC);
return Insert(BinaryOperator::CreateSub(LHS, RHS), Name);
}
+ Value *CreateFSub(Value *LHS, Value *RHS, const char *Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Folder.CreateFSub(LC, RC);
+ return Insert(BinaryOperator::CreateFSub(LHS, RHS), Name);
+ }
Value *CreateMul(Value *LHS, Value *RHS, const char *Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateMul(LC, RC);
return Insert(BinaryOperator::CreateMul(LHS, RHS), Name);
}
+ Value *CreateFMul(Value *LHS, Value *RHS, const char *Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Folder.CreateFMul(LC, RC);
+ return Insert(BinaryOperator::CreateFMul(LHS, RHS), Name);
+ }
Value *CreateUDiv(Value *LHS, Value *RHS, const char *Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
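The new FP builders mirror the integer ones: fold when both operands are constants, otherwise insert an instruction. A sketch, with BB, X, Y, and Z assumed to be in scope:

IRBuilder<> Builder(BB);
Value *Prod = Builder.CreateFMul(X, Y, "prod");
Value *Res = Builder.CreateFAdd(Prod, Z, "res"); // computes x*y + z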
diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h
index 1dce497..a49cf84 100644
--- a/include/llvm/Support/NoFolder.h
+++ b/include/llvm/Support/NoFolder.h
@@ -39,12 +39,21 @@ public:
Value *CreateAdd(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateAdd(LHS, RHS);
}
+ Value *CreateFAdd(Constant *LHS, Constant *RHS) const {
+ return BinaryOperator::CreateFAdd(LHS, RHS);
+ }
Value *CreateSub(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateSub(LHS, RHS);
}
+ Value *CreateFSub(Constant *LHS, Constant *RHS) const {
+ return BinaryOperator::CreateFSub(LHS, RHS);
+ }
Value *CreateMul(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateMul(LHS, RHS);
}
+ Value *CreateFMul(Constant *LHS, Constant *RHS) const {
+ return BinaryOperator::CreateFMul(LHS, RHS);
+ }
Value *CreateUDiv(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateUDiv(LHS, RHS);
}
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index d27a7f1..fda925f 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -157,18 +157,36 @@ inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L,
}
template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L,
+ const RHS &R) {
+ return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
+}
+
+template<typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L,
const RHS &R) {
return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
}
template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
+ const RHS &R) {
+ return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
+}
+
+template<typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
const RHS &R) {
return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
}
template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L,
+ const RHS &R) {
+ return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
+}
+
+template<typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L,
const RHS &R) {
return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
@@ -494,6 +512,35 @@ template<typename LHS>
inline neg_match<LHS> m_Neg(const LHS &L) { return L; }
+template<typename LHS_t>
+struct fneg_match {
+ LHS_t L;
+
+ fneg_match(const LHS_t &LHS) : L(LHS) {}
+
+ template<typename OpTy>
+ bool match(OpTy *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::FSub)
+ return matchIfFNeg(I->getOperand(0), I->getOperand(1));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::FSub)
+ return matchIfFNeg(CE->getOperand(0), CE->getOperand(1));
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(V))
+ return L.match(ConstantExpr::getFNeg(CF));
+ return false;
+ }
+private:
+ bool matchIfFNeg(Value *LHS, Value *RHS) {
+ return LHS == ConstantExpr::getZeroValueForNegationExpr(LHS->getType()) &&
+ L.match(RHS);
+ }
+};
+
+template<typename LHS>
+inline fneg_match<LHS> m_FNeg(const LHS &L) { return L; }
+
+
//===----------------------------------------------------------------------===//
// Matchers for control flow
//
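Sketch of the new matcher in use (V is an assumed Value*):

using namespace llvm::PatternMatch;

// Recognize a floating-point negation, i.e. (fsub -0.0, X).
Value *X;
if (match(V, m_FNeg(m_Value(X)))) {
  // V computes -X; use X here.
}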
diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h
index 024c019..5c63034 100644
--- a/include/llvm/Support/StandardPasses.h
+++ b/include/llvm/Support/StandardPasses.h
@@ -60,15 +60,10 @@ namespace llvm {
///
/// Internalize - Run the internalize pass.
/// RunInliner - Use a function inlining pass.
- /// RunSecondGlobalOpt - Run the global optimizer pass twice.
/// VerifyEach - Run the verifier after each pass.
- //
- // FIXME: RunSecondGlobalOpt should go away once we resolve which of LTO or
- // llvm-ld is better.
static inline void createStandardLTOPasses(PassManager *PM,
bool Internalize,
bool RunInliner,
- bool RunSecondGlobalOpt,
bool VerifyEach);
// Implementations
@@ -173,7 +168,6 @@ namespace llvm {
static inline void createStandardLTOPasses(PassManager *PM,
bool Internalize,
bool RunInliner,
- bool RunSecondGlobalOpt,
bool VerifyEach) {
// Now that composite has been compiled, scan through the module, looking
// for a main function. If main is defined, mark all other functions
@@ -207,8 +201,8 @@ namespace llvm {
addOnePass(PM, createFunctionInliningPass(), VerifyEach);
addOnePass(PM, createPruneEHPass(), VerifyEach); // Remove dead EH info.
- // Optimize globals again.
- if (RunSecondGlobalOpt)
+ // Optimize globals again if we ran the inliner.
+ if (RunInliner)
addOnePass(PM, createGlobalOptimizerPass(), VerifyEach);
addOnePass(PM, createGlobalDCEPass(), VerifyEach); // Remove dead functions.
diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h
index 172e4fe..b0700c1 100644
--- a/include/llvm/Support/TargetFolder.h
+++ b/include/llvm/Support/TargetFolder.h
@@ -48,12 +48,21 @@ public:
Constant *CreateAdd(Constant *LHS, Constant *RHS) const {
return Fold(ConstantExpr::getAdd(LHS, RHS));
}
+ Constant *CreateFAdd(Constant *LHS, Constant *RHS) const {
+ return Fold(ConstantExpr::getFAdd(LHS, RHS));
+ }
Constant *CreateSub(Constant *LHS, Constant *RHS) const {
return Fold(ConstantExpr::getSub(LHS, RHS));
}
+ Constant *CreateFSub(Constant *LHS, Constant *RHS) const {
+ return Fold(ConstantExpr::getFSub(LHS, RHS));
+ }
Constant *CreateMul(Constant *LHS, Constant *RHS) const {
return Fold(ConstantExpr::getMul(LHS, RHS));
}
+ Constant *CreateFMul(Constant *LHS, Constant *RHS) const {
+ return Fold(ConstantExpr::getFMul(LHS, RHS));
+ }
Constant *CreateUDiv(Constant *LHS, Constant *RHS) const {
return Fold(ConstantExpr::getUDiv(LHS, RHS));
}
@@ -103,6 +112,9 @@ public:
Constant *CreateNeg(Constant *C) const {
return Fold(ConstantExpr::getNeg(C));
}
+ Constant *CreateFNeg(Constant *C) const {
+ return Fold(ConstantExpr::getFNeg(C));
+ }
Constant *CreateNot(Constant *C) const {
return Fold(ConstantExpr::getNot(C));
}
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index b67d126..8242f04 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -45,6 +45,19 @@ private:
bool Unbuffered;
public:
+ // color order matches ANSI escape sequence, don't change
+ enum Colors {
+ BLACK=0,
+ RED,
+ GREEN,
+ YELLOW,
+ BLUE,
+ MAGENTA,
+ CYAN,
+ WHITE,
+ SAVEDCOLOR
+ };
+
explicit raw_ostream(bool unbuffered=false) : Unbuffered(unbuffered) {
// Start out ready to flush.
OutBufStart = OutBufEnd = OutBufCur = 0;
@@ -167,6 +180,20 @@ public:
// Formatted output, see the format() function in Support/Format.h.
raw_ostream &operator<<(const format_object_base &Fmt);
+ /// Changes the foreground color of text that will be output from this point
+ /// forward.
+ /// @param colors ANSI color to use; the special SAVEDCOLOR can be used to
+ /// change only the bold attribute and leave the color untouched
+ /// @param bold if true, use bold/brighter text (default: false)
+ /// @param bg if true, change the background; otherwise change the foreground
+ /// @returns itself so it can be used within << invocations
+ virtual raw_ostream &changeColor(enum Colors colors, bool bold=false,
+ bool bg=false) { return *this; }
+
+ /// Resets the colors to terminal defaults. Call this when you are done
+ /// outputting colored text, or before program exit.
+ virtual raw_ostream &resetColor() { return *this; }
+
//===--------------------------------------------------------------------===//
// Subclass Interface
//===--------------------------------------------------------------------===//
@@ -243,6 +270,10 @@ public:
/// seek - Flushes the stream and repositions the underlying file descriptor
/// position to the offset specified from the beginning of the file.
uint64_t seek(uint64_t off);
+
+ virtual raw_ostream &changeColor(enum Colors colors, bool bold=false,
+ bool bg=false);
+ virtual raw_ostream &resetColor();
};
/// raw_stdout_ostream - This is a stream that always prints to stdout.
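Sketch of the color API in use; errs() is assumed available in this era's headers and the message is illustrative:

if (sys::Process::StandardErrHasColors())
  errs().changeColor(raw_ostream::RED, /*bold=*/true);
errs() << "error: something went wrong\n";
errs().resetColor();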
diff --git a/include/llvm/System/Process.h b/include/llvm/System/Process.h
index ce19eb2..11dbf75 100644
--- a/include/llvm/System/Process.h
+++ b/include/llvm/System/Process.h
@@ -107,6 +107,34 @@ namespace sys {
/// console, or if the number of columns cannot be determined,
/// this routine returns zero.
static unsigned StandardErrColumns();
+
+ /// This function determines whether the terminal connected to standard
+ /// output supports colors. If standard output is not connected to a
+ /// terminal, this function returns false.
+ static bool StandardOutHasColors();
+
+ /// This function determines whether the terminal connected to standard
+ /// error supports colors. If standard error is not connected to a
+ /// terminal, this function returns false.
+ static bool StandardErrHasColors();
+
+ /// Whether changing colors requires the output to be flushed.
+ /// This is needed on systems that don't support escape sequences for
+ /// changing colors.
+ static bool ColorNeedsFlush();
+
+ /// This function returns the escape sequence for the given color code.
+ /// If ColorNeedsFlush() is true then this function will change the colors
+ /// and return an empty escape sequence. In that case it is the
+ /// responsibility of the client to flush the output stream prior to
+ /// calling this function.
+ static const char *OutputColor(char c, bool bold, bool bg);
+
+ /// Same as OutputColor, but only enables the bold attribute.
+ static const char *OutputBold(bool bg);
+
+ /// Resets the terminal's colors, or returns an escape sequence to do so.
+ static const char *ResetColor();
/// @}
};
}
diff --git a/include/llvm/Target/TargetELFWriterInfo.h b/include/llvm/Target/TargetELFWriterInfo.h
index 548cc07..e266a71 100644
--- a/include/llvm/Target/TargetELFWriterInfo.h
+++ b/include/llvm/Target/TargetELFWriterInfo.h
@@ -25,9 +25,23 @@ namespace llvm {
// e_machine member of the ELF header.
unsigned short EMachine;
public:
+
+ // Machine architectures
enum MachineType {
- NoMachine,
- EM_386 = 3
+ EM_NONE = 0, // No machine
+ EM_M32 = 1, // AT&T WE 32100
+ EM_SPARC = 2, // SPARC
+ EM_386 = 3, // Intel 386
+ EM_68K = 4, // Motorola 68000
+ EM_88K = 5, // Motorola 88000
+ EM_486 = 6, // Intel 486 (deprecated)
+ EM_860 = 7, // Intel 80860
+ EM_MIPS = 8, // MIPS R3000
+ EM_PPC = 20, // PowerPC
+ EM_ARM = 40, // ARM
+ EM_ALPHA = 41, // DEC Alpha
+ EM_SPARCV9 = 43, // SPARC V9
+ EM_X86_64 = 62 // AMD64
};
explicit TargetELFWriterInfo(MachineType machine) : EMachine(machine) {}
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 163f4c5..327af27 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -632,7 +632,8 @@ public:
/// It returns MVT::iAny if SelectionDAG should be responsible for
/// determining it.
virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr) const {
+ bool isSrcConst, bool isSrcStr,
+ SelectionDAG &DAG) const {
return MVT::iAny;
}
@@ -825,11 +826,11 @@ public:
virtual bool
isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
- /// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
- /// loading 'Bytes' bytes from a location that is 'Dist' units away from the
- /// location that the 'Base' load is loading from.
- bool isConsecutiveLoad(SDNode *LD, SDNode *Base, unsigned Bytes, int Dist,
- const MachineFrameInfo *MFI) const;
+ /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+ /// location that is 'Dist' units away from the location that the 'Base' load
+ /// is loading from.
+ bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes,
+ int Dist, const MachineFrameInfo *MFI) const;
/// PerformDAGCombine - This method will be invoked for all target nodes and
/// for any target-independent nodes that the target has registered with
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 06d7d79..0c74fa1 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -73,12 +73,6 @@ namespace llvm {
/// target FP instructions.
extern bool UseSoftFloat;
- /// NoImplicitFloat - This flag is enabled when the -no-implicit-float flag is
- /// specified on the command line. When this flag is on, the code generator
- /// won't generate any implicit floating point instructions. I.e., no XMM or
- /// x87 or vectorized memcpy/memmove instructions. This is for X86 only.
- extern bool NoImplicitFloat;
-
/// NoZerosInBSS - By default some codegens place zero-initialized data to
/// .bss section. This flag disables such behaviour (necessary, e.g. for
/// crt*.o compiling).
@@ -117,10 +111,6 @@ namespace llvm {
/// with earlier copy coalescing.
extern bool StrongPHIElim;
- /// DisableRedZone - This flag disables use of the "Red Zone" on
- /// targets which would otherwise have one.
- extern bool DisableRedZone;
-
} // End llvm namespace
#endif
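Both removed flags are superseded by the per-function attributes added in Attributes.h above; a sketch of the replacement query, assuming this era's Function::hasFnAttr:

// Instead of the global NoImplicitFloat/DisableRedZone flags:
if (F->hasFnAttr(Attribute::NoImplicitFloat)) {
  // avoid implicit FP instructions for this function
}
if (F->hasFnAttr(Attribute::NoRedZone)) {
  // don't use the red zone in this function's frame
}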
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 2dea7b3..2b34ad3 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -19,7 +19,6 @@
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/Streams.h"
#include "llvm/ADT/Statistic.h"
-#include <ostream>
using namespace llvm;
STATISTIC(TotalInsts , "Number of instructions (of all types)");
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index de6480a..a0d3974 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>
-#include <ostream>
using namespace llvm;
char LoopInfo::ID = 0;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index f7f1849..03c5005 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -80,7 +80,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
-#include <ostream>
#include <algorithm>
using namespace llvm;
@@ -2463,24 +2462,15 @@ void ScalarEvolution::forgetLoopPHIs(const Loop *L) {
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
// If the loop does not have exactly one exit block, we can't analyze it.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- if (ExitBlocks.size() != 1) return UnknownValue;
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock)
+ return UnknownValue;
// Okay, there is one exit block. Try to find the condition that causes the
// loop to be exited.
- BasicBlock *ExitBlock = ExitBlocks[0];
-
- BasicBlock *ExitingBlock = 0;
- for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock);
- PI != E; ++PI)
- if (L->contains(*PI)) {
- if (ExitingBlock == 0)
- ExitingBlock = *PI;
- else
- return UnknownValue; // More than one block exiting!
- }
- assert(ExitingBlock && "No exits from loop, something is broken!");
+ BasicBlock *ExitingBlock = L->getExitingBlock();
+ if (!ExitingBlock)
+ return UnknownValue; // More than one block exiting!
// Okay, we've computed the exiting block. See what condition causes us to
// exit.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 7ba8268..ef77e46 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -644,3 +644,16 @@ Value *SCEVExpander::expand(const SCEV *S) {
InsertedExpressions[S] = V;
return V;
}
+
+/// getOrInsertCanonicalInductionVariable - This method returns the
+/// canonical induction variable of the specified type for the specified
+/// loop (inserting one if there is none). A canonical induction variable
+/// starts at zero and steps by one on each iteration.
+Value *
+SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
+ const Type *Ty) {
+ assert(Ty->isInteger() && "Can only insert integer induction variables!");
+ SCEVHandle H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
+ SE.getIntegerSCEV(1, Ty), L);
+ return expand(H);
+}
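Sketch of the relocated helper in use; SE and L are assumed to be in scope, and the constructor follows this era's SCEVExpander:

SCEVExpander Expander(SE);
// Insert (or reuse) an i32 IV that starts at 0 and steps by 1.
Value *IV = Expander.getOrInsertCanonicalInductionVariable(L, Type::Int32Ty);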
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 29ff8aa..45f97b8 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -771,7 +771,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (I == 0) return false;
// (add x, 0.0) is guaranteed to return +0.0, not -0.0.
- if (I->getOpcode() == Instruction::Add &&
+ if (I->getOpcode() == Instruction::FAdd &&
isa<ConstantFP>(I->getOperand(1)) &&
cast<ConstantFP>(I->getOperand(1))->isNullValue())
return true;
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index f2e6890..c5190ef 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -547,6 +547,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(optsize);
KEYWORD(ssp);
KEYWORD(sspreq);
+ KEYWORD(noredzone);
+ KEYWORD(noimplicitfloat);
KEYWORD(type);
KEYWORD(opaque);
@@ -590,7 +592,9 @@ lltok::Kind LLLexer::LexIdentifier() {
if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
UIntVal = Instruction::Enum; return lltok::kw_##STR; }
- INSTKEYWORD(add, Add); INSTKEYWORD(sub, Sub); INSTKEYWORD(mul, Mul);
+ INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
+ INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
+ INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 8db4c71..5c44502 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -712,25 +712,26 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
return Error(AttrLoc, "invalid use of parameter-only attribute");
return false;
- case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break;
- case lltok::kw_signext: Attrs |= Attribute::SExt; break;
- case lltok::kw_inreg: Attrs |= Attribute::InReg; break;
- case lltok::kw_sret: Attrs |= Attribute::StructRet; break;
- case lltok::kw_noalias: Attrs |= Attribute::NoAlias; break;
- case lltok::kw_nocapture: Attrs |= Attribute::NoCapture; break;
- case lltok::kw_byval: Attrs |= Attribute::ByVal; break;
- case lltok::kw_nest: Attrs |= Attribute::Nest; break;
-
- case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break;
- case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break;
- case lltok::kw_noinline: Attrs |= Attribute::NoInline; break;
- case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break;
- case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break;
- case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break;
- case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break;
- case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break;
- case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break;
-
+ case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break;
+ case lltok::kw_signext: Attrs |= Attribute::SExt; break;
+ case lltok::kw_inreg: Attrs |= Attribute::InReg; break;
+ case lltok::kw_sret: Attrs |= Attribute::StructRet; break;
+ case lltok::kw_noalias: Attrs |= Attribute::NoAlias; break;
+ case lltok::kw_nocapture: Attrs |= Attribute::NoCapture; break;
+ case lltok::kw_byval: Attrs |= Attribute::ByVal; break;
+ case lltok::kw_nest: Attrs |= Attribute::Nest; break;
+
+ case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break;
+ case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break;
+ case lltok::kw_noinline: Attrs |= Attribute::NoInline; break;
+ case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break;
+ case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break;
+ case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break;
+ case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break;
+ case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break;
+ case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break;
+ case lltok::kw_noredzone: Attrs |= Attribute::NoRedZone; break;
+ case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
case lltok::kw_align: {
unsigned Alignment;
@@ -1835,8 +1836,11 @@ bool LLParser::ParseValID(ValID &ID) {
// Binary Operators.
case lltok::kw_add:
+ case lltok::kw_fadd:
case lltok::kw_sub:
+ case lltok::kw_fsub:
case lltok::kw_mul:
+ case lltok::kw_fmul:
case lltok::kw_udiv:
case lltok::kw_sdiv:
case lltok::kw_fdiv:
@@ -2400,8 +2404,13 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
// Binary Operators.
case lltok::kw_add:
case lltok::kw_sub:
- case lltok::kw_mul: return ParseArithmetic(Inst, PFS, KeywordVal, 0);
-
+ case lltok::kw_mul:
+ // API compatibility: Accept either integer or floating-point types.
+ return ParseArithmetic(Inst, PFS, KeywordVal, 0);
+ case lltok::kw_fadd:
+ case lltok::kw_fsub:
+ case lltok::kw_fmul: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+
case lltok::kw_udiv:
case lltok::kw_sdiv:
case lltok::kw_urem:
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index d8bd38a..9335d19 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -80,6 +80,8 @@ namespace lltok {
kw_optsize,
kw_ssp,
kw_sspreq,
+ kw_noredzone,
+ kw_noimplicitfloat,
kw_type,
kw_opaque,
@@ -89,7 +91,8 @@ namespace lltok {
kw_ueq, kw_une,
// Instruction Opcodes (Opcode in UIntVal).
- kw_add, kw_sub, kw_mul, kw_udiv, kw_sdiv, kw_fdiv,
+ kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul,
+ kw_udiv, kw_sdiv, kw_fdiv,
kw_urem, kw_srem, kw_frem, kw_shl, kw_lshr, kw_ashr,
kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp, kw_vicmp, kw_vfcmp,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 1dad04b..3b44f564 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -104,9 +104,12 @@ static int GetDecodedCastOpcode(unsigned Val) {
static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) {
switch (Val) {
default: return -1;
- case bitc::BINOP_ADD: return Instruction::Add;
- case bitc::BINOP_SUB: return Instruction::Sub;
- case bitc::BINOP_MUL: return Instruction::Mul;
+ case bitc::BINOP_ADD:
+ return Ty->isFPOrFPVector() ? Instruction::FAdd : Instruction::Add;
+ case bitc::BINOP_SUB:
+ return Ty->isFPOrFPVector() ? Instruction::FSub : Instruction::Sub;
+ case bitc::BINOP_MUL:
+ return Ty->isFPOrFPVector() ? Instruction::FMul : Instruction::Mul;
case bitc::BINOP_UDIV: return Instruction::UDiv;
case bitc::BINOP_SDIV:
return Ty->isFPOrFPVector() ? Instruction::FDiv : Instruction::SDiv;
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index bfc029c..9f16728 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -77,9 +77,12 @@ static unsigned GetEncodedCastOpcode(unsigned Opcode) {
static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
switch (Opcode) {
default: assert(0 && "Unknown binary instruction!");
- case Instruction::Add: return bitc::BINOP_ADD;
- case Instruction::Sub: return bitc::BINOP_SUB;
- case Instruction::Mul: return bitc::BINOP_MUL;
+ case Instruction::Add:
+ case Instruction::FAdd: return bitc::BINOP_ADD;
+ case Instruction::Sub:
+ case Instruction::FSub: return bitc::BINOP_SUB;
+ case Instruction::Mul:
+ case Instruction::FMul: return bitc::BINOP_MUL;
case Instruction::UDiv: return bitc::BINOP_UDIV;
case Instruction::FDiv:
case Instruction::SDiv: return bitc::BINOP_SDIV;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 25217b0..5a66f4b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -20,7 +20,6 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
-#include <ostream>
using namespace llvm;
static TimerGroup &getDwarfTimerGroup() {
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index 45e7dd3..f7ca4f4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -21,7 +21,6 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include <ostream>
using namespace llvm;
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
new file mode 100644
index 0000000..bf43622
--- /dev/null
+++ b/lib/CodeGen/ELF.h
@@ -0,0 +1,186 @@
+//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the ELF file format.
+//
+// The details of the ELF32 bits in this file are largely based on
+// the Tool Interface Standard (TIS) Executable and Linking Format
+// (ELF) Specification Version 1.2, May 1995. The ELF64 stuff is not
+// standardized, as far as I can tell. It was largely based on information
+// I found in OpenBSD header files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ELF_H
+#define CODEGEN_ELF_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cstring>
+
+namespace llvm {
+ class GlobalVariable;
+
+ // Identification Indexes
+ enum {
+ EI_MAG0 = 0,
+ EI_MAG1 = 1,
+ EI_MAG2 = 2,
+ EI_MAG3 = 3
+ };
+
+ // File types
+ enum {
+ ET_NONE = 0, // No file type
+ ET_REL = 1, // Relocatable file
+ ET_EXEC = 2, // Executable file
+ ET_DYN = 3, // Shared object file
+ ET_CORE = 4, // Core file
+ ET_LOPROC = 0xff00, // Beginning of processor-specific codes
+ ET_HIPROC = 0xffff // Processor-specific
+ };
+
+ // Object file classes.
+ enum {
+ ELFCLASS32 = 1, // 32-bit object file
+ ELFCLASS64 = 2 // 64-bit object file
+ };
+
+ // Object file byte orderings.
+ enum {
+ ELFDATA2LSB = 1, // Little-endian object file
+ ELFDATA2MSB = 2 // Big-endian object file
+ };
+
+ // Versioning
+ enum {
+ EV_NONE = 0,
+ EV_CURRENT = 1
+ };
+
+ /// ELFSection - This struct contains information about each section that is
+ /// emitted to the file. This is eventually turned into the section header
+ /// table at the end of the file.
+ struct ELFSection {
+
+ // ELF specific fields
+ std::string Name; // Name of the section.
+ unsigned NameIdx; // Index in .shstrtab of name, once emitted.
+ unsigned Type;
+ unsigned Flags;
+ uint64_t Addr;
+ unsigned Offset;
+ unsigned Size;
+ unsigned Link;
+ unsigned Info;
+ unsigned Align;
+ unsigned EntSize;
+
+ // Section Header Flags
+ enum {
+ SHF_WRITE = 1 << 0, // Writable
+ SHF_ALLOC = 1 << 1, // Mapped into the process addr space
+ SHF_EXECINSTR = 1 << 2, // Executable
+ SHF_MERGE = 1 << 4, // Might be merged if equal
+ SHF_STRINGS = 1 << 5, // Contains null-terminated strings
+ SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index
+ SHF_LINK_ORDER = 1 << 7, // Preserve order after combining
+ SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
+ SHF_GROUP = 1 << 9, // Section is a member of a group
+ SHF_TLS = 1 << 10 // Section holds thread-local data
+ };
+
+ // Section Types
+ enum {
+ SHT_NULL = 0, // No associated section (inactive entry).
+ SHT_PROGBITS = 1, // Program-defined contents.
+ SHT_SYMTAB = 2, // Symbol table.
+ SHT_STRTAB = 3, // String table.
+ SHT_RELA = 4, // Relocation entries; explicit addends.
+ SHT_HASH = 5, // Symbol hash table.
+ SHT_DYNAMIC = 6, // Information for dynamic linking.
+ SHT_NOTE = 7, // Information about the file.
+ SHT_NOBITS = 8, // Data occupies no space in the file.
+ SHT_REL = 9, // Relocation entries; no explicit addends.
+ SHT_SHLIB = 10, // Reserved.
+ SHT_DYNSYM = 11, // Symbol table.
+ SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type.
+ SHT_HIPROC = 0x7fffffff, // Highest processor architecture-specific type.
+ SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
+ SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
+ };
+
+ // Special section indices.
+ enum {
+ SHN_UNDEF = 0, // Undefined, missing, irrelevant, or meaningless
+ SHN_LORESERVE = 0xff00, // Lowest reserved index
+ SHN_LOPROC = 0xff00, // Lowest processor-specific index
+ SHN_HIPROC = 0xff1f, // Highest processor-specific index
+ SHN_ABS = 0xfff1, // Symbol has absolute value; does not need relocation
+ SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables
+ SHN_HIRESERVE = 0xffff // Highest reserved index
+ };
+
+ /// SectionIdx - The number of the section in the Section Table.
+ unsigned short SectionIdx;
+
+ /// SectionData - The actual data for this section which we are building
+ /// up for emission to the file.
+ std::vector<unsigned char> SectionData;
+
+ ELFSection(const std::string &name)
+ : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0),
+ Link(0), Info(0), Align(0), EntSize(0) {}
+ };
+
+ /// ELFSym - This struct contains information about each symbol that is
+ /// added to logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct ELFSym {
+ const GlobalValue *GV; // The global value this corresponds to.
+
+ // ELF specific fields
+ unsigned NameIdx; // Index in .strtab of name, once emitted.
+ uint64_t Value;
+ unsigned Size;
+ uint8_t Info;
+ uint8_t Other;
+ unsigned short SectionIdx;
+
+ enum {
+ STB_LOCAL = 0,
+ STB_GLOBAL = 1,
+ STB_WEAK = 2
+ };
+
+ enum {
+ STT_NOTYPE = 0,
+ STT_OBJECT = 1,
+ STT_FUNC = 2,
+ STT_SECTION = 3,
+ STT_FILE = 4
+ };
+
+ ELFSym(const GlobalValue *gv) : GV(gv), Value(0),
+ Size(0), Info(0), Other(0),
+ SectionIdx(ELFSection::SHN_UNDEF) {}
+
+ void SetBind(unsigned X) {
+ assert(X == (X & 0xF) && "Bind value out of range!");
+ Info = (Info & 0x0F) | (X << 4);
+ }
+ void SetType(unsigned X) {
+ assert(X == (X & 0xF) && "Type value out of range!");
+ Info = (Info & 0xF0) | X;
+ }
+ };
+
+} // end namespace llvm
+
+#endif
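Sketch of how Info is packed, matching the ELF st_info convention (GV is an assumed const GlobalValue*):

ELFSym Sym(GV);
Sym.SetBind(ELFSym::STB_GLOBAL); // high four bits of Info
Sym.SetType(ELFSym::STT_FUNC);   // low four bits of Info
// Sym.Info == (STB_GLOBAL << 4) | STT_FUNC == 0x12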
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 0a0245f..9af276b 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "elfce"
+
#include "ELFCodeEmitter.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Mangler.h"
-#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/Debug.h"
//===----------------------------------------------------------------------===//
// ELFCodeEmitter Implementation
@@ -27,67 +27,87 @@ namespace llvm {
/// startFunction - This callback is invoked when a new machine function is
/// about to be emitted.
-void ELFCodeEmitter::startFunction(MachineFunction &F) {
- // Align the output buffer to the appropriate alignment.
- unsigned Align = 16; // FIXME: GENERICIZE!!
+void ELFCodeEmitter::startFunction(MachineFunction &MF) {
+ const TargetData *TD = TM.getTargetData();
+ const Function *F = MF.getFunction();
+
+ // Align the output buffer to the appropriate alignment, which must be a
+ // power of 2.
+ unsigned FnAlign = F->getAlignment();
+ unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
+ unsigned Align = std::max(FnAlign, TDAlign);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+
// Get the ELF Section that this function belongs in.
- ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS,
- ELFWriter::ELFSection::SHF_EXECINSTR |
- ELFWriter::ELFSection::SHF_ALLOC);
- OutBuffer = &ES->SectionData;
- cerr << "FIXME: This code needs to be updated for changes in the "
- << "CodeEmitter interfaces. In particular, this should set "
- << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!";
- abort();
+ ES = &EW.getTextSection();
+
+ // FIXME: Better memory management; this will be replaced by BinaryObjects.
+ ES->SectionData.reserve(4096);
+ BufferBegin = &ES->SectionData[0];
+ BufferEnd = BufferBegin + ES->SectionData.capacity();
// Upgrade the section alignment if required.
if (ES->Align < Align) ES->Align = Align;
- // Add padding zeros to the end of the buffer to make sure that the
- // function will start on the correct byte alignment within the section.
- OutputBuffer OB(*OutBuffer,
- TM.getTargetData()->getPointerSizeInBits() == 64,
- TM.getTargetData()->isLittleEndian());
- OB.align(Align);
- FnStart = OutBuffer->size();
+ // Round the size up to the correct alignment for starting the new function.
+ ES->Size = (ES->Size + (Align-1)) & (-Align);
+
+ // Sanity check on allocated space for the text section
+ assert(ES->Size < 4096 && "no more space in TextSection");
+
+ // FIXME: Using ES->Size directly here instead of calculating it from the
+ // output buffer size (impossible because the code emitter deals only in raw
+ // bytes) forces us to manually synchronize size and write padding zero bytes
+ // to the output buffer for all non-text sections. For text sections, we do
+ // not synchronize the output buffer, and we just blow up if anyone tries to
+ // write non-code to it. An assert should probably be added to
+ // AddSymbolToSection to prevent calling it on the text section.
+ CurBufferPtr = BufferBegin + ES->Size;
+
+ // Record function start address relative to BufferBegin
+ FnStartPtr = CurBufferPtr;
}
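Two alignment idioms above deserve a note: !(Align & (Align-1)) tests that
Align is a power of two, and (Size + (Align-1)) & -Align rounds Size up to
the next multiple of Align. A minimal sketch, assuming only that Align is a
nonzero power of two:

    #include <cassert>
    #include <cstdio>

    // Round Size up to the next multiple of a power-of-two Align.
    static unsigned RoundUp(unsigned Size, unsigned Align) {
      assert(Align && !(Align & (Align - 1)) && "Align must be a power of two");
      return (Size + (Align - 1)) & -Align; // -Align == ~(Align - 1) here
    }

    int main() {
      std::printf("%u\n", RoundUp(13, 16)); // 16
      std::printf("%u\n", RoundUp(32, 16)); // 32 (already aligned)
      return 0;
    }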
/// finishFunction - This callback is invoked after the function is completely
/// finished.
-bool ELFCodeEmitter::finishFunction(MachineFunction &F) {
- // We now know the size of the function, add a symbol to represent it.
- ELFWriter::ELFSym FnSym(F.getFunction());
+bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
+ // Add a symbol to represent the function.
+ ELFSym FnSym(MF.getFunction());
// Figure out the binding (linkage) of the symbol.
- switch (F.getFunction()->getLinkage()) {
+ switch (MF.getFunction()->getLinkage()) {
default:
// appending linkage is illegal for functions.
assert(0 && "Unknown linkage type!");
case GlobalValue::ExternalLinkage:
- FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL);
+ FnSym.SetBind(ELFSym::STB_GLOBAL);
break;
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
- FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK);
+ FnSym.SetBind(ELFSym::STB_WEAK);
break;
case GlobalValue::PrivateLinkage:
assert (0 && "PrivateLinkage should not be in the symbol table.");
case GlobalValue::InternalLinkage:
- FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL);
+ FnSym.SetBind(ELFSym::STB_LOCAL);
break;
}
- ES->Size = OutBuffer->size();
+ // Set the symbol type as a function
+ FnSym.SetType(ELFSym::STT_FUNC);
- FnSym.SetType(ELFWriter::ELFSym::STT_FUNC);
FnSym.SectionIdx = ES->SectionIdx;
- FnSym.Value = FnStart; // Value = Offset from start of Section.
- FnSym.Size = OutBuffer->size()-FnStart;
+ FnSym.Size = CurBufferPtr-FnStartPtr;
+
+ // Offset from start of Section
+ FnSym.Value = FnStartPtr-BufferBegin;
// Finally, add it to the symtab.
EW.SymbolTable.push_back(FnSym);
+
+ // Update Section Size
+ ES->Size = CurBufferPtr - BufferBegin;
return false;
}
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
index 11ebcc8..e9ee936 100644
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ b/lib/CodeGen/ELFCodeEmitter.h
@@ -21,11 +21,10 @@ namespace llvm {
class ELFCodeEmitter : public MachineCodeEmitter {
ELFWriter &EW;
TargetMachine &TM;
- ELFWriter::ELFSection *ES; // Section to write to.
- std::vector<unsigned char> *OutBuffer;
- size_t FnStart;
+ ELFSection *ES; // Section to write to.
+ uint8_t *FnStartPtr;
public:
- explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {}
+ explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
void startFunction(MachineFunction &F);
bool finishFunction(MachineFunction &F);
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index be8edce..24f12a3 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -33,6 +33,7 @@
#include "ELFWriter.h"
#include "ELFCodeEmitter.h"
+#include "ELF.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/DerivedTypes.h"
@@ -67,7 +68,8 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
: MachineFunctionPass(&ID), O(o), TM(tm) {
- e_flags = 0; // e_flags defaults to 0, no flags.
+ e_flags = 0; // e_flags defaults to 0, no flags.
+ e_machine = TM.getELFWriterInfo()->getEMachine();
is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
isLittleEndian = TM.getTargetData()->isLittleEndian();
@@ -90,24 +92,39 @@ bool ELFWriter::doInitialization(Module &M) {
std::vector<unsigned char> &FH = FileHeader;
OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
- FHOut.outbyte(0x7F); // EI_MAG0
- FHOut.outbyte('E'); // EI_MAG1
- FHOut.outbyte('L'); // EI_MAG2
- FHOut.outbyte('F'); // EI_MAG3
- FHOut.outbyte(is64Bit ? 2 : 1); // EI_CLASS
- FHOut.outbyte(isLittleEndian ? 1 : 2); // EI_DATA
- FHOut.outbyte(1); // EI_VERSION
- FH.resize(16); // EI_PAD up to 16 bytes.
-
- // This should change for shared objects.
- FHOut.outhalf(1); // e_type = ET_REL
- FHOut.outhalf(TM.getELFWriterInfo()->getEMachine()); // target-defined
- FHOut.outword(1); // e_version = 1
- FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file
- FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o
-
- ELFHeader_e_shoff_Offset = FH.size();
- FHOut.outaddr(0); // e_shoff
+ unsigned ElfClass = is64Bit ? ELFCLASS64 : ELFCLASS32;
+ unsigned ElfEndian = isLittleEndian ? ELFDATA2LSB : ELFDATA2MSB;
+
+ // ELF Header
+ // ----------
+ // Fields e_shnum and e_shstrndx are only known after all sections
+ // have been emitted. Their locations in the output buffer are
+ // recorded so they can be patched up later.
+ //
+ // Note
+ // ----
+ // The FHOut.outaddr method behaves differently for ELF32 and ELF64,
+ // writing 4 bytes in the former and 8 in the latter for the *_off
+ // and *_addr ELF types.
+
+ FHOut.outbyte(0x7f); // e_ident[EI_MAG0]
+ FHOut.outbyte('E'); // e_ident[EI_MAG1]
+ FHOut.outbyte('L'); // e_ident[EI_MAG2]
+ FHOut.outbyte('F'); // e_ident[EI_MAG3]
+
+ FHOut.outbyte(ElfClass); // e_ident[EI_CLASS]
+ FHOut.outbyte(ElfEndian); // e_ident[EI_DATA]
+ FHOut.outbyte(EV_CURRENT); // e_ident[EI_VERSION]
+
+ FH.resize(16); // e_ident[EI_NIDENT-EI_PAD]
+
+ FHOut.outhalf(ET_REL); // e_type
+ FHOut.outhalf(e_machine); // e_machine = target
+ FHOut.outword(EV_CURRENT); // e_version
+ FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file
+ FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o
+
+ ELFHdr_e_shoff_Offset = FH.size();
+ FHOut.outaddr(0); // e_shoff = sec hdr table off in bytes
FHOut.outword(e_flags); // e_flags = whatever the target wants
FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size
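For orientation, the e_ident block emitted above for a little-endian 64-bit
object comes out as the following 16 bytes (a sketch using the same
specification constants; the EI_PAD bytes are zero):

    #include <cstdint>

    static const uint8_t EIdent[16] = {
      0x7f, 'E', 'L', 'F',       // EI_MAG0..EI_MAG3: ELF magic number
      2,                         // EI_CLASS:   ELFCLASS64
      1,                         // EI_DATA:    ELFDATA2LSB
      1,                         // EI_VERSION: EV_CURRENT
      0, 0, 0, 0, 0, 0, 0, 0, 0  // EI_PAD: zero padding to 16 bytes
    };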
@@ -115,14 +132,16 @@ bool ELFWriter::doInitialization(Module &M) {
FHOut.outhalf(0); // e_phnum = # prog header entries = 0
FHOut.outhalf(is64Bit ? 64 : 40); // e_shentsize = sect hdr entry size
+ // e_shnum = # of section header ents
+ ELFHdr_e_shnum_Offset = FH.size();
+ FHOut.outhalf(0);
- ELFHeader_e_shnum_Offset = FH.size();
- FHOut.outhalf(0); // e_shnum = # of section header ents
- ELFHeader_e_shstrndx_Offset = FH.size();
- FHOut.outhalf(0); // e_shstrndx = Section # of '.shstrtab'
+ // e_shstrndx = Section # of '.shstrtab'
+ ELFHdr_e_shstrndx_Offset = FH.size();
+ FHOut.outhalf(0);
// Add the null section, which is required to be first in the file.
- getSection("", 0, 0);
+ getSection("", ELFSection::SHT_NULL, 0);
// Start up the symbol table. The first entry in the symtab is the null
// entry.
@@ -334,7 +353,7 @@ void ELFWriter::EmitSectionTableStringTable() {
// Now that we know which section number is the .shstrtab section, update the
// e_shstrndx entry in the ELF header.
OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
- FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset);
+ FHOut.fixhalf(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
// Set the NameIdx of each section in the string table and emit the bytes for
// the string table.
@@ -386,11 +405,11 @@ void ELFWriter::OutputSectionsAndSectionTable() {
// Now that we know where all of the sections will be emitted, set the e_shnum
// entry in the ELF header.
OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
- FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset);
+ FHOut.fixhalf(NumSections, ELFHdr_e_shnum_Offset);
// Now that we know the offset in the file of the section table, update the
// e_shoff address in the ELF header.
- FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset);
+ FHOut.fixaddr(FileOff, ELFHdr_e_shoff_Offset);
// Now that we know all of the data in the file header, emit it and all of the
// sections!
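The three ELFHdr_*_Offset members implement a record-then-patch pattern:
emit a zero placeholder, remember its byte offset, and overwrite it once the
real value is known. A minimal sketch of the idea; fixhalf here is a
simplified little-endian stand-in for OutputBuffer::fixhalf:

    #include <cstdint>
    #include <vector>

    // Overwrite a 16-bit little-endian placeholder at byte offset Off.
    static void fixhalf(std::vector<uint8_t> &Buf, uint16_t V, unsigned Off) {
      Buf[Off]     = V & 0xFF;
      Buf[Off + 1] = (V >> 8) & 0xFF;
    }

    int main() {
      std::vector<uint8_t> FileHeader;
      unsigned e_shnum_Offset = FileHeader.size(); // record the offset
      FileHeader.push_back(0);                     // e_shnum placeholder
      FileHeader.push_back(0);
      // ... emit the rest of the file, counting sections ...
      fixhalf(FileHeader, /*NumSections=*/5, e_shnum_Offset);
      return 0;
    }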
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index 31aa05a..0389185 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -15,6 +15,7 @@
#define ELFWRITER_H
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "ELF.h"
#include <list>
#include <map>
@@ -82,10 +83,8 @@ namespace llvm {
/// doInitialization - Emit the file header and all of the global variables
/// for the module to the ELF file.
bool doInitialization(Module &M);
-
bool runOnMachineFunction(MachineFunction &MF);
-
/// doFinalization - Now that the module has been completely processed, emit
/// the ELF file to 'O'.
bool doFinalization(Module &M);
@@ -96,53 +95,6 @@ namespace llvm {
// as well!).
DataBuffer FileHeader;
- /// ELFSection - This struct contains information about each section that is
- /// emitted to the file. This is eventually turned into the section header
- /// table at the end of the file.
- struct ELFSection {
- std::string Name; // Name of the section.
- unsigned NameIdx; // Index in .shstrtab of name, once emitted.
- unsigned Type;
- unsigned Flags;
- uint64_t Addr;
- unsigned Offset;
- unsigned Size;
- unsigned Link;
- unsigned Info;
- unsigned Align;
- unsigned EntSize;
-
- /// SectionIdx - The number of the section in the Section Table.
- ///
- unsigned short SectionIdx;
-
- /// SectionData - The actual data for this section which we are building
- /// up for emission to the file.
- DataBuffer SectionData;
-
- enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3,
- SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7,
- SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 };
- enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 };
- enum { // SHF - ELF Section Header Flags
- SHF_WRITE = 1 << 0, // Writable
- SHF_ALLOC = 1 << 1, // Mapped into the process addr space
- SHF_EXECINSTR = 1 << 2, // Executable
- SHF_MERGE = 1 << 4, // Might be merged if equal
- SHF_STRINGS = 1 << 5, // Contains null-terminated strings
- SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index
- SHF_LINK_ORDER = 1 << 7, // Preserve order after combining
- SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
- SHF_GROUP = 1 << 9, // Section is a member of a group
- SHF_TLS = 1 << 10 // Section holds thread-local data
- };
-
- ELFSection(const std::string &name)
- : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0),
- Link(0), Info(0), Align(0), EntSize(0) {
- }
- };
-
/// SectionList - This is the list of sections that we have emitted to the
/// file. Once the file has been completely built, the section header table
/// is constructed from this info.
@@ -165,9 +117,15 @@ namespace llvm {
SN->SectionIdx = NumSections++;
SN->Type = Type;
SN->Flags = Flags;
+ SN->Link = ELFSection::SHN_UNDEF;
return *SN;
}
+ ELFSection &getTextSection() {
+ return getSection(".text", ELFSection::SHT_PROGBITS,
+ ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC);
+ }
+
ELFSection &getDataSection() {
return getSection(".data", ELFSection::SHT_PROGBITS,
ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
@@ -177,34 +135,6 @@ namespace llvm {
ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
}
- /// ELFSym - This struct contains information about each symbol that is
- /// added to logical symbol table for the module. This is eventually
- /// turned into a real symbol table in the file.
- struct ELFSym {
- const GlobalValue *GV; // The global value this corresponds to.
- unsigned NameIdx; // Index in .strtab of name, once emitted.
- uint64_t Value;
- unsigned Size;
- unsigned char Info;
- unsigned char Other;
- unsigned short SectionIdx;
-
- enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 };
- enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3,
- STT_FILE = 4 };
- ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0),
- Other(0), SectionIdx(0) {}
-
- void SetBind(unsigned X) {
- assert(X == (X & 0xF) && "Bind value out of range!");
- Info = (Info & 0x0F) | (X << 4);
- }
- void SetType(unsigned X) {
- assert(X == (X & 0xF) && "Type value out of range!");
- Info = (Info & 0xF0) | X;
- }
- };
-
/// SymbolTable - This is the list of symbols we have emitted to the file.
/// This actually gets rearranged before emission to the file (to put the
/// local symbols first in the list).
@@ -214,9 +144,9 @@ namespace llvm {
// (e.g. the location of the section table). These members keep track of
// the offset in ELFHeader of these various pieces to update and other
// locations in the file.
- unsigned ELFHeader_e_shoff_Offset; // e_shoff in ELF header.
- unsigned ELFHeader_e_shstrndx_Offset; // e_shstrndx in ELF header.
- unsigned ELFHeader_e_shnum_Offset; // e_shnum in ELF header.
+ unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header.
+ unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header.
+ unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header.
private:
void EmitGlobal(GlobalVariable *GV);
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index b8c8563..c351593 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -28,7 +28,6 @@
#include "llvm/Support/Streams.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/FoldingSet.h"
-#include <ostream>
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 8520888..804fae5 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -40,7 +40,6 @@
#include <queue>
#include <memory>
#include <cmath>
-#include <iostream>
using namespace llvm;
@@ -399,7 +398,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
}
++NumCoalesce;
- return SrcReg;
+ return PhysReg;
}
return Reg;
@@ -543,13 +542,37 @@ void RALinScan::linearScan()
// Ignore split live intervals.
if (!isPhys && vrm_->getPreSplitReg(cur.reg))
continue;
+
+ // A register defined by an implicit_def can be live out of the def BB
+ // and live in to a use BB. Add it to the live-in set of the use BBs.
+ if (!isPhys && cur.empty()) {
+ if (MachineInstr *DefMI = mri_->getVRegDef(cur.reg)) {
+ assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF);
+ MachineBasicBlock *DefMBB = DefMI->getParent();
+ SmallPtrSet<MachineBasicBlock*, 4> Seen;
+ Seen.insert(DefMBB);
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(cur.reg),
+ re = mri_->reg_end(); ri != re; ++ri) {
+ MachineInstr *UseMI = &*ri;
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (Seen.insert(UseMBB)) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Adding a virtual register to livein set?");
+ UseMBB->addLiveIn(Reg);
+ }
+ }
+ }
+ }
for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
I != E; ++I) {
const LiveRange &LR = *I;
if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
- if (LiveInMBBs[i] != EntryMBB)
+ if (LiveInMBBs[i] != EntryMBB) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Adding a virtual register to livein set?");
LiveInMBBs[i]->addLiveIn(Reg);
+ }
LiveInMBBs.clear();
}
}
@@ -1192,7 +1215,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// The earliest start of a Spilled interval indicates up to where
// in handled we need to roll back
- unsigned earliestStart = cur->beginNumber();
LiveInterval *earliestStartInterval = cur;
// Spill live intervals of virtual regs mapped to the physical register we
@@ -1206,19 +1228,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
LiveInterval *sli = spillIs.back();
spillIs.pop_back();
DOUT << "\t\t\tspilling(a): " << *sli << '\n';
- earliestStart = std::min(earliestStart, sli->beginNumber());
earliestStartInterval =
(earliestStartInterval->beginNumber() < sli->beginNumber()) ?
earliestStartInterval : sli;
-
- if (earliestStartInterval->beginNumber()!=earliestStart) {
- epicFail |= true;
- std::cerr << "What the 1 - "
- << "earliestStart = " << earliestStart
- << "earliestStartInterval = " << earliestStartInterval->beginNumber()
- << "\n";
- }
-
+
std::vector<LiveInterval*> newIs;
if (!NewSpillFramework) {
newIs = li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_);
@@ -1229,20 +1242,12 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
spilled.insert(sli->reg);
- if (earliestStartInterval->beginNumber()!=earliestStart) {
- epicFail |= true;
- std::cerr << "What the 2 - "
- << "earliestStart = " << earliestStart
- << "earliestStartInterval = " << earliestStartInterval->beginNumber()
- << "\n";
- }
-
if (epicFail) {
//abort();
}
}
- earliestStart = earliestStartInterval->beginNumber();
+ unsigned earliestStart = earliestStartInterval->beginNumber();
DOUT << "\t\trolling back to: " << earliestStart << '\n';
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4c1710d..609ec82 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3626,30 +3626,29 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
- SDNode *LD1 = getBuildPairElt(N, 0);
- if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
return SDValue();
MVT LD1VT = LD1->getValueType(0);
- SDNode *LD2 = getBuildPairElt(N, 1);
const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
if (ISD::isNON_EXTLoad(LD2) &&
LD2->hasOneUse() &&
// If both are volatile this would reduce the number of volatile loads.
// If one is volatile it might be ok, but play conservative and bail out.
- !cast<LoadSDNode>(LD1)->isVolatile() &&
- !cast<LoadSDNode>(LD2)->isVolatile() &&
+ !LD1->isVolatile() &&
+ !LD2->isVolatile() &&
TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
- LoadSDNode *LD = cast<LoadSDNode>(LD1);
- unsigned Align = LD->getAlignment();
+ unsigned Align = LD1->getAlignment();
unsigned NewAlign = TLI.getTargetData()->
getABITypeAlignment(VT.getTypeForMVT());
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
- return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
- false, Align);
+ return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+ LD1->getBasePtr(), LD1->getSrcValue(),
+ LD1->getSrcValueOffset(), false, Align);
}
return SDValue();
@@ -4019,6 +4018,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold (fmul A, 0) -> 0
if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
return N1;
+ // fold (fmul A, 0) -> 0, vector edition.
+ if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 6becff3..4a7dbeb 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -639,18 +639,18 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
bool
FastISel::SelectOperator(User *I, unsigned Opcode) {
switch (Opcode) {
- case Instruction::Add: {
- ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FADD : ISD::ADD;
- return SelectBinaryOp(I, Opc);
- }
- case Instruction::Sub: {
- ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FSUB : ISD::SUB;
- return SelectBinaryOp(I, Opc);
- }
- case Instruction::Mul: {
- ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FMUL : ISD::MUL;
- return SelectBinaryOp(I, Opc);
- }
+ case Instruction::Add:
+ return SelectBinaryOp(I, ISD::ADD);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::Sub:
+ return SelectBinaryOp(I, ISD::SUB);
+ case Instruction::FSub:
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::Mul:
+ return SelectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
case Instruction::SDiv:
return SelectBinaryOp(I, ISD::SDIV);
case Instruction::UDiv:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2cd67e6..5ae183e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -116,6 +116,8 @@ private:
/// result.
SDValue LegalizeOp(SDValue O);
+ SDValue OptimizeFloatStore(StoreSDNode *ST);
+
/// PerformInsertVectorEltInMemory - Some target cannot handle a variable
/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
/// is necessary to spill the vector being inserted into to memory, perform
@@ -165,6 +167,7 @@ private:
SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandVectorBuildThroughStack(SDNode* Node);
void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -681,6 +684,59 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
}
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ // We generally can't do this one for long doubles.
+ SDValue Tmp1 = ST->getChain();
+ SDValue Tmp2 = ST->getBasePtr();
+ SDValue Tmp3;
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ getTypeAction(MVT::i32) == Legal) {
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ MVT::i32);
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ } else if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (getTypeAction(MVT::i64) == Legal) {
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), MVT::i64);
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+ // stores. If the target supports neither 32- nor 64-bits, this
+ // xform is certainly not worth it.
+ const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(4));
+ Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+ isVolatile, MinAlign(Alignment, 4U));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+ }
+ }
+ return SDValue();
+}
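Concretely, the rewrite relies on reinterpreting the float's bits: 1.0f has
the IEEE-754 encoding 0x3F800000, so 'store float 1.0' and 'store i32
0x3F800000' write identical bytes. A standalone sketch of the bitcast:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      float F = 1.0f;
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));    // bitcast, no conversion
      std::printf("store i32 0x%08X\n", Bits); // 0x3F800000
      return 0;
    }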
+
/// LegalizeOp - We know that the specified value has a legal type, and
/// that its operands are legal. Now ensure that the operation itself
/// is legal, recursively ensuring that the operands' operations remain
@@ -1293,50 +1349,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
bool isVolatile = ST->isVolatile();
if (!ST->isTruncatingStore()) {
- // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
- // FIXME: We shouldn't do this for TargetConstantFP's.
- // FIXME: move this to the DAG Combiner! Note that we can't regress due
- // to phase ordering between legalized code and the dag combiner. This
- // probably means that we need to integrate dag combiner and legalizer
- // together.
- // We generally can't do this one for long doubles.
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
- if (CFP->getValueType(0) == MVT::f32 &&
- getTypeAction(MVT::i32) == Legal) {
- Tmp3 = DAG.getConstant(CFP->getValueAPF().
- bitcastToAPInt().zextOrTrunc(32),
- MVT::i32);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, Alignment);
- break;
- } else if (CFP->getValueType(0) == MVT::f64) {
- // If this target supports 64-bit registers, do a single 64-bit store.
- if (getTypeAction(MVT::i64) == Legal) {
- Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
- zextOrTrunc(64), MVT::i64);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, Alignment);
- break;
- } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
- // Otherwise, if the target supports 32-bit registers, use 2 32-bit
- // stores. If the target supports neither 32- nor 64-bits, this
- // xform is certainly not worth it.
- const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
- SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
- SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
- if (TLI.isBigEndian()) std::swap(Lo, Hi);
-
- Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, Alignment);
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(4));
- Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
- isVolatile, MinAlign(Alignment, 4U));
-
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- break;
- }
- }
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ Result = SDValue(OptStore, 0);
+ break;
}
{
@@ -1510,6 +1525,46 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
}
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ // We can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
+ // Create the stack frame object.
+ MVT VT = Node->getValueType(0);
+ MVT OpVT = Node->getOperand(0).getValueType();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = OpVT.getSizeInBits() / 8;
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
+ Idx, SV, Offset));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+}
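In plain C++ terms the expansion is: reserve a stack slot big enough for the
vector, store each non-undef scalar element at its byte offset, then reload
the slot as one vector-typed value. A sketch with illustrative types, not
the actual SelectionDAG API:

    #include <cstring>

    struct Vec4 { float Elts[4]; }; // stands in for a legal vector type

    Vec4 BuildVector(float A, float B, float C, float D) {
      float Slot[4];                    // the stack temporary
      Slot[0] = A; Slot[1] = B;         // one store per element
      Slot[2] = C; Slot[3] = D;
      Vec4 V;
      std::memcpy(&V, Slot, sizeof(V)); // the final vector load
      return V;
    }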
+
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
DebugLoc dl = Node->getDebugLoc();
SDValue Tmp1 = Node->getOperand(0);
@@ -1853,40 +1908,8 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
}
}
- // Otherwise, we can't handle this case efficiently. Allocate a sufficiently
- // aligned object on the stack, store each element into it, then load
- // the result as a vector.
- // Create the stack frame object.
- SDValue FIPtr = DAG.CreateStackTemporary(VT);
- int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(FI);
-
- // Emit a store of each element to the stack slot.
- SmallVector<SDValue, 8> Stores;
- unsigned TypeByteSize = OpVT.getSizeInBits() / 8;
- // Store (in the right endianness) the elements to memory.
- for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
- // Ignore undef elements.
- if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
-
- unsigned Offset = TypeByteSize*i;
-
- SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
-
- Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
- Idx, SV, Offset));
- }
-
- SDValue StoreChain;
- if (!Stores.empty()) // Not all undef elements?
- StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- else
- StoreChain = DAG.getEntryNode();
-
- // Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+ // Otherwise, we can't handle this case efficiently.
+ return ExpandVectorBuildThroughStack(Node);
}
// ExpandLibCall - Expand a node into a call to a libcall. If the result value
@@ -2437,23 +2460,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
break;
case ISD::CONCAT_VECTORS: {
- // Use extract/insert/build vector for now. We might try to be
- // more clever later.
- SmallVector<SDValue, 8> Ops;
- unsigned NumOperands = Node->getNumOperands();
- for (unsigned i=0; i < NumOperands; ++i) {
- SDValue SubOp = Node->getOperand(i);
- MVT VVT = SubOp.getNode()->getValueType(0);
- MVT EltVT = VVT.getVectorElementType();
- unsigned NumSubElem = VVT.getVectorNumElements();
- for (unsigned j=0; j < NumSubElem; ++j) {
- Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
- DAG.getIntPtrConstant(j)));
- }
- }
- Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
- &Ops[0], Ops.size());
- Results.push_back(Tmp1);
+ Results.push_back(ExpandVectorBuildThroughStack(Node));
break;
}
case ISD::SCALAR_TO_VECTOR:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index eb9342c..0c826f6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -356,13 +356,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
unsigned NewOpc = N->getOpcode();
DebugLoc dl = N->getDebugLoc();
- // If we're promoting a UINT to a larger size, check to see if the new node
- // will be legal. If it isn't, check to see if FP_TO_SINT is legal, since
- // we can use that instead. This allows us to generate better code for
- // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not
- // legal, such as PowerPC.
+ // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+ // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
+ // and SINT conversions are Custom, there is no way to tell which is preferable.
+ // We choose SINT because that's the right thing on PPC.)
if (N->getOpcode() == ISD::FP_TO_UINT &&
- !TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NVT) &&
+ !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
NewOpc = ISD::FP_TO_SINT;
@@ -1747,7 +1746,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
DebugLoc dl = N->getDebugLoc();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i32)
+ if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
LC = RTLIB::SDIV_I32;
else if (VT == MVT::i64)
LC = RTLIB::SDIV_I64;
@@ -1909,7 +1910,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
DebugLoc dl = N->getDebugLoc();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i32)
+ if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
LC = RTLIB::SREM_I32;
else if (VT == MVT::i64)
LC = RTLIB::SREM_I64;
@@ -1938,7 +1941,9 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
DebugLoc dl = N->getDebugLoc();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i32)
+ if (VT == MVT::i16)
+ LC = RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
LC = RTLIB::UDIV_I32;
else if (VT == MVT::i64)
LC = RTLIB::UDIV_I64;
@@ -1956,7 +1961,9 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
DebugLoc dl = N->getDebugLoc();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i32)
+ if (VT == MVT::i16)
+ LC = RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
LC = RTLIB::UREM_I32;
else if (VT == MVT::i64)
LC = RTLIB::UREM_I64;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index df9af21..335c73c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -129,6 +129,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (!HasVectorValue)
return TranslateLegalizeResults(Op, Result);
+ MVT QueryType;
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Result);
@@ -162,8 +163,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::ANY_EXTEND:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FNEG:
@@ -183,10 +182,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
+ QueryType = Node->getValueType(0);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ QueryType = Node->getOperand(0).getValueType();
break;
}
- switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
case TargetLowering::Promote:
// "Promote" the operation by bitcasting
Result = PromoteVectorOp(Op);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 195896e..a9adce8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -154,7 +154,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Do not accept an all-undef vector.
if (i == e) return false;
- // Do not accept build_vectors that aren't all constants or which have non-~0
+ // Do not accept build_vectors that aren't all constants or which have non-0
// elements.
SDValue Zero = N->getOperand(i);
if (isa<ConstantSDNode>(Zero)) {
@@ -166,7 +166,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
} else
return false;
- // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // Okay, we have at least one 0 value, check to see if the rest match or are
// undefs.
for (++i; i != e; ++i)
if (N->getOperand(i) != Zero &&
@@ -2807,16 +2807,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUB:
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::FDIV:
- case ISD::FREM:
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
return N2; // fold op(arg1, undef) -> undef
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (UnsafeFPMath)
+ return N2;
+ break;
case ISD::MUL:
case ISD::AND:
case ISD::SRL:
@@ -3059,7 +3062,7 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
isSrcStr = isMemSrcFromString(Src, Str);
bool isSrcConst = isa<ConstantSDNode>(Src);
bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses();
- MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr);
+ MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
if (VT != MVT::iAny) {
unsigned NewAlign = (unsigned)
TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT());
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index 889d7f5..93750d6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -842,20 +842,6 @@ void SelectionDAGLowering::visit(unsigned Opcode, User &I) {
}
}
-void SelectionDAGLowering::visitAdd(User &I) {
- if (I.getType()->isFPOrFPVector())
- visitBinary(I, ISD::FADD);
- else
- visitBinary(I, ISD::ADD);
-}
-
-void SelectionDAGLowering::visitMul(User &I) {
- if (I.getType()->isFPOrFPVector())
- visitBinary(I, ISD::FMUL);
- else
- visitBinary(I, ISD::MUL);
-}
-
SDValue SelectionDAGLowering::getValue(const Value *V) {
SDValue &N = NodeMap[V];
if (N.getNode()) return N;
@@ -2161,37 +2147,33 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
}
-void SelectionDAGLowering::visitSub(User &I) {
+void SelectionDAGLowering::visitFSub(User &I) {
// -0.0 - X --> fneg
const Type *Ty = I.getType();
if (isa<VectorType>(Ty)) {
if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
const VectorType *DestTy = cast<VectorType>(I.getType());
const Type *ElTy = DestTy->getElementType();
- if (ElTy->isFloatingPoint()) {
- unsigned VL = DestTy->getNumElements();
- std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
- Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
- if (CV == CNZ) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
- Op2.getValueType(), Op2));
- return;
- }
- }
- }
- }
- if (Ty->isFloatingPoint()) {
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
- if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
+ unsigned VL = DestTy->getNumElements();
+ std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
+ Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+ if (CV == CNZ) {
SDValue Op2 = getValue(I.getOperand(1));
setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
Op2.getValueType(), Op2));
return;
}
+ }
}
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
- visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB);
+ visitBinary(I, ISD::FSUB);
}
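The fold is justified by an IEEE-754 identity: for any non-NaN X,
(-0.0) - X equals -X, including X == +0.0, where both sides yield -0.0.
A quick check:

    #include <cstdio>

    int main() {
      double X = 0.0;
      std::printf("%g %g\n", -0.0 - X, -X); // -0 -0
      return 0;
    }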
void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
index 578aa591..057c841 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -469,9 +469,12 @@ private:
void visitBinary(User &I, unsigned OpCode);
void visitShift(User &I, unsigned Opcode);
- void visitAdd(User &I);
- void visitSub(User &I);
- void visitMul(User &I);
+ void visitAdd(User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(User &I);
+ void visitMul(User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(User &I) { visitBinary(I, ISD::FMUL); }
void visitURem(User &I) { visitBinary(I, ISD::UREM); }
void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3334e53..ab4cd51 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2070,13 +2070,13 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
}
-/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
-/// loading 'Bytes' bytes from a location that is 'Dist' units away from the
-/// location that the 'Base' load is loading from.
-bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
- unsigned Bytes, int Dist,
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
+/// is loading from.
+bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+ unsigned Bytes, int Dist,
const MachineFrameInfo *MFI) const {
- if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode())
+ if (LD->getChain() != Base->getChain())
return false;
MVT VT = LD->getValueType(0);
if (VT.getSizeInBits() / 8 != Bytes)
@@ -2094,6 +2094,11 @@ bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
if (FS != BFS || FS != (int)Bytes) return false;
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
+ if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
+ if (V && (V->getSExtValue() == Dist*Bytes))
+ return true;
+ }
GlobalValue *GV1 = NULL;
GlobalValue *GV2 = NULL;
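The newly added case recognizes a pointer of the form (ADD BaseLoc, C): with
Bytes = 4 and Dist = 1, a load from Base+4 is consecutive with the Base
load. The arithmetic, as a standalone sketch (a hypothetical scalar form,
not the SDNode API):

    // True when Loc is exactly Dist loads of Bytes bytes past Base.
    bool isConsecutiveAddr(long Base, long Loc, unsigned Bytes, int Dist) {
      return Loc - Base == (long)Dist * (long)Bytes;
    }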
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index c31f622..bd6584a 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -33,99 +33,21 @@ STATISTIC(NumSUnfold , "Number of stores unfolded");
STATISTIC(NumModRefUnfold, "Number of modref unfolded");
namespace {
- enum RewriterName { simple, local, trivial };
+ enum RewriterName { local, trivial };
}
static cl::opt<RewriterName>
RewriterOpt("rewriter",
cl::desc("Rewriter to use: (default: local)"),
cl::Prefix,
- cl::values(clEnumVal(simple, "simple rewriter"),
- clEnumVal(local, "local rewriter"),
+ cl::values(clEnumVal(local, "local rewriter"),
clEnumVal(trivial, "trivial rewriter"),
clEnumValEnd),
cl::init(local));
VirtRegRewriter::~VirtRegRewriter() {}
-
-// ****************************** //
-// Simple Spiller Implementation //
-// ****************************** //
-
-struct VISIBILITY_HIDDEN SimpleRewriter : public VirtRegRewriter {
-
- bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
- LiveIntervals* LIs) {
- DOUT << "********** REWRITE MACHINE CODE **********\n";
- DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
- const TargetMachine &TM = MF.getTarget();
- const TargetInstrInfo &TII = *TM.getInstrInfo();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
-
-
- // LoadedRegs - Keep track of which vregs are loaded, so that we only load
- // each vreg once (in the case where a spilled vreg is used by multiple
- // operands). This is always smaller than the number of operands to the
- // current machine instr, so it should be small.
- std::vector<unsigned> LoadedRegs;
-
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- DOUT << MBBI->getBasicBlock()->getName() << ":\n";
- MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
- MII != E; ++MII) {
- MachineInstr &MI = *MII;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.getReg()) {
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- unsigned VirtReg = MO.getReg();
- unsigned SubIdx = MO.getSubReg();
- unsigned PhysReg = VRM.getPhys(VirtReg);
- unsigned RReg = SubIdx ? TRI.getSubReg(PhysReg, SubIdx) : PhysReg;
- if (!VRM.isAssignedReg(VirtReg)) {
- int StackSlot = VRM.getStackSlot(VirtReg);
- const TargetRegisterClass* RC =
- MF.getRegInfo().getRegClass(VirtReg);
-
- if (MO.isUse() &&
- std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg)
- == LoadedRegs.end()) {
- TII.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
- MachineInstr *LoadMI = prior(MII);
- VRM.addSpillSlotUse(StackSlot, LoadMI);
- LoadedRegs.push_back(VirtReg);
- ++NumLoads;
- DOUT << '\t' << *LoadMI;
- }
-
- if (MO.isDef()) {
- TII.storeRegToStackSlot(MBB, next(MII), PhysReg, true,
- StackSlot, RC);
- MachineInstr *StoreMI = next(MII);
- VRM.addSpillSlotUse(StackSlot, StoreMI);
- ++NumStores;
- }
- }
- MF.getRegInfo().setPhysRegUsed(RReg);
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- } else {
- MF.getRegInfo().setPhysRegUsed(MO.getReg());
- }
- }
- }
-
- DOUT << '\t' << MI;
- LoadedRegs.clear();
- }
- }
- return true;
- }
-};
/// This class is intended for use with the new spilling framework only. It
/// rewrites vreg def/uses to use the assigned preg, but does not insert any
@@ -2231,8 +2153,6 @@ llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
default: assert(0 && "Unreachable!");
case local:
return new LocalRewriter();
- case simple:
- return new SimpleRewriter();
case trivial:
return new TrivialRewriter();
}
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 29a05bb..a80513f 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -573,8 +573,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
return GV;
}
case Instruction::Add:
+ case Instruction::FAdd:
case Instruction::Sub:
+ case Instruction::FSub:
case Instruction::Mul:
+ case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
@@ -605,11 +608,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Type::FloatTyID:
switch (CE->getOpcode()) {
default: assert(0 && "Invalid float opcode"); abort();
- case Instruction::Add:
+ case Instruction::FAdd:
GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break;
- case Instruction::Sub:
+ case Instruction::FSub:
GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break;
- case Instruction::Mul:
+ case Instruction::FMul:
GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break;
case Instruction::FDiv:
GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break;
@@ -620,11 +623,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Type::DoubleTyID:
switch (CE->getOpcode()) {
default: assert(0 && "Invalid double opcode"); abort();
- case Instruction::Add:
+ case Instruction::FAdd:
GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break;
- case Instruction::Sub:
+ case Instruction::FSub:
GV.DoubleVal = LHS.DoubleVal - RHS.DoubleVal; break;
- case Instruction::Mul:
+ case Instruction::FMul:
GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break;
case Instruction::FDiv:
GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break;
@@ -638,15 +641,15 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
APFloat apfLHS = APFloat(LHS.IntVal);
switch (CE->getOpcode()) {
default: assert(0 && "Invalid long double opcode"); abort();
- case Instruction::Add:
+ case Instruction::FAdd:
apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
- case Instruction::Sub:
+ case Instruction::FSub:
apfLHS.subtract(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
- case Instruction::Mul:
+ case Instruction::FMul:
apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 765fed2..7dfeae0 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -64,45 +64,35 @@ void Interpreter::initializeExecutionEngine() {
Dest.TY##Val = Src1.TY##Val OP Src2.TY##Val; \
break
-#define IMPLEMENT_INTEGER_BINOP1(OP, TY) \
- case Type::IntegerTyID: { \
- Dest.IntVal = Src1.IntVal OP Src2.IntVal; \
- break; \
- }
-
-
-static void executeAddInst(GenericValue &Dest, GenericValue Src1,
- GenericValue Src2, const Type *Ty) {
+static void executeFAddInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
switch (Ty->getTypeID()) {
- IMPLEMENT_INTEGER_BINOP1(+, Ty);
IMPLEMENT_BINARY_OPERATOR(+, Float);
IMPLEMENT_BINARY_OPERATOR(+, Double);
default:
- cerr << "Unhandled type for Add instruction: " << *Ty << "\n";
+ cerr << "Unhandled type for FAdd instruction: " << *Ty << "\n";
abort();
}
}
-static void executeSubInst(GenericValue &Dest, GenericValue Src1,
- GenericValue Src2, const Type *Ty) {
+static void executeFSubInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
switch (Ty->getTypeID()) {
- IMPLEMENT_INTEGER_BINOP1(-, Ty);
IMPLEMENT_BINARY_OPERATOR(-, Float);
IMPLEMENT_BINARY_OPERATOR(-, Double);
default:
- cerr << "Unhandled type for Sub instruction: " << *Ty << "\n";
+ cerr << "Unhandled type for FSub instruction: " << *Ty << "\n";
abort();
}
}
-static void executeMulInst(GenericValue &Dest, GenericValue Src1,
- GenericValue Src2, const Type *Ty) {
+static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
switch (Ty->getTypeID()) {
- IMPLEMENT_INTEGER_BINOP1(*, Ty);
IMPLEMENT_BINARY_OPERATOR(*, Float);
IMPLEMENT_BINARY_OPERATOR(*, Double);
default:
- cerr << "Unhandled type for Mul instruction: " << *Ty << "\n";
+ cerr << "Unhandled type for FMul instruction: " << *Ty << "\n";
abort();
}
}
@@ -550,11 +540,14 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
GenericValue R; // Result
switch (I.getOpcode()) {
- case Instruction::Add: executeAddInst (R, Src1, Src2, Ty); break;
- case Instruction::Sub: executeSubInst (R, Src1, Src2, Ty); break;
- case Instruction::Mul: executeMulInst (R, Src1, Src2, Ty); break;
- case Instruction::FDiv: executeFDivInst (R, Src1, Src2, Ty); break;
- case Instruction::FRem: executeFRemInst (R, Src1, Src2, Ty); break;
+ case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break;
+ case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break;
+ case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break;
+ case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break;
+ case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break;
+ case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break;
+ case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break;
+ case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break;
case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
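The integer opcodes can now be folded inline because APInt overloads the
ordinary arithmetic operators, whereas the floating-point opcodes still
dispatch on the operand type ID. A small sketch of the APInt side:

    #include "llvm/ADT/APInt.h"
    #include <cstdio>

    int main() {
      llvm::APInt A(32, 41), B(32, 1); // 32-bit values
      llvm::APInt R = A + B;           // what Instruction::Add now does
      std::printf("%llu\n", (unsigned long long)R.getZExtValue()); // 42
      return 0;
    }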
@@ -1258,18 +1251,21 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
GenericValue Dest;
const Type * Ty = CE->getOperand(0)->getType();
switch (CE->getOpcode()) {
- case Instruction::Add: executeAddInst (Dest, Op0, Op1, Ty); break;
- case Instruction::Sub: executeSubInst (Dest, Op0, Op1, Ty); break;
- case Instruction::Mul: executeMulInst (Dest, Op0, Op1, Ty); break;
+ case Instruction::Add: Dest.IntVal = Op0.IntVal + Op1.IntVal; break;
+ case Instruction::Sub: Dest.IntVal = Op0.IntVal - Op1.IntVal; break;
+ case Instruction::Mul: Dest.IntVal = Op0.IntVal * Op1.IntVal; break;
+ case Instruction::FAdd: executeFAddInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FSub: executeFSubInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FMul: executeFMulInst(Dest, Op0, Op1, Ty); break;
case Instruction::FDiv: executeFDivInst(Dest, Op0, Op1, Ty); break;
case Instruction::FRem: executeFRemInst(Dest, Op0, Op1, Ty); break;
case Instruction::SDiv: Dest.IntVal = Op0.IntVal.sdiv(Op1.IntVal); break;
case Instruction::UDiv: Dest.IntVal = Op0.IntVal.udiv(Op1.IntVal); break;
case Instruction::URem: Dest.IntVal = Op0.IntVal.urem(Op1.IntVal); break;
case Instruction::SRem: Dest.IntVal = Op0.IntVal.srem(Op1.IntVal); break;
- case Instruction::And: Dest.IntVal = Op0.IntVal.And(Op1.IntVal); break;
- case Instruction::Or: Dest.IntVal = Op0.IntVal.Or(Op1.IntVal); break;
- case Instruction::Xor: Dest.IntVal = Op0.IntVal.Xor(Op1.IntVal); break;
+ case Instruction::And: Dest.IntVal = Op0.IntVal & Op1.IntVal; break;
+ case Instruction::Or: Dest.IntVal = Op0.IntVal | Op1.IntVal; break;
+ case Instruction::Xor: Dest.IntVal = Op0.IntVal ^ Op1.IntVal; break;
case Instruction::Shl:
Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue());
break;
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 89131a0..43f23e4 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -551,7 +551,7 @@ namespace {
// When outputting a function stub in the context of some other function, we
// save BufferBegin/BufferEnd/CurBufferPtr here.
- unsigned char *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+ uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
/// Relocations - These are the relocations that the function needs, as
/// emitted.
@@ -891,8 +891,11 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
break;
}
case Instruction::Add:
+ case Instruction::FAdd:
case Instruction::Sub:
+ case Instruction::FSub:
case Instruction::Mul:
+ case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
@@ -1056,11 +1059,11 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
// FnStart is the start of the text, not the start of the constant pool and
// other per-function data.
- unsigned char *FnStart =
- (unsigned char *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction());
+ uint8_t *FnStart =
+ (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction());
// FnEnd is the end of the function's machine code.
- unsigned char *FnEnd = CurBufferPtr;
+ uint8_t *FnEnd = CurBufferPtr;
if (!Relocations.empty()) {
CurFn = F.getFunction();
@@ -1183,7 +1186,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
} else {
DOUT << "JIT: Binary code:\n";
DOUT << std::hex;
- unsigned char* q = FnStart;
+ uint8_t* q = FnStart;
for (int i = 0; q < FnEnd; q += 4, ++i) {
if (i == 4)
i = 0;
@@ -1221,7 +1224,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
ActualSize);
BufferEnd = BufferBegin+ActualSize;
- unsigned char* FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd);
+ uint8_t* FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd);
MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
FrameRegister);
BufferBegin = SavedBufferBegin;
@@ -1416,7 +1419,7 @@ void JITEmitter::startGVStub(const GlobalValue* GV, void *Buffer,
SavedBufferEnd = BufferEnd;
SavedCurBufferPtr = CurBufferPtr;
- BufferBegin = CurBufferPtr = (unsigned char *)Buffer;
+ BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
BufferEnd = BufferBegin+StubSize+1;
}
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 2819b6d..70ccdcc 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -257,9 +257,9 @@ namespace {
// When emitting code into a memory block, this is the block.
MemoryRangeHeader *CurBlock;
- unsigned char *CurStubPtr, *StubBase;
- unsigned char *GOTBase; // Target Specific reserved memory
- void *DlsymTable; // Stub external symbol information
+ uint8_t *CurStubPtr, *StubBase;
+ uint8_t *GOTBase; // Target Specific reserved memory
+ void *DlsymTable; // Stub external symbol information
// Centralize memory block allocation.
sys::MemoryBlock getNewMemoryBlock(unsigned size);
@@ -273,12 +273,12 @@ namespace {
void AllocateGOT();
void SetDlsymTable(void *);
- unsigned char *allocateStub(const GlobalValue* F, unsigned StubSize,
- unsigned Alignment);
+ uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment);
/// startFunctionBody - When a function starts, allocate a block of free
/// executable memory, returning a pointer to it and its actual size.
- unsigned char *startFunctionBody(const Function *F, uintptr_t &ActualSize) {
+ uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) {
FreeRangeHeader* candidateBlock = FreeMemoryList;
FreeRangeHeader* head = FreeMemoryList;
@@ -301,18 +301,18 @@ namespace {
// Allocate the entire memory block.
FreeMemoryList = candidateBlock->AllocateBlock();
ActualSize = CurBlock->BlockSize-sizeof(MemoryRangeHeader);
- return (unsigned char *)(CurBlock+1);
+ return (uint8_t *)(CurBlock+1);
}
/// endFunctionBody - The function F is now allocated, and takes the memory
/// in the range [FunctionStart,FunctionEnd).
- void endFunctionBody(const Function *F, unsigned char *FunctionStart,
- unsigned char *FunctionEnd) {
+ void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
assert(FunctionEnd > FunctionStart);
- assert(FunctionStart == (unsigned char *)(CurBlock+1) &&
+ assert(FunctionStart == (uint8_t *)(CurBlock+1) &&
"Mismatched function start/end!");
- uintptr_t BlockSize = FunctionEnd - (unsigned char *)CurBlock;
+ uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock;
FunctionBlocks[F] = CurBlock;
// Release the memory at the end of this block that isn't needed.
@@ -320,17 +320,17 @@ namespace {
}
/// allocateSpace - Allocate a memory block of the given size.
- unsigned char *allocateSpace(intptr_t Size, unsigned Alignment) {
+ uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
CurBlock = FreeMemoryList;
FreeMemoryList = FreeMemoryList->AllocateBlock();
- unsigned char *result = (unsigned char *)CurBlock+1;
+ uint8_t *result = (uint8_t *)CurBlock+1;
if (Alignment == 0) Alignment = 1;
- result = (unsigned char*)(((intptr_t)result+Alignment-1) &
+ result = (uint8_t*)(((intptr_t)result+Alignment-1) &
~(intptr_t)(Alignment-1));
- uintptr_t BlockSize = result + Size - (unsigned char *)CurBlock;
+ uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock;
FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
return result;
@@ -338,28 +338,26 @@ namespace {
/// startExceptionTable - Use startFunctionBody to allocate memory for the
/// function's exception table.
- unsigned char* startExceptionTable(const Function* F,
- uintptr_t &ActualSize) {
+ uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
return startFunctionBody(F, ActualSize);
}
/// endExceptionTable - The exception table of F is now allocated,
/// and takes the memory in the range [TableStart,TableEnd).
- void endExceptionTable(const Function *F, unsigned char *TableStart,
- unsigned char *TableEnd,
- unsigned char* FrameRegister) {
+ void endExceptionTable(const Function *F, uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister) {
assert(TableEnd > TableStart);
- assert(TableStart == (unsigned char *)(CurBlock+1) &&
+ assert(TableStart == (uint8_t *)(CurBlock+1) &&
"Mismatched table start/end!");
- uintptr_t BlockSize = TableEnd - (unsigned char *)CurBlock;
+ uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock;
TableBlocks[F] = CurBlock;
// Release the memory at the end of this block that isn't needed.
FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
}
- unsigned char *getGOTBase() const {
+ uint8_t *getGOTBase() const {
return GOTBase;
}
@@ -433,7 +431,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() {
sys::MemoryBlock MemBlock = getNewMemoryBlock(16 << 20);
#endif
- unsigned char *MemBase = static_cast<unsigned char*>(MemBlock.base());
+ uint8_t *MemBase = static_cast<uint8_t*>(MemBlock.base());
// Allocate stubs backwards from the base, allocate functions forward
// from the base.
@@ -492,7 +490,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() {
void DefaultJITMemoryManager::AllocateGOT() {
assert(GOTBase == 0 && "Cannot allocate the got multiple times");
- GOTBase = new unsigned char[sizeof(void*) * 8192];
+ GOTBase = new uint8_t[sizeof(void*) * 8192];
HasGOT = true;
}
@@ -508,12 +506,12 @@ DefaultJITMemoryManager::~DefaultJITMemoryManager() {
Blocks.clear();
}
-unsigned char *DefaultJITMemoryManager::allocateStub(const GlobalValue* F,
+uint8_t *DefaultJITMemoryManager::allocateStub(const GlobalValue* F,
unsigned StubSize,
unsigned Alignment) {
CurStubPtr -= StubSize;
- CurStubPtr = (unsigned char*)(((intptr_t)CurStubPtr) &
- ~(intptr_t)(Alignment-1));
+ CurStubPtr = (uint8_t*)(((intptr_t)CurStubPtr) &
+ ~(intptr_t)(Alignment-1));
if (CurStubPtr < StubBase) {
// FIXME: allocate a new block
fprintf(stderr, "JIT ran out of memory for function stubs!\n");
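
Note: allocateSpace rounds the result pointer up with the classic power-of-two mask, and allocateStub uses the same mask to round down, since stubs are carved backwards from the end of the region. The round-up idiom in isolation (alignUp is a hypothetical helper; Alignment must be a power of two, as the JIT guarantees):

    #include <cstdint>

    // Round Ptr up to the next multiple of Alignment. The mask trick is
    // only valid when Alignment is a power of two.
    static uint8_t *alignUp(uint8_t *Ptr, uintptr_t Alignment) {
      uintptr_t P = reinterpret_cast<uintptr_t>(Ptr);
      P = (P + Alignment - 1) & ~(Alignment - 1);
      return reinterpret_cast<uint8_t *>(P);
    }
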
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 6ac37bc..42e6fda 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Format.h"
#include "llvm/System/Program.h"
+#include "llvm/System/Process.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
@@ -301,6 +302,35 @@ uint64_t raw_fd_ostream::seek(uint64_t off) {
return pos;
}
+raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
+ bool bg) {
+ if (sys::Process::ColorNeedsFlush())
+ flush();
+ const char *colorcode =
+ (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg)
+ : sys::Process::OutputColor(colors, bold, bg);
+ if (colorcode) {
+ unsigned len = strlen(colorcode);
+ write(colorcode, len);
+ // don't count the color code toward the output position
+ pos -= len;
+ }
+ return *this;
+}
+
+raw_ostream &raw_fd_ostream::resetColor() {
+ if (sys::Process::ColorNeedsFlush())
+ flush();
+ const char *colorcode = sys::Process::ResetColor();
+ if (colorcode) {
+ unsigned len = strlen(colorcode);
+ write(colorcode, len);
+ // don't count the color code toward the output position
+ pos -= len;
+ }
+ return *this;
+}
+
//===----------------------------------------------------------------------===//
// raw_stdout/err_ostream
//===----------------------------------------------------------------------===//
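
Note: changeColor and resetColor give raw_fd_ostream basic terminal coloring on top of the new sys::Process hooks. A usage sketch, assuming the Colors enum declared in raw_ostream.h and a caller that guards on terminal support (emitError is a hypothetical function):

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/System/Process.h"
    using namespace llvm;

    void emitError(raw_fd_ostream &OS, const char *Msg) {
      // Guard on terminal support before emitting color codes.
      bool UseColor = sys::Process::StandardErrHasColors();
      if (UseColor)
        OS.changeColor(raw_ostream::RED, /*bold=*/true, /*bg=*/false);
      OS << "error: " << Msg << '\n';
      if (UseColor)
        OS.resetColor();
    }
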
diff --git a/lib/System/Unix/Process.inc b/lib/System/Unix/Process.inc
index 74b9bb8..2da31c9 100644
--- a/lib/System/Unix/Process.inc
+++ b/lib/System/Unix/Process.inc
@@ -235,3 +235,62 @@ unsigned Process::StandardErrColumns() {
return getColumns(2);
}
+
+static bool terminalHasColors() {
+ if (const char *term = std::getenv("TERM")) {
+ // Most modern terminals support ANSI escape sequences for colors.
+ // We could check terminfo, or have a list of known terms that support
+ // colors, but that would be overkill.
+ // The user can always ask for no colors by setting TERM to dumb, or
+ // using a command-line flag.
+ return strcmp(term, "dumb") != 0;
+ }
+ return false;
+}
+
+bool Process::StandardOutHasColors() {
+ if (!StandardOutIsDisplayed())
+ return false;
+ return terminalHasColors();
+}
+
+bool Process::StandardErrHasColors() {
+ if (!StandardErrIsDisplayed())
+ return false;
+ return terminalHasColors();
+}
+
+bool Process::ColorNeedsFlush() {
+ // No: ANSI escape sequences take effect inline, so no flush is needed.
+ return false;
+}
+
+#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
+
+#define ALLCOLORS(FGBG,BOLD) {\
+ COLOR(FGBG, "0", BOLD),\
+ COLOR(FGBG, "1", BOLD),\
+ COLOR(FGBG, "2", BOLD),\
+ COLOR(FGBG, "3", BOLD),\
+ COLOR(FGBG, "4", BOLD),\
+ COLOR(FGBG, "5", BOLD),\
+ COLOR(FGBG, "6", BOLD),\
+ COLOR(FGBG, "7", BOLD)\
+ }
+
+static const char* colorcodes[2][2][8] = {
+ { ALLCOLORS("3",""), ALLCOLORS("3","1;") },
+ { ALLCOLORS("4",""), ALLCOLORS("4","1;") }
+};
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+ return colorcodes[bg?1:0][bold?1:0][code&7];
+}
+
+const char *Process::OutputBold(bool bg) {
+ return "\033[1m";
+}
+
+const char *Process::ResetColor() {
+ return "\033[0m";
+}
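
Note: the COLOR/ALLCOLORS macros just spell out standard SGR escape sequences: "\033[0;31m" selects a red foreground, "\033[0;1;31m" its bold variant, and "\033[0m" restores the defaults. The same sequences work with plain stdio:

    #include <cstdio>

    int main() {
      // 31 = red foreground; the "1;" adds bold; "\033[0m" resets attributes.
      std::printf("\033[0;1;31merror:\033[0m something failed\n");
      return 0;
    }
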
diff --git a/lib/System/Win32/Process.inc b/lib/System/Win32/Process.inc
index e1d7a92..cfbe33c 100644
--- a/lib/System/Win32/Process.inc
+++ b/lib/System/Win32/Process.inc
@@ -147,4 +147,71 @@ unsigned Process::StandardErrColumns() {
return Columns;
}
+// The Windows console always supports colors.
+bool Process::StandardErrHasColors() {
+ return StandardErrIsDisplayed();
+}
+
+bool Process::StandardOutHasColors() {
+ return StandardOutIsDisplayed();
+}
+
+namespace {
+class DefaultColors
+{
+ private:
+ WORD defaultColor;
+ public:
+ DefaultColors()
+ :defaultColor(GetCurrentColor()) {}
+ static unsigned GetCurrentColor() {
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+ return csbi.wAttributes;
+ return 0;
+ }
+ WORD operator()() const { return defaultColor; }
+};
+
+DefaultColors defaultColors;
+}
+
+bool Process::ColorNeedsFlush() {
+ return true;
+}
+
+const char *Process::OutputBold(bool bg) {
+ WORD colors = DefaultColors::GetCurrentColor();
+ if (bg)
+ colors |= BACKGROUND_INTENSITY;
+ else
+ colors |= FOREGROUND_INTENSITY;
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+ return 0;
+}
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+ WORD colors;
+ if (bg) {
+ colors = ((code&1) ? BACKGROUND_RED : 0) |
+ ((code&2) ? BACKGROUND_GREEN : 0 ) |
+ ((code&4) ? BACKGROUND_BLUE : 0);
+ if (bold)
+ colors |= BACKGROUND_INTENSITY;
+ } else {
+ colors = ((code&1) ? FOREGROUND_RED : 0) |
+ ((code&2) ? FOREGROUND_GREEN : 0 ) |
+ ((code&4) ? FOREGROUND_BLUE : 0);
+ if (bold)
+ colors |= FOREGROUND_INTENSITY;
+ }
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+ return 0;
+}
+
+const char *Process::ResetColor() {
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors());
+ return 0;
+}
+
}
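
Note: Windows consoles of this era do not interpret ANSI escapes; colors are screen-buffer attributes set through the Win32 API, and they only affect characters written after the call, which is why ColorNeedsFlush returns true on this platform. A standalone sketch of the same calls:

    #include <windows.h>
    #include <cstdio>

    int main() {
      HANDLE Out = GetStdHandle(STD_OUTPUT_HANDLE);
      CONSOLE_SCREEN_BUFFER_INFO Info;
      GetConsoleScreenBufferInfo(Out, &Info);          // remember the defaults
      SetConsoleTextAttribute(Out, FOREGROUND_RED | FOREGROUND_INTENSITY);
      std::printf("error: something failed\n");
      std::fflush(stdout);                             // write while red is active
      SetConsoleTextAttribute(Out, Info.wAttributes);  // restore the defaults
      return 0;
    }
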
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 3a038c9..a75ed3b 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -17,7 +17,6 @@
#include "llvm/Type.h"
#include "llvm/Support/Streams.h"
#include "llvm/Support/raw_ostream.h"
-#include <ostream>
using namespace llvm;
ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 09b8ce0..963ff0d 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -64,11 +64,15 @@ namespace {
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
- SmallVector<MachineBasicBlock::iterator, 4>
- MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
- int Opcode, unsigned Size,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch, MemOpQueue &MemOps);
+ bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill, int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+ DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+ void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+ int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
@@ -108,16 +112,16 @@ static int getLoadStoreMultipleOpcode(int Opcode) {
return 0;
}
-/// mergeOps - Create and insert a LDM or STM with Base as base register and
+/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
-static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- int Offset, unsigned Base, bool BaseKill, int Opcode,
- ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
- SmallVector<std::pair<unsigned, bool>, 8> &Regs,
- const TargetInstrInfo *TII) {
- // FIXME would it be better to take a DL from one of the loads arbitrarily?
- DebugLoc dl = DebugLoc::getUnknownLoc();
+bool
+ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill,
+ int Opcode, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned Scratch, DebugLoc dl,
+ SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
// Only a single register to load / store. Don't bother.
unsigned NumRegs = Regs.size();
if (NumRegs <= 1)
@@ -185,20 +189,21 @@ static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
-SmallVector<MachineBasicBlock::iterator, 4>
+void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
- unsigned Base, int Opcode, unsigned Size,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch, MemOpQueue &MemOps) {
- SmallVector<MachineBasicBlock::iterator, 4> Merges;
+ unsigned Base, int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
unsigned Pos = MemOps[SIndex].Position;
MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
- unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
+ DebugLoc dl = Loc->getDebugLoc();
+ unsigned PReg = Loc->getOperand(0).getReg();
unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
- bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();
+ bool isKill = Loc->getOperand(0).isKill();
SmallVector<std::pair<unsigned,bool>, 8> Regs;
Regs.push_back(std::make_pair(PReg, isKill));
@@ -216,18 +221,17 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
PRegNum = RegNum;
} else {
// Can't merge this in. Try merge the earlier ones first.
- if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
- Scratch, Regs, TII)) {
+ if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
+ Scratch, dl, Regs)) {
Merges.push_back(prior(Loc));
for (unsigned j = SIndex; j < i; ++j) {
MBB.erase(MemOps[j].MBBI);
MemOps[j].Merged = true;
}
}
- SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
- MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps);
- Merges.append(Merges2.begin(), Merges2.end());
- return Merges;
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
+ MemOps, Merges);
+ return;
}
if (MemOps[i].Position > Pos) {
@@ -237,8 +241,8 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
}
bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
- if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
- Scratch, Regs, TII)) {
+ if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
+ Scratch, dl, Regs)) {
Merges.push_back(prior(Loc));
for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
MBB.erase(MemOps[i].MBBI);
@@ -246,7 +250,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
}
}
- return Merges;
+ return;
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -530,7 +534,7 @@ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (isAM2)
// STR_PRE, STR_POST;
BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(BaseKill))
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
else
// FSTMS, FSTMD
@@ -590,6 +594,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
ARMCC::CondCodes CurrPred = ARMCC::AL;
unsigned CurrPredReg = 0;
unsigned Position = 0;
+ SmallVector<MachineBasicBlock::iterator,4> Merges;
RS->enterBasicBlock(&MBB);
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
@@ -689,16 +694,16 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
RS->forward(prior(MBBI));
// Merge ops.
- SmallVector<MachineBasicBlock::iterator,4> MBBII =
- MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
- CurrPred, CurrPredReg, Scratch, MemOps);
+ Merges.clear();
+ MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+ CurrPred, CurrPredReg, Scratch, MemOps, Merges);
 // Try folding preceding/trailing base inc/dec into the generated
// LDM/STM ops.
- for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
- if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
+ for (unsigned i = 0, e = Merges.size(); i < e; ++i)
+ if (mergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
++NumMerges;
- NumMerges += MBBII.size();
+ NumMerges += Merges.size();
 // Try folding preceding/trailing base inc/dec into those load/store
// that were not merged to form LDM/STM ops.
@@ -709,6 +714,13 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// RS may be pointing to an instruction that's deleted.
RS->skipTo(prior(MBBI));
+ } else if (NumMemOps == 1) {
+ // Try folding preceding/trailing base inc/dec into the single
+ // load/store.
+ if (mergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+ ++NumMerges;
+ RS->forward(prior(MBBI));
+ }
}
CurrBase = 0;
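
Note: the interface change is the heart of this patch: MergeLDR_STR used to return a SmallVector of merge points by value from every recursive call, copying the elements at each level; the caller now owns a single Merges vector that all levels fill in place. The pattern in miniature (collectEvens is a hypothetical example):

    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Returning SmallVector by value copies the elements at every level of
    // recursion; an out-parameter reuses one allocation throughout.
    static void collectEvens(unsigned N, SmallVector<unsigned, 4> &Out) {
      if (N == 0)
        return;
      if ((N & 1) == 0)
        Out.push_back(N);
      collectEvens(N - 1, Out);   // appends into the caller's storage
    }
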
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 6662be1..0b0e289 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -100,7 +100,7 @@ public:
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
JumpTableUId(0), ConstPoolEntryUId(0) {}
- ARMFunctionInfo(MachineFunction &MF) :
+ explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
Align(isThumb ? 1U : 2U),
VarArgsRegSaveSize(0), HasStackFrame(false),
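
Note: this and the matching MachineFunctionInfo constructors below (Alpha, IA64, MSP430, PPC, X86) gain explicit for the usual reason: a single-argument constructor otherwise doubles as an implicit conversion. A generic illustration (FnInfo is hypothetical):

    struct FnInfo {
      explicit FnInfo(int ID) : ID(ID) {}
      int ID;
    };

    void record(const FnInfo &Info);

    void caller() {
      record(FnInfo(7));   // fine: explicit construction
      // record(7);        // rejected with 'explicit'; without it, the int
      //                   // would silently convert to a temporary FnInfo
    }
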
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index e8daf74..b95d1f9 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -103,28 +103,28 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
let MethodBodies = [{
// FP is R11, R9 is available.
static const unsigned ARM_GPR_AO_1[] = {
- ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R12,ARM::LR,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
ARM::R8, ARM::R9, ARM::R10,
ARM::R11 };
// FP is R11, R9 is not available.
static const unsigned ARM_GPR_AO_2[] = {
- ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R12,ARM::LR,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
ARM::R8, ARM::R10,
ARM::R11 };
// FP is R7, R9 is available.
static const unsigned ARM_GPR_AO_3[] = {
- ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R12,ARM::LR,
ARM::R4, ARM::R5, ARM::R6,
ARM::R8, ARM::R9, ARM::R10,ARM::R11,
ARM::R7 };
// FP is R7, R9 is not available.
static const unsigned ARM_GPR_AO_4[] = {
- ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R12,ARM::LR,
ARM::R4, ARM::R5, ARM::R6,
ARM::R8, ARM::R10,ARM::R11,
@@ -186,7 +186,7 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
// scavenging.
let MethodBodies = [{
static const unsigned THUMB_tGPR_AO[] = {
- ARM::R2, ARM::R1, ARM::R0,
+ ARM::R0, ARM::R1, ARM::R2,
ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
// FP is R7, only low registers available.
diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h
index 47de5df..8221fc7 100644
--- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h
+++ b/lib/Target/Alpha/AlphaMachineFunctionInfo.h
@@ -33,8 +33,8 @@ class AlphaMachineFunctionInfo : public MachineFunctionInfo {
public:
AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0) {}
- AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
- GlobalRetAddr(0) {}
+ explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
+ GlobalRetAddr(0) {}
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 4d7b545..5814d27 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -1000,8 +1000,11 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
Out << ')';
return;
case Instruction::Add:
+ case Instruction::FAdd:
case Instruction::Sub:
+ case Instruction::FSub:
case Instruction::Mul:
+ case Instruction::FMul:
case Instruction::SDiv:
case Instruction::UDiv:
case Instruction::FDiv:
@@ -1020,9 +1023,12 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
bool NeedsClosingParens = printConstExprCast(CE, Static);
printConstantWithCast(CE->getOperand(0), CE->getOpcode());
switch (CE->getOpcode()) {
- case Instruction::Add: Out << " + "; break;
- case Instruction::Sub: Out << " - "; break;
- case Instruction::Mul: Out << " * "; break;
+ case Instruction::Add:
+ case Instruction::FAdd: Out << " + "; break;
+ case Instruction::Sub:
+ case Instruction::FSub: Out << " - "; break;
+ case Instruction::Mul:
+ case Instruction::FMul: Out << " * "; break;
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem: Out << " % "; break;
@@ -1322,8 +1328,6 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
case Instruction::Mul:
// We need to cast integer arithmetic so that it is always performed
// as unsigned, to avoid undefined behavior on overflow.
- if (!Ty->isIntOrIntVector()) break;
- // FALL THROUGH
case Instruction::LShr:
case Instruction::URem:
case Instruction::UDiv: NeedsExplicitCast = true; break;
@@ -1387,8 +1391,6 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
case Instruction::Mul:
// We need to cast integer arithmetic so that it is always performed
// as unsigned, to avoid undefined behavior on overflow.
- if (!OpTy->isIntOrIntVector()) break;
- // FALL THROUGH
case Instruction::LShr:
case Instruction::UDiv:
case Instruction::URem:
@@ -1505,8 +1507,6 @@ bool CWriter::writeInstructionCast(const Instruction &I) {
case Instruction::Mul:
// We need to cast integer arithmetic so that it is always performed
// as unsigned, to avoid undefined behavior on overflow.
- if (!Ty->isIntOrIntVector()) break;
- // FALL THROUGH
case Instruction::LShr:
case Instruction::URem:
case Instruction::UDiv:
@@ -1552,8 +1552,6 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
case Instruction::Mul:
// We need to cast integer arithmetic so that it is always performed
// as unsigned, to avoid undefined behavior on overflow.
- if (!OpTy->isIntOrIntVector()) break;
- // FALL THROUGH
case Instruction::LShr:
case Instruction::UDiv:
case Instruction::URem: // Cast to unsigned first
@@ -2606,6 +2604,10 @@ void CWriter::visitBinaryOperator(Instruction &I) {
Out << "-(";
writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
Out << ")";
+ } else if (BinaryOperator::isFNeg(&I)) {
+ Out << "-(";
+ writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I)));
+ Out << ")";
} else if (I.getOpcode() == Instruction::FRem) {
// Output a call to fmod/fmodf instead of emitting a%b
if (I.getType() == Type::FloatTy)
@@ -2630,9 +2632,12 @@ void CWriter::visitBinaryOperator(Instruction &I) {
writeOperandWithCast(I.getOperand(0), I.getOpcode());
switch (I.getOpcode()) {
- case Instruction::Add: Out << " + "; break;
- case Instruction::Sub: Out << " - "; break;
- case Instruction::Mul: Out << " * "; break;
+ case Instruction::Add:
+ case Instruction::FAdd: Out << " + "; break;
+ case Instruction::Sub:
+ case Instruction::FSub: Out << " - "; break;
+ case Instruction::Mul:
+ case Instruction::FMul: Out << " * "; break;
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem: Out << " % "; break;
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 4082989..04a6829 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -865,8 +865,11 @@ namespace {
Out << "Constant* " << constName << " = ConstantExpr::";
switch (CE->getOpcode()) {
case Instruction::Add: Out << "getAdd("; break;
+ case Instruction::FAdd: Out << "getFAdd("; break;
case Instruction::Sub: Out << "getSub("; break;
+ case Instruction::FSub: Out << "getFSub("; break;
case Instruction::Mul: Out << "getMul("; break;
+ case Instruction::FMul: Out << "getFMul("; break;
case Instruction::UDiv: Out << "getUDiv("; break;
case Instruction::SDiv: Out << "getSDiv("; break;
case Instruction::FDiv: Out << "getFDiv("; break;
@@ -1159,8 +1162,11 @@ namespace {
break;
}
case Instruction::Add:
+ case Instruction::FAdd:
case Instruction::Sub:
+ case Instruction::FSub:
case Instruction::Mul:
+ case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
@@ -1176,8 +1182,11 @@ namespace {
Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
switch (I->getOpcode()) {
case Instruction::Add: Out << "Instruction::Add"; break;
+ case Instruction::FAdd: Out << "Instruction::FAdd"; break;
case Instruction::Sub: Out << "Instruction::Sub"; break;
+ case Instruction::FSub: Out << "Instruction::FSub"; break;
case Instruction::Mul: Out << "Instruction::Mul"; break;
+ case Instruction::FMul: Out << "Instruction::FMul"; break;
case Instruction::UDiv:Out << "Instruction::UDiv"; break;
case Instruction::SDiv:Out << "Instruction::SDiv"; break;
case Instruction::FDiv:Out << "Instruction::FDiv"; break;
diff --git a/lib/Target/IA64/IA64MachineFunctionInfo.h b/lib/Target/IA64/IA64MachineFunctionInfo.h
index fb93056..e6254d6 100644
--- a/lib/Target/IA64/IA64MachineFunctionInfo.h
+++ b/lib/Target/IA64/IA64MachineFunctionInfo.h
@@ -24,7 +24,7 @@ public:
// by this machinefunction? (used to compute the appropriate
// entry in the 'alloc' instruction at the top of the
// machinefunction)
- IA64FunctionInfo(MachineFunction& MF) { outRegsUsed=0; };
+ explicit IA64FunctionInfo(MachineFunction& MF) { outRegsUsed=0; };
};
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index ada851d..37e5b1e 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -1060,12 +1060,15 @@ void MSILWriter::printInstruction(const Instruction* Inst) {
break;
// Binary
case Instruction::Add:
+ case Instruction::FAdd:
printBinaryInstruction("add",Left,Right);
break;
case Instruction::Sub:
+ case Instruction::FSub:
printBinaryInstruction("sub",Left,Right);
break;
- case Instruction::Mul:
+ case Instruction::Mul:
+ case Instruction::FMul:
printBinaryInstruction("mul",Left,Right);
break;
case Instruction::UDiv:
@@ -1322,12 +1325,15 @@ void MSILWriter::printConstantExpr(const ConstantExpr* CE) {
printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2));
break;
case Instruction::Add:
+ case Instruction::FAdd:
printBinaryInstruction("add",left,right);
break;
case Instruction::Sub:
+ case Instruction::FSub:
printBinaryInstruction("sub",left,right);
break;
case Instruction::Mul:
+ case Instruction::FMul:
printBinaryInstruction("mul",left,right);
break;
case Instruction::UDiv:
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index b94d7e4..1d26ae3 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -28,7 +28,8 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
public:
MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
- MSP430MachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {}
+ explicit MSP430MachineFunctionInfo(MachineFunction &MF)
+ : CalleeSavedFrameSize(0) {}
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 0f83fd2..ac9a143 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -46,6 +46,16 @@ static const char *getIntrinsicName(unsigned opcode) {
case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
case RTLIB::MUL_I16: Basename = "mul.i16"; break;
case RTLIB::MUL_I32: Basename = "mul.i32"; break;
+
+ case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break;
+ case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break;
+ case RTLIB::UDIV_I16: Basename = "udiv.i16"; break;
+ case RTLIB::UDIV_I32: Basename = "udiv.i32"; break;
+
+ case RTLIB::SREM_I16: Basename = "srem.i16"; break;
+ case RTLIB::SREM_I32: Basename = "srem.i32"; break;
+ case RTLIB::UREM_I16: Basename = "urem.i16"; break;
+ case RTLIB::UREM_I32: Basename = "urem.i32"; break;
}
std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -90,6 +100,20 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
setLibcallName(RTLIB::MUL_I16, getIntrinsicName(RTLIB::MUL_I16));
setLibcallName(RTLIB::MUL_I32, getIntrinsicName(RTLIB::MUL_I32));
+ // Signed division lib call names
+ setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16));
+ setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32));
+ // Unsigned division lib call names
+ setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16));
+ setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32));
+
+ // Signed remainder lib call names
+ setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16));
+ setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32));
+ // Unsigned remainder lib call names
+ setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
+ setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
+
setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
@@ -105,6 +129,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
setOperationAction(ISD::ADDC, MVT::i8, Custom);
setOperationAction(ISD::SUBE, MVT::i8, Custom);
setOperationAction(ISD::SUBC, MVT::i8, Custom);
+ setOperationAction(ISD::SUB, MVT::i8, Custom);
setOperationAction(ISD::ADD, MVT::i8, Custom);
setOperationAction(ISD::ADD, MVT::i16, Custom);
@@ -354,21 +379,11 @@ SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) {
FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(SDValue(N,0));
// FIXME there isn't really debug info here
DebugLoc dl = FR->getDebugLoc();
- // FIXME: Not used.
- // int Index = FR->getIndex();
// Expand FrameIndex like GlobalAddress and ExternalSymbol
// Also use Offset field for lo and hi parts. The default
// offset is zero.
- /*
- SDValue Offset = DAG.getConstant(0, MVT::i8);
- SDValue FI = DAG.getTargetFrameIndex(Index, MVT::i8);
- SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, FI, Offset);
- SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, FI, Offset);
- return DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), Lo, Hi);
- */
-
SDValue ES;
int FrameOffset;
SDValue FI = SDValue(N,0);
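
Note: PIC16 has no divide or remainder instructions, so the 16- and 32-bit variants are routed to named runtime routines. Registering a libcall name is the standard TargetLowering recipe for any operation the hardware lacks; a fragment of the pattern inside a TargetLowering constructor (the symbol name here is hypothetical):

    // Any operation legalized to a libcall is emitted as a call to the
    // registered symbol during SelectionDAG legalization.
    setLibcallName(RTLIB::SDIV_I32, "__my_sdiv32");  // hypothetical symbol
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
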
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index a7744b8..87f8fb0b4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -227,15 +227,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ // This is just the low 32 bits of a (signed) fp->i64 conversion.
+ // We cannot do this with Promote because i64 is not a legal type.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
// FIXME: disable this lowered code. This generates 64-bit register values,
// and we don't model the fact that the top part is clobbered by calls. We
// need to flag these together so that the value isn't live across a call.
//setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-
- // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
@@ -2858,7 +2857,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
}
// FIXME: Split this code up when LegalizeDAGTypes lands.
-SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
DebugLoc dl) {
assert(Op.getOperand(0).getValueType().isFloatingPoint());
SDValue Src = Op.getOperand(0);
@@ -2867,9 +2866,11 @@ SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
SDValue Tmp;
switch (Op.getValueType().getSimpleVT()) {
- default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+ default: assert(0 && "Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
- Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src);
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
+ PPCISD::FCTIDZ,
+ dl, MVT::f64, Src);
break;
case MVT::i64:
Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
@@ -3740,7 +3741,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG,
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
Op.getDebugLoc());
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -3834,7 +3836,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
- Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl));
+ Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
}
}
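
Note: the new comment is worth unpacking. For inputs in [0, 2^32), a signed fp-to-i64 conversion (FCTIDZ) cannot overflow, and its low 32 bits are exactly the fp-to-u32 result, so i32 FP_TO_UINT can share the FP_TO_SINT path; Promote is unavailable because i64 is not a legal type on 32-bit PowerPC. The identity in scalar C++:

    #include <cstdint>

    // Valid for D in [0, 2^32): the i64 conversion is exact and truncating
    // it to 32 bits yields the unsigned conversion result.
    uint32_t fpToUint32ViaSigned(double D) {
      return static_cast<uint32_t>(static_cast<int64_t>(D));
    }
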
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 7946474..b6d046f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -377,7 +377,7 @@ namespace llvm {
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget);
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 42883d7..b359dd3 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -59,7 +59,7 @@ private:
bool HasFastCall;
public:
- PPCFunctionInfo(MachineFunction &MF)
+ explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
SpillsCR(false),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 5d5beeb..cb31506 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -908,6 +908,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
// to adjust the stack pointer (we fit in the Red Zone).
+ bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
if (!DisableRedZone &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
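
Note: DisableRedZone used to be a global -disable-red-zone flag (removed from TargetMachine.cpp below); it is now derived per function from the NoRedZone attribute. The query, as both the PPC and X86 prologue code now perform it (mustAvoidRedZone is a hypothetical wrapper):

    #include "llvm/Function.h"
    #include "llvm/Attributes.h"
    using namespace llvm;

    // A function marked noredzone must always adjust the stack pointer,
    // even when its frame would fit inside the ABI red zone.
    static bool mustAvoidRedZone(const Function &F) {
      return F.hasFnAttr(Attribute::NoRedZone);
    }
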
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 1b042dd..dea293b 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -41,7 +41,6 @@ namespace llvm {
bool RealignStack;
bool DisableJumpTables;
bool StrongPHIElim;
- bool DisableRedZone;
bool AsmVerbosityDefault(false);
}
@@ -86,11 +85,6 @@ GenerateSoftFloatCalls("soft-float",
cl::location(UseSoftFloat),
cl::init(false));
static cl::opt<bool, true>
-GenerateNoImplicitFloats("no-implicit-float",
- cl::desc("Don't generate implicit floating point instructions (x86-only)"),
- cl::location(NoImplicitFloat),
- cl::init(false));
-static cl::opt<bool, true>
DontPlaceZerosInBSS("nozero-initialized-in-bss",
cl::desc("Don't place zero-initialized symbols into bss section"),
cl::location(NoZerosInBSS),
@@ -163,11 +157,6 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
cl::desc("Use strong PHI elimination."),
cl::location(StrongPHIElim),
cl::init(false));
-static cl::opt<bool, true>
-DisableRedZoneOption("disable-red-zone",
- cl::desc("Do not emit code that uses the red zone."),
- cl::location(DisableRedZone),
- cl::init(false));
//---------------------------------------------------------------------------
// TargetMachine Class
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 4c3cc82..2604741 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,5 +14,6 @@
#include "X86ELFWriterInfo.h"
using namespace llvm;
-X86ELFWriterInfo::X86ELFWriterInfo() : TargetELFWriterInfo(EM_386) {}
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) :
+ TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {}
X86ELFWriterInfo::~X86ELFWriterInfo() {}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 06e051a..acfa501 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,7 +20,7 @@ namespace llvm {
class X86ELFWriterInfo : public TargetELFWriterInfo {
public:
- X86ELFWriterInfo();
+ X86ELFWriterInfo(bool is64Bit);
virtual ~X86ELFWriterInfo();
};
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1f507c3..ef60ff5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -126,7 +126,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
- if (!UseSoftFloat && !NoImplicitFloat) {
+ if (!UseSoftFloat) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
@@ -550,6 +550,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -734,6 +738,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ if (!DisableMMX && Subtarget->hasMMX()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+ }
}
if (Subtarget->hasSSE41()) {
@@ -868,11 +878,14 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
/// determining it.
MVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr) const {
+ bool isSrcConst, bool isSrcStr,
+ SelectionDAG &DAG) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
- if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
return MVT::v4i32;
if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
@@ -1404,11 +1417,12 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
TotalNumXMMRegs);
+ bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
- assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloat) &&
+ assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1())
+ if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
@@ -2414,9 +2428,10 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
- int NumElts = VT.getVectorNumElements();
- if (NumElts != 2 && NumElts != 4)
+ if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
+
+ int NumElts = VT.getVectorNumElements();
if (!isUndefOrEqual(Mask[0], NumElts))
return false;
@@ -3068,7 +3083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
// Special case for single non-zero, non-undef, element.
- if (NumNonZero == 1 && NumElems <= 4) {
+ if (NumNonZero == 1) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDValue Item = Op.getOperand(Idx);
@@ -3109,15 +3124,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// If we have a constant or non-constant insertion into the low element of
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
// the rest of the elements. This will be matched as movd/movq/movss/movsd
- // depending on what the source datatype is. Because we can only get here
- // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
- if (Idx == 0 &&
- // Don't do this for i64 values on x86-32.
- (EVT != MVT::i64 || Subtarget->is64Bit())) {
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
- Subtarget->hasSSE2(), DAG);
+ // depending on what the source datatype is.
+ if (Idx == 0) {
+ if (NumZero == 0) {
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 ||
+ (EVT == MVT::i64 && Subtarget->is64Bit())) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
+ DAG);
+ } else if (EVT == MVT::i16 || EVT == MVT::i8) {
+ Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
+ MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasSSE2(), DAG);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+ }
}
// Is it a vector logical left shift?
@@ -4248,7 +4272,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if (EVT.getSizeInBits() == 16) {
+ if (EVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
@@ -4554,6 +4578,14 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getValueType();
+
+ if (SrcVT.isVector()) {
+ if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
+ return Op;
+ }
+ return SDValue();
+ }
+
assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
@@ -4845,6 +4877,14 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
}
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ if (Op.getValueType().isVector()) {
+ if (Op.getValueType() == MVT::v2i32 &&
+ Op.getOperand(0).getValueType() == MVT::v2f64) {
+ return Op;
+ }
+ return SDValue();
+ }
+
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
@@ -7675,8 +7715,9 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
if (Elt.getOpcode() == ISD::UNDEF)
continue;
- if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
- EVT.getSizeInBits()/8, i, MFI))
+ LoadSDNode *LD = cast<LoadSDNode>(Elt);
+ LoadSDNode *LDBase = cast<LoadSDNode>(Base);
+ if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
return false;
}
return true;
@@ -7751,44 +7792,82 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
MVT VT = N->getValueType(0);
MVT EVT = VT.getVectorElementType();
- if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
- // We are looking for load i64 and zero extend. We want to transform
- // it before legalizer has a chance to expand it. Also look for i64
- // BUILD_PAIR bit casted to f64.
- return SDValue();
- // This must be an insertion into a zero vector.
- SDValue HighElt = N->getOperand(1);
- if (!isZeroNode(HighElt))
- return SDValue();
+
+ // Before or during type legalization, we want to try to convert a
+ // build_vector of an i64 load and a zero value into vzext_movl before the
+ // legalizer can break it up.
+ // FIXME: does the case below remove the need to do this?
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
+ if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+ return SDValue();
+
+ // This must be an insertion into a zero vector.
+ SDValue HighElt = N->getOperand(1);
+ if (!isZeroNode(HighElt))
+ return SDValue();
+
+ // Value must be a load.
+ SDNode *Base = N->getOperand(0).getNode();
+ if (!isa<LoadSDNode>(Base)) {
+ if (Base->getOpcode() != ISD::BIT_CONVERT)
+ return SDValue();
+ Base = Base->getOperand(0).getNode();
+ if (!isa<LoadSDNode>(Base))
+ return SDValue();
+ }
+
+ // Transform it into VZEXT_LOAD addr.
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+ // Load must not be an extload.
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+
+ // Load type should be a legal type so we don't have to legalize it.
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+ DCI.CommitTargetLoweringOpt(TLO);
+ return ResNode;
+ }
+
+ // The type legalizer will have broken apart a v2i64 build_vector created
+ // during widening, before the code that handles that case runs. Look for a
+ // build_vector of (load, load + 4, 0/undef, 0/undef).
+ if (VT == MVT::v4i32 || VT == MVT::v4f32) {
+ LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
+ if (!LD0 || !LD1)
+ return SDValue();
+ if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
+ LD1->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+ // Make sure the second elt is a consecutive load.
+ if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
+ DAG.getMachineFunction().getFrameInfo()))
+ return SDValue();
- // Value must be a load.
- SDNode *Base = N->getOperand(0).getNode();
- if (!isa<LoadSDNode>(Base)) {
- if (Base->getOpcode() != ISD::BIT_CONVERT)
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
return SDValue();
- Base = Base->getOperand(0).getNode();
- if (!isa<LoadSDNode>(Base))
+ if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
return SDValue();
+
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
+ DCI.CommitTargetLoweringOpt(TLO);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
}
-
- // Transform it into VZEXT_LOAD addr.
- LoadSDNode *LD = cast<LoadSDNode>(Base);
-
- // Load must not be an extload.
- if (LD->getExtensionType() != ISD::NON_EXTLOAD)
- return SDValue();
-
- // Load type should legal type so we don't have to legalize it.
- if (!TLI.isTypeLegal(VT))
- return SDValue();
-
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- TargetLowering::TargetLoweringOpt TLO(DAG);
- TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
- DCI.CommitTargetLoweringOpt(TLO);
- return ResNode;
+ return SDValue();
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
@@ -8242,7 +8321,10 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
if (VT.getSizeInBits() != 64)
return SDValue();
- bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2();
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+ && Subtarget->hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
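
Note: NoImplicitFloat follows the same flag-to-attribute migration as NoRedZone, which is why getOptimalMemOpType grows a SelectionDAG parameter: it needs a path to the current Function. The guarded decision, reduced from the hunk above (chooseMemOpType is a hypothetical condensation):

    #include "llvm/Function.h"
    #include "llvm/Attributes.h"
    #include "llvm/CodeGen/ValueTypes.h"
    #include <cstdint>
    using namespace llvm;

    // Only widen memset/memcpy to 16-byte SSE types when the function
    // permits implicitly generated floating-point/vector code.
    static MVT chooseMemOpType(const Function &F, uint64_t Size,
                               bool SrcConstOrStr, bool HasSSE2,
                               unsigned StackAlign) {
      if (!F.hasFnAttr(Attribute::NoImplicitFloat) && StackAlign >= 16 &&
          SrcConstOrStr && HasSSE2 && Size >= 16)
        return MVT::v4i32;   // four 32-bit lanes, one SSE store
      return MVT::i32;       // conservative scalar fallback
    }
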
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 550f8bd..fb4eb68 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -378,7 +378,8 @@ namespace llvm {
/// determining it.
virtual
MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr) const;
+ bool isSrcConst, bool isSrcStr,
+ SelectionDAG &DAG) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2cd3733..8a9b7c9 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2009,16 +2009,24 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize);
+ unsigned CalleeFrameSize = 0;
unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
+ const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
- BuildMI(MBB, MI, DL, get(Opc))
- .addReg(Reg, RegState::Kill);
+ if (RegClass != &X86::VR128RegClass) {
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, get(Opc))
+ .addReg(Reg, RegState::Kill);
+ } else {
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+ }
}
+
+ X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
return true;
}
@@ -2036,7 +2044,12 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- BuildMI(MBB, MI, DL, get(Opc), Reg);
+ const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+ if (RegClass != &X86::VR128RegClass) {
+ BuildMI(MBB, MI, DL, get(Opc), Reg);
+ } else {
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+ }
}
return true;
}
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 8f287e1..43fadc2 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -577,41 +577,17 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
-// Move scalar to MMX zero-extended
-// movd to MMX register zero-extends
-let AddedComplexity = 15 in {
- def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
- (MMX_MOVZDI2PDIrr GR32:$src)>;
- def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
- (MMX_MOVZDI2PDIrr GR32:$src)>;
-}
-
let AddedComplexity = 20 in {
- def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
- (MMX_MOVZDI2PDIrm addr:$src)>;
- def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
- (MMX_MOVZDI2PDIrm addr:$src)>;
def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
(MMX_MOVZDI2PDIrm addr:$src)>;
}
// Clear top half.
let AddedComplexity = 15 in {
- def : Pat<(v8i8 (X86vzmovl VR64:$src)),
- (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
- def : Pat<(v4i16 (X86vzmovl VR64:$src)),
- (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
def : Pat<(v2i32 (X86vzmovl VR64:$src)),
(MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
}
-// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
-// 8 or 16-bits matter.
-def : Pat<(bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
- (MMX_MOVD64rr GR32:$src)>;
-def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
- (MMX_MOVD64rr GR32:$src)>;
-
// Patterns to perform canonical versions of vector shuffling.
let AddedComplexity = 10 in {
def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1fafa46..b44c7a6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3447,7 +3447,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
}
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
-defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovsxbq", int_x86_sse41_pmovzxbq>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 8a5ac2c..fafcf7e 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -73,14 +73,15 @@ public:
SRetReturnReg(0),
GlobalBaseReg(0) {}
- X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
- CalleeSavedFrameSize(0),
- BytesToPopOnReturn(0),
- DecorationStyle(None),
- ReturnAddrIndex(0),
- TailCallReturnAddrDelta(0),
- SRetReturnReg(0),
- GlobalBaseReg(0) {}
+ explicit X86MachineFunctionInfo(MachineFunction &MF)
+ : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ DecorationStyle(None),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 5af1fb1..c733f26 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -751,10 +751,12 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone).
+ bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone);
if (Is64Bit && !DisableRedZone &&
!needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->hasCalls()) { // No calls.
+ !MFI->hasCalls() && // No calls.
+ !Subtarget->isTargetWin64()) { // Win64 has no Red Zone
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (hasFP(MF)) MinSize += SlotSize;
StackSize = std::max(MinSize,
@@ -820,13 +822,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
- unsigned ReadyLabelId = 0;
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer is ready.
- ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
- }
-
// Skip the callee-saved push instructions.
while (MBBI != MBB.end() &&
(MBBI->getOpcode() == X86::PUSH32r ||
@@ -836,20 +831,20 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- if (NumBytes) { // adjust stack pointer: ESP -= numbytes
+ if (NumBytes) { // Adjust stack pointer: ESP -= numbytes.
if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
- // Check, whether EAX is livein for this function
+ // Check whether EAX is live-in for this function.
bool isEAXAlive = false;
for (MachineRegisterInfo::livein_iterator
II = MF.getRegInfo().livein_begin(),
EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
unsigned Reg = II->first;
isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
- Reg == X86::AH || Reg == X86::AL);
+ Reg == X86::AH || Reg == X86::AL);
}
- // Function prologue calls _alloca to probe the stack when allocating
- // more than 4k bytes in one go. Touching the stack at 4K increments is
+ // Function prologue calls _alloca to probe the stack when allocating more
+ // than 4k bytes in one go. Touching the stack at 4K increments is
// necessary to ensure that the guard pages used by the OS virtual memory
// manager are allocated in correct sequence.
if (!isEAXAlive) {
@@ -861,12 +856,14 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
.addReg(X86::EAX, RegState::Kill);
+
// Allocate NumBytes-4 bytes on stack. We'll also use 4 already
// allocated bytes for EAX.
- BuildMI(MBB, MBBI, DL,
- TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes-4);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("_alloca");
+
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
X86::EAX),
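
The guard-page rule described above is easiest to see as a loop: the OS commits stack memory one page at a time, so every 4K step must be touched in order. A conceptual sketch of what the probe guarantees, not the code that is emitted (StackTop and NumBytes are hypothetical stand-ins):

    void probeStack(char *StackTop, uint64_t NumBytes) {
      // Touch each page from the current stack top downward so the virtual
      // memory manager can extend the guard region one page at a time.
      for (uint64_t Offset = 0; Offset < NumBytes; Offset += 4096)
        *reinterpret_cast<volatile char *>(StackTop - Offset) = 0;
    }

On these targets _alloca performs the probing and receives its size argument in EAX, which is why the sequence above materializes NumBytes-4 into EAX and saves and restores the register when it is live-in.
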
@@ -878,6 +875,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
// merge the two. This can be the case when tail call elimination is
// enabled and the callee has more arguments than the caller.
NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
// If there is an ADD32ri or SUB32ri of ESP immediately after this
// instruction, merge the two instructions.
mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
@@ -887,8 +885,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
}
}
- if (needsFrameMoves)
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer is ready.
+ unsigned ReadyLabelId = 0;
+ ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+ }
}
void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 8264462..88ab247 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -133,7 +133,8 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
DataLayout(Subtarget.getDataLayout()),
FrameInfo(TargetFrameInfo::StackGrowsDown,
Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
- InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
+ InstrInfo(*this), JITInfo(*this), TLInfo(*this),
+ ELFWriterInfo(Subtarget.is64Bit()) {
DefRelocModel = getRelocationModel();
// FIXME: Correctly select PIC model for Win64 stuff
if (getRelocationModel() == Reloc::Default) {
@@ -213,6 +214,13 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose,
raw_ostream &Out) {
+ // FIXME: Move this somewhere else!
+ // On Darwin, override 64-bit static relocation to pic_ since the
+ // assembler doesn't support it.
+ if (DefRelocModel == Reloc::Static &&
+ Subtarget.isTargetDarwin() && Subtarget.is64Bit())
+ setRelocationModel(Reloc::PIC_);
+
assert(AsmPrinterCtor && "AsmPrinter was not linked in");
if (AsmPrinterCtor)
PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 43adb0f..124a011 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -40,7 +40,7 @@ public:
FPSpillSlot(0),
VarArgsFrameIndex(0) {}
- XCoreFunctionInfo(MachineFunction &MF) :
+ explicit XCoreFunctionInfo(MachineFunction &MF) :
UsesLR(false),
LRSpillSlot(0),
FPSpillSlot(0),
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 733dfa9..673d38b 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -59,7 +59,8 @@ cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
/// two values.
namespace {
struct VISIBILITY_HIDDEN Expression {
- enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM,
+ enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
+ UDIV, SDIV, FDIV, UREM, SREM,
FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
@@ -200,8 +201,11 @@ Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
default: // THIS SHOULD NEVER HAPPEN
assert(0 && "Binary operator with unknown opcode?");
case Instruction::Add: return Expression::ADD;
+ case Instruction::FAdd: return Expression::FADD;
case Instruction::Sub: return Expression::SUB;
+ case Instruction::FSub: return Expression::FSUB;
case Instruction::Mul: return Expression::MUL;
+ case Instruction::FMul: return Expression::FMUL;
case Instruction::UDiv: return Expression::UDIV;
case Instruction::SDiv: return Expression::SDIV;
case Instruction::FDiv: return Expression::FDIV;
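
With the opcode split, GVN's value numbering keys integer and floating-point arithmetic separately. A hedged illustration using constructors this import provides (A, B are assumed Values of type i32 and X, Y of type double):

    // These two adds now map to distinct ExpressionOpcodes (ADD vs. FADD),
    // so they can never receive the same value number.
    BinaryOperator *IntAdd = BinaryOperator::CreateAdd(A, B, "ia");
    BinaryOperator *FpAdd  = BinaryOperator::CreateFAdd(X, Y, "fa");
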
diff --git a/lib/Transforms/Scalar/GVNPRE.cpp b/lib/Transforms/Scalar/GVNPRE.cpp
index e3b0937..0f3153f 100644
--- a/lib/Transforms/Scalar/GVNPRE.cpp
+++ b/lib/Transforms/Scalar/GVNPRE.cpp
@@ -55,7 +55,8 @@ namespace {
/// two values.
struct Expression {
- enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM,
+ enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
+ UDIV, SDIV, FDIV, UREM, SREM,
FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
@@ -202,10 +203,16 @@ Expression::ExpressionOpcode
switch(BO->getOpcode()) {
case Instruction::Add:
return Expression::ADD;
+ case Instruction::FAdd:
+ return Expression::FADD;
case Instruction::Sub:
return Expression::SUB;
+ case Instruction::FSub:
+ return Expression::FSUB;
case Instruction::Mul:
return Expression::MUL;
+ case Instruction::FMul:
+ return Expression::FMUL;
case Instruction::UDiv:
return Expression::UDIV;
case Instruction::SDiv:
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index af61eae..83503fd 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -754,7 +754,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge));
if (!Incr) return;
- if (Incr->getOpcode() != Instruction::Add) return;
+ if (Incr->getOpcode() != Instruction::FAdd) return;
ConstantFP *IncrValue = NULL;
unsigned IncrVIndex = 1;
if (Incr->getOperand(1) == PH)
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index e6f854f..97bd34c 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -167,8 +167,11 @@ namespace {
// otherwise - Change was made, replace I with returned instruction
//
Instruction *visitAdd(BinaryOperator &I);
+ Instruction *visitFAdd(BinaryOperator &I);
Instruction *visitSub(BinaryOperator &I);
+ Instruction *visitFSub(BinaryOperator &I);
Instruction *visitMul(BinaryOperator &I);
+ Instruction *visitFMul(BinaryOperator &I);
Instruction *visitURem(BinaryOperator &I);
Instruction *visitSRem(BinaryOperator &I);
Instruction *visitFRem(BinaryOperator &I);
@@ -403,7 +406,8 @@ X("instcombine", "Combine redundant instructions");
// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
static unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
- if (BinaryOperator::isNeg(V) || BinaryOperator::isNot(V))
+ if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
+ BinaryOperator::isNot(V))
return 3;
return 4;
}
@@ -576,6 +580,25 @@ static inline Value *dyn_castNegVal(Value *V) {
return 0;
}
+// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
+// instruction if the LHS is a constant negative zero (which is the 'negate'
+// form).
+//
+static inline Value *dyn_castFNegVal(Value *V) {
+ if (BinaryOperator::isFNeg(V))
+ return BinaryOperator::getFNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V))
+ return ConstantExpr::getFNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isFloatingPoint())
+ return ConstantExpr::getFNeg(C);
+
+ return 0;
+}
+
static inline Value *dyn_castNotVal(Value *V) {
if (BinaryOperator::isNot(V))
return BinaryOperator::getNotArgument(V);
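
At this revision LLVM has no dedicated fneg instruction: floating-point negation is spelled as a subtraction from negative zero, which is the pattern dyn_castFNegVal peels apart. A hedged sketch using the helper this import adds in lib/VMCore/Instructions.cpp:

    // Builds "fsub -0.0, X", the canonical negate form recognized by isFNeg().
    Value *emitFNeg(Value *X, Instruction *InsertPt) {
      return BinaryOperator::CreateFNeg(X, "neg", InsertPt);
    }
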
@@ -1733,12 +1756,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
default: assert(0 && "Case stmts out of sync!");
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse2_sub_sd:
- TmpV = InsertNewInstBefore(BinaryOperator::CreateSub(LHS, RHS,
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
II->getName()), *II);
break;
case Intrinsic::x86_sse_mul_ss:
case Intrinsic::x86_sse2_mul_sd:
- TmpV = InsertNewInstBefore(BinaryOperator::CreateMul(LHS, RHS,
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS,
II->getName()), *II);
break;
}
@@ -2052,14 +2075,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, RHS);
// X + 0 --> X
- if (!I.getType()->isFPOrFPVector()) { // NOTE: -0 + +0 = +0.
- if (RHSC->isNullValue())
- return ReplaceInstUsesWith(I, LHS);
- } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->isExactlyValue(ConstantFP::getNegativeZero
- (I.getType())->getValueAPF()))
- return ReplaceInstUsesWith(I, LHS);
- }
+ if (RHSC->isNullValue())
+ return ReplaceInstUsesWith(I, LHS);
if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
// X + (signbit) --> X ^ signbit
@@ -2317,11 +2334,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return SelectInst::Create(SI->getCondition(), A, N);
}
}
-
- // Check for X+0.0. Simplify it to X if we know X is not -0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
- if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
- return ReplaceInstUsesWith(I, LHS);
// Check for (add (sext x), y), see if we can merge this into an
// integer add followed by a sext.
@@ -2359,7 +2371,42 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
}
-
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // X + 0 --> X
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero
+ (I.getType())->getValueAPF()))
+ return ReplaceInstUsesWith(I, LHS);
+ }
+
+ if (isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castFNegVal(LHS))
+ return BinaryOperator::CreateFSub(RHS, LHSV);
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castFNegVal(RHS))
+ return BinaryOperator::CreateFSub(LHS, V);
+
+ // Check for X+0.0. Simplify it to X if we know X is not -0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+ if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
+ return ReplaceInstUsesWith(I, LHS);
+
// Check for (add double (sitofp x), y), see if we can merge this into an
// integer add followed by a promotion.
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
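
The signed-zero guard above is the whole reason visitFAdd cannot blindly fold X + 0.0: adding +0.0 can clear a sign bit, while adding -0.0 never changes a value. A small worked example, assuming IEEE doubles:

    #include <cassert>
    #include <cmath>

    int main() {
      double x = -0.0;
      assert(std::signbit(x));
      assert(!std::signbit(x + 0.0));  // -0.0 + +0.0 == +0.0: not an identity
      assert(std::signbit(x + -0.0));  // -0.0 + -0.0 == -0.0: safe to fold
      return 0;
    }

Hence the -0.0 case folds unconditionally, while the +0.0 case additionally requires CannotBeNegativeZero(LHS).
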
@@ -2407,8 +2454,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Op0 == Op1 && // sub X, X -> 0
- !I.getType()->isFPOrFPVector())
+ if (Op0 == Op1) // sub X, X -> 0
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// If this is a 'B = x-(-A)', change to B = x+A...
@@ -2469,8 +2515,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateXor(Op0, Op1);
if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
- if (Op1I->getOpcode() == Instruction::Add &&
- !Op0->getType()->isFPOrFPVector()) {
+ if (Op1I->getOpcode() == Instruction::Add) {
if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName());
else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
@@ -2487,8 +2532,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
// is not used by anyone else...
//
- if (Op1I->getOpcode() == Instruction::Sub &&
- !Op1I->getType()->isFPOrFPVector()) {
+ if (Op1I->getOpcode() == Instruction::Sub) {
// Swap the two operands of the subexpr...
Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
Op1I->setOperand(0, IIOp1);
@@ -2526,18 +2570,17 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
}
}
- if (!Op0->getType()->isFPOrFPVector())
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- if (Op0I->getOpcode() == Instruction::Add) {
- if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X
- return ReplaceInstUsesWith(I, Op0I->getOperand(1));
- else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X
- return ReplaceInstUsesWith(I, Op0I->getOperand(0));
- } else if (Op0I->getOpcode() == Instruction::Sub) {
- if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
- return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName());
- }
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+ if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X
+ return ReplaceInstUsesWith(I, Op0I->getOperand(1));
+ else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X
+ return ReplaceInstUsesWith(I, Op0I->getOperand(0));
+ } else if (Op0I->getOpcode() == Instruction::Sub) {
+ if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
+ return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName());
}
+ }
ConstantInt *C1;
if (Value *X = dyn_castFoldableMul(Op0, C1)) {
@@ -2551,6 +2594,40 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return 0;
}
+Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // If this is a 'B = x-(-A)', change to B = x+A...
+ if (Value *V = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFAdd(Op0, V);
+
+ if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+ if (Op1I->getOpcode() == Instruction::FAdd) {
+ if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
+ return BinaryOperator::CreateFNeg(Op1I->getOperand(1), I.getName());
+ else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
+ return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName());
+ }
+
+ if (Op1I->hasOneUse()) {
+ // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
+ // is not used by anyone else...
+ //
+ if (Op1I->getOpcode() == Instruction::FSub) {
+ // Swap the two operands of the subexpr...
+ Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
+ Op1I->setOperand(0, IIOp1);
+ Op1I->setOperand(1, IIOp0);
+
+ // Create the new top level fadd instruction...
+ return BinaryOperator::CreateFAdd(Op0, Op1);
+ }
+ }
+ }
+
+ return 0;
+}
+
/// isSignBitCheck - Given an exploded icmp instruction, return true if the
/// comparison only checks the sign bit. If it only checks the sign bit, set
/// TrueIfSigned if the result of the comparison is true when the input value is
@@ -2585,7 +2662,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0);
- if (isa<UndefValue>(I.getOperand(1))) // undef * X -> 0
+ // TODO: If Op1 is undef and Op0 is finite, return zero.
+ if (!I.getType()->isFPOrFPVector() &&
+ isa<UndefValue>(I.getOperand(1))) // undef * X -> 0
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// Simplify mul instructions with a constant RHS...
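
The new isFPOrFPVector() guard exists because 0.0 is not an annihilator in IEEE arithmetic, so undef * X must not become zero for floating-point types. A one-line demonstration, assuming IEEE doubles:

    #include <cassert>
    #include <cmath>
    #include <limits>

    int main() {
      double inf = std::numeric_limits<double>::infinity();
      assert(std::isnan(inf * 0.0));  // folding to 0.0 would be wrong here
      return 0;
    }
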
@@ -2611,17 +2690,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return BinaryOperator::CreateShl(Op0,
ConstantInt::get(Op0->getType(), Val.logBase2()));
}
- } else if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
- if (Op1F->isNullValue())
- return ReplaceInstUsesWith(I, Op1);
-
- // "In IEEE floating point, x*1 is not equivalent to x for nans. However,
- // ANSI says we can drop signals, so we can do this anyway." (from GCC)
- if (Op1F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
} else if (isa<VectorType>(Op1->getType())) {
- if (isa<ConstantAggregateZero>(Op1))
- return ReplaceInstUsesWith(I, Op1);
+ // TODO: If Op1 is all zeros and Op0 is all finite, return all zeros.
if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
if (Op1V->isAllOnesValue()) // X * -1 == 0 - X
@@ -2629,9 +2699,6 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
// As above, vector X*splat(1.0) -> X in all defined cases.
if (Constant *Splat = Op1V->getSplatValue()) {
- if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
- if (F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
if (CI->equalsInt(1))
return ReplaceInstUsesWith(I, Op0);
@@ -2755,6 +2822,45 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return Changed ? &I : 0;
}
+Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0);
+
+ // Simplify mul instructions with a constant RHS...
+ if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) {
+ if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
+ // "In IEEE floating point, x*1 is not equivalent to x for nans. However,
+ // ANSI says we can drop signals, so we can do this anyway." (from GCC)
+ if (Op1F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
+ } else if (isa<VectorType>(Op1->getType())) {
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (Constant *Splat = Op1V->getSplatValue()) {
+ if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
+ }
+ }
+ }
+
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castFNegVal(I.getOperand(1)))
+ return BinaryOperator::CreateFMul(Op0v, Op1v);
+
+ return Changed ? &I : 0;
+}
+
/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
/// instruction.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
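
Of the folds retained in visitFMul, the double-negation one is exact: IEEE negation only flips a sign bit, and a product's sign is the XOR of its operands' signs, so (-X) * (-Y) and X * Y round identically. For instance (hedged, IEEE doubles):

    #include <cassert>

    int main() {
      double x = 1.5e-300, y = -2.5;
      assert((-x) * (-y) == x * y);  // identical rounding on both sides
      return 0;
    }
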
@@ -8562,17 +8668,17 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
- // If we have fptrunc(add (fpextend x), (fpextend y)), where x and y are
+ // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
// smaller than the destination type, we can eliminate the truncate by doing
- // the add as the smaller type. This applies to add/sub/mul/div as well as
+ // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well as
// many builtins (sqrt, etc).
BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
if (OpI && OpI->hasOneUse()) {
switch (OpI->getOpcode()) {
default: break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
const Type *SrcTy = OpI->getType();
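
In C terms the fptrunc shrink has this shape (hedged: that a two-float add computed in double and truncated matches the float add is the classical double-rounding result, assumed here rather than re-derived):

    float before(float x, float y) { return (float)((double)x + (double)y); }
    float after(float x, float y)  { return x + y; }  // same rounded result
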
@@ -9322,11 +9428,15 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is
// even legal for FP.
- if (TI->getOpcode() == Instruction::Sub &&
- FI->getOpcode() == Instruction::Add) {
+ if ((TI->getOpcode() == Instruction::Sub &&
+ FI->getOpcode() == Instruction::Add) ||
+ (TI->getOpcode() == Instruction::FSub &&
+ FI->getOpcode() == Instruction::FAdd)) {
AddOp = FI; SubOp = TI;
- } else if (FI->getOpcode() == Instruction::Sub &&
- TI->getOpcode() == Instruction::Add) {
+ } else if ((FI->getOpcode() == Instruction::Sub &&
+ TI->getOpcode() == Instruction::Add) ||
+ (FI->getOpcode() == Instruction::FSub &&
+ TI->getOpcode() == Instruction::FAdd)) {
AddOp = TI; SubOp = FI;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 92270b5..944f409 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2268,7 +2268,8 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
/* create new increment. '++d' in above example. */
ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
- BinaryOperator::Create(Incr->getOpcode(),
+ BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
+ Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
@@ -2424,24 +2425,14 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
// Get the terminating condition for the loop if possible (this isn't
// necessarily in the latch, or a block that's a predecessor of the header).
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- if (ExitBlocks.size() != 1) return;
+ if (!L->getExitBlock())
+ return; // More than one loop exit block.
// Okay, there is one exit block. Try to find the condition that causes the
// loop to be exited.
- BasicBlock *ExitBlock = ExitBlocks[0];
-
- BasicBlock *ExitingBlock = 0;
- for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock);
- PI != E; ++PI)
- if (L->contains(*PI)) {
- if (ExitingBlock == 0)
- ExitingBlock = *PI;
- else
- return; // More than one block exiting!
- }
- assert(ExitingBlock && "No exits from loop, something is broken!");
+ BasicBlock *ExitingBlock = L->getExitingBlock();
+ if (!ExitingBlock)
+ return; // More than one block exiting!
// Okay, we've computed the exiting block. See what condition causes us to
// exit.
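
The replacement leans on two Loop accessors that encode "unique or null", which is exactly the property the deleted scan computed by hand. A hedged usage sketch:

    // True only when the loop has a single exit block and a single exiting
    // block, the precondition OptimizeLoopCountIV needs.
    bool hasUniqueExitStructure(Loop *L) {
      return L->getExitBlock() != 0 && L->getExitingBlock() != 0;
    }
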
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 4b00640..59989c9 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -1009,7 +1009,7 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
return Op1;
if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
- return B.CreateMul(Op1, Op1, "pow2");
+ return B.CreateFMul(Op1, Op1, "pow2");
if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
return 0;
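
Spelled out in C++, the three pow() rewrites in this hunk are (hedged: exact equality assumes a correctly rounded libm for these arguments):

    double pow_1(double x)  { return x; }        // pow(x,  1.0)
    double pow_2(double x)  { return x * x; }    // pow(x,  2.0), now an fmul
    double pow_m1(double x) { return 1.0 / x; }  // pow(x, -1.0)
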
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 2cde765..bcc6b81 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -419,9 +419,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
case Instruction::LShr:
case Instruction::AShr:
case Instruction::ICmp:
- case Instruction::FCmp:
- if (I->getOperand(0)->getType()->isFPOrFPVector())
- return false; // FP arithmetic might trap.
break; // These are all cheap and non-trapping instructions.
}
@@ -1012,9 +1009,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
default: return false; // Not safe / profitable to hoist.
case Instruction::Add:
case Instruction::Sub:
- // FP arithmetic might trap. Not worth doing for vector ops.
- if (HInst->getType()->isFloatingPoint()
- || isa<VectorType>(HInst->getType()))
+ // Not worth doing for vector ops.
+ if (isa<VectorType>(HInst->getType()))
return false;
break;
case Instruction::And:
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index 5a8fad9..8dfbd1d 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -59,6 +59,10 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += "ssp ";
if (Attrs & Attribute::StackProtectReq)
Result += "sspreq ";
+ if (Attrs & Attribute::NoRedZone)
+ Result += "noredzone ";
+ if (Attrs & Attribute::NoImplicitFloat)
+ Result += "noimplicitfloat ";
if (Attrs & Attribute::Alignment) {
Result += "align ";
Result += utostr(Attribute::getAlignmentFromAttrs(Attrs));
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 7e4902f..1d293cc 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -602,10 +602,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return Constant::getNullValue(C1->getType());
case Instruction::UDiv:
case Instruction::SDiv:
- case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
- case Instruction::FRem:
if (!isa<UndefValue>(C2)) // undef / X -> 0
return Constant::getNullValue(C1->getType());
return const_cast<Constant*>(C2); // X / undef -> undef
@@ -783,13 +781,13 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
switch (Opcode) {
default:
break;
- case Instruction::Add:
+ case Instruction::FAdd:
(void)C3V.add(C2V, APFloat::rmNearestTiesToEven);
return ConstantFP::get(C3V);
- case Instruction::Sub:
+ case Instruction::FSub:
(void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven);
return ConstantFP::get(C3V);
- case Instruction::Mul:
+ case Instruction::FMul:
(void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven);
return ConstantFP::get(C3V);
case Instruction::FDiv:
@@ -808,12 +806,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
switch (Opcode) {
default:
break;
- case Instruction::Add:
+ case Instruction::Add:
return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getAdd);
- case Instruction::Sub:
+ case Instruction::FAdd:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFAdd);
+ case Instruction::Sub:
return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getSub);
- case Instruction::Mul:
+ case Instruction::FSub:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFSub);
+ case Instruction::Mul:
return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getMul);
+ case Instruction::FMul:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFMul);
case Instruction::UDiv:
return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getUDiv);
case Instruction::SDiv:
@@ -851,7 +855,9 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
// other way if possible.
switch (Opcode) {
case Instruction::Add:
+ case Instruction::FAdd:
case Instruction::Mul:
+ case Instruction::FMul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
@@ -862,6 +868,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
case Instruction::LShr:
case Instruction::AShr:
case Instruction::Sub:
+ case Instruction::FSub:
case Instruction::SDiv:
case Instruction::UDiv:
case Instruction::FDiv:
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 97f3ac9..69c503d 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -775,26 +775,46 @@ const SmallVector<unsigned, 4> &ConstantExpr::getIndices() const {
/// specify the full Instruction::OPCODE identifier.
///
Constant *ConstantExpr::getNeg(Constant *C) {
+ // API compatibility: Adjust integer opcodes to floating-point opcodes.
+ if (C->getType()->isFPOrFPVector())
+ return getFNeg(C);
+ assert(C->getType()->isIntOrIntVector() &&
+ "Cannot NEG a nonintegral value!");
return get(Instruction::Sub,
ConstantExpr::getZeroValueForNegationExpr(C->getType()),
C);
}
+Constant *ConstantExpr::getFNeg(Constant *C) {
+ assert(C->getType()->isFPOrFPVector() &&
+ "Cannot FNEG a non-floating-point value!");
+ return get(Instruction::FSub,
+ ConstantExpr::getZeroValueForNegationExpr(C->getType()),
+ C);
+}
Constant *ConstantExpr::getNot(Constant *C) {
- assert((isa<IntegerType>(C->getType()) ||
- cast<VectorType>(C->getType())->getElementType()->isInteger()) &&
- "Cannot NOT a nonintegral value!");
+ assert(C->getType()->isIntOrIntVector() &&
+ "Cannot NOT a nonintegral value!");
return get(Instruction::Xor, C,
Constant::getAllOnesValue(C->getType()));
}
Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2) {
return get(Instruction::Add, C1, C2);
}
+Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
+ return get(Instruction::FAdd, C1, C2);
+}
Constant *ConstantExpr::getSub(Constant *C1, Constant *C2) {
return get(Instruction::Sub, C1, C2);
}
+Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
+ return get(Instruction::FSub, C1, C2);
+}
Constant *ConstantExpr::getMul(Constant *C1, Constant *C2) {
return get(Instruction::Mul, C1, C2);
}
+Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
+ return get(Instruction::FMul, C1, C2);
+}
Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2) {
return get(Instruction::UDiv, C1, C2);
}
@@ -2142,15 +2162,28 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate,
}
Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2) {
+ // API compatibility: Adjust integer opcodes to floating-point opcodes.
+ if (C1->getType()->isFPOrFPVector()) {
+ if (Opcode == Instruction::Add) Opcode = Instruction::FAdd;
+ else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub;
+ else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul;
+ }
#ifndef NDEBUG
switch (Opcode) {
- case Instruction::Add:
+ case Instruction::Add:
case Instruction::Sub:
- case Instruction::Mul:
+ case Instruction::Mul:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isInteger() || C1->getType()->isFloatingPoint() ||
- isa<VectorType>(C1->getType())) &&
- "Tried to create an arithmetic operation on a non-arithmetic type!");
+ assert(C1->getType()->isIntOrIntVector() &&
+ "Tried to create an integer operation on a non-integer type!");
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVector() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
break;
case Instruction::UDiv:
case Instruction::SDiv:
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 9e030b7..7556b8e 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -101,8 +101,11 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
// Standard binary operators...
case Add: return "add";
+ case FAdd: return "fadd";
case Sub: return "sub";
+ case FSub: return "fsub";
case Mul: return "mul";
+ case FMul: return "fmul";
case UDiv: return "udiv";
case SDiv: return "sdiv";
case FDiv: return "fdiv";
@@ -330,19 +333,13 @@ bool Instruction::mayThrow() const {
/// isAssociative - Return true if the instruction is associative:
///
-/// Associative operators satisfy: x op (y op z) === (x op y) op z)
+/// Associative operators satisfy: x op (y op z) === (x op y) op z
///
-/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative, when not
-/// applied to floating point types.
+/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
///
bool Instruction::isAssociative(unsigned Opcode, const Type *Ty) {
- if (Opcode == And || Opcode == Or || Opcode == Xor)
- return true;
-
- // Add/Mul reassociate unless they are FP or FP vectors.
- if (Opcode == Add || Opcode == Mul)
- return !Ty->isFPOrFPVector();
- return 0;
+ return Opcode == And || Opcode == Or || Opcode == Xor ||
+ Opcode == Add || Opcode == Mul;
}
/// isCommutative - Return true if the instruction is commutative:
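
FAdd and FMul deliberately stay off the associative list: floating-point addition is commutative but not associative, so reassociation can change results. A worked example, assuming IEEE doubles:

    #include <cassert>

    int main() {
      double big = 1e308;
      // Left association overflows to +inf; right association yields 1e308.
      assert((big + big) + -big != big + (big + -big));
      return 0;
    }
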
@@ -355,7 +352,9 @@ bool Instruction::isAssociative(unsigned Opcode, const Type *Ty) {
bool Instruction::isCommutative(unsigned op) {
switch (op) {
case Add:
+ case FAdd:
case Mul:
+ case FMul:
case And:
case Or:
case Xor:
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index fe30271..4c228fe 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -1502,29 +1502,43 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg,
// BinaryOperator Class
//===----------------------------------------------------------------------===//
+/// AdjustIType - Map Add, Sub, and Mul to FAdd, FSub, and FMul when the
+/// type is floating-point, to help provide compatibility with an older API.
+///
+static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType,
+ const Type *Ty) {
+ // API compatibility: Adjust integer opcodes to floating-point opcodes.
+ if (Ty->isFPOrFPVector()) {
+ if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd;
+ else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub;
+ else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul;
+ }
+ return iType;
+}
+
BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
const Type *Ty, const std::string &Name,
Instruction *InsertBefore)
- : Instruction(Ty, iType,
+ : Instruction(Ty, AdjustIType(iType, Ty),
OperandTraits<BinaryOperator>::op_begin(this),
OperandTraits<BinaryOperator>::operands(this),
InsertBefore) {
Op<0>() = S1;
Op<1>() = S2;
- init(iType);
+ init(AdjustIType(iType, Ty));
setName(Name);
}
BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
const Type *Ty, const std::string &Name,
BasicBlock *InsertAtEnd)
- : Instruction(Ty, iType,
+ : Instruction(Ty, AdjustIType(iType, Ty),
OperandTraits<BinaryOperator>::op_begin(this),
OperandTraits<BinaryOperator>::operands(this),
InsertAtEnd) {
Op<0>() = S1;
Op<1>() = S2;
- init(iType);
+ init(AdjustIType(iType, Ty));
setName(Name);
}
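
AdjustIType is the instruction-level twin of the ConstantExpr::get() shim added in lib/VMCore/Constants.cpp: callers written before the split can keep requesting Add, Sub, or Mul on floating-point values and still get well-typed IR. A hedged sketch (A and B are assumed Values of type double):

    Instruction *legacyAdd(Value *A, Value *B, Instruction *InsertPt) {
      BinaryOperator *BO =
          BinaryOperator::Create(Instruction::Add, A, B, "sum", InsertPt);
      assert(BO->getOpcode() == Instruction::FAdd);  // remapped by AdjustIType
      return BO;
    }
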
@@ -1537,12 +1551,19 @@ void BinaryOperator::init(BinaryOps iType) {
#ifndef NDEBUG
switch (iType) {
case Add: case Sub:
- case Mul:
+ case Mul:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isIntOrIntVector() &&
+ "Tried to create an integer operation on a non-integer type!");
+ break;
+ case FAdd: case FSub:
+ case FMul:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isInteger() || getType()->isFloatingPoint() ||
- isa<VectorType>(getType())) &&
- "Tried to create an arithmetic operation on a non-arithmetic type!");
+ assert(getType()->isFPOrFPVector() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
break;
case UDiv:
case SDiv:
@@ -1631,6 +1652,22 @@ BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const std::string &Name,
Op->getType(), Name, InsertAtEnd);
}
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const std::string &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType());
+ return new BinaryOperator(Instruction::FSub,
+ zero, Op,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const std::string &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType());
+ return new BinaryOperator(Instruction::FSub,
+ zero, Op,
+ Op->getType(), Name, InsertAtEnd);
+}
+
BinaryOperator *BinaryOperator::CreateNot(Value *Op, const std::string &Name,
Instruction *InsertBefore) {
Constant *C;
@@ -1679,6 +1716,14 @@ bool BinaryOperator::isNeg(const Value *V) {
return false;
}
+bool BinaryOperator::isFNeg(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ if (Bop->getOpcode() == Instruction::FSub)
+ return Bop->getOperand(0) ==
+ ConstantExpr::getZeroValueForNegationExpr(Bop->getType());
+ return false;
+}
+
bool BinaryOperator::isNot(const Value *V) {
if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
return (Bop->getOpcode() == Instruction::Xor &&
@@ -1696,6 +1741,15 @@ const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
return getNegArgument(const_cast<Value*>(BinOp));
}
+Value *BinaryOperator::getFNegArgument(Value *BinOp) {
+ assert(isFNeg(BinOp) && "getFNegArgument from non-'fneg' instruction!");
+ return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getFNegArgument(const Value *BinOp) {
+ return getFNegArgument(const_cast<Value*>(BinOp));
+}
+
Value *BinaryOperator::getNotArgument(Value *BinOp) {
assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!");
BinaryOperator *BO = cast<BinaryOperator>(BinOp);
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 59ec3be..b047d0c 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1069,13 +1069,40 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
"Both operands to a binary operator are not of the same type!", &B);
switch (B.getOpcode()) {
+ // Check that integer arithmetic operators are only used with
+ // integral operands.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ Assert1(B.getType()->isIntOrIntVector(),
+ "Integer arithmetic operators only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Integer arithmetic operators must have same type "
+ "for operands and result!", &B);
+ break;
+ // Check that floating-point arithmetic operators are only used with
+ // floating-point operands.
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ Assert1(B.getType()->isFPOrFPVector(),
+ "Floating-point arithmetic operators only work with "
+ "floating-point types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Floating-point arithmetic operators must have same type "
+ "for operands and result!", &B);
+ break;
// Check that logical operators are only used with integral operands.
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- Assert1(B.getType()->isInteger() ||
- (isa<VectorType>(B.getType()) &&
- cast<VectorType>(B.getType())->getElementType()->isInteger()),
+ Assert1(B.getType()->isIntOrIntVector(),
"Logical operators only work with integral types!", &B);
Assert1(B.getType() == B.getOperand(0)->getType(),
"Logical operators must have same type for operands and result!",
@@ -1084,22 +1111,13 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- Assert1(B.getType()->isInteger() ||
- (isa<VectorType>(B.getType()) &&
- cast<VectorType>(B.getType())->getElementType()->isInteger()),
+ Assert1(B.getType()->isIntOrIntVector(),
"Shifts only work with integral types!", &B);
Assert1(B.getType() == B.getOperand(0)->getType(),
"Shift return type must be same as operands!", &B);
- /* FALL THROUGH */
- default:
- // Arithmetic operators only work on integer or fp values
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Arithmetic operators must have same type for operands and result!",
- &B);
- Assert1(B.getType()->isInteger() || B.getType()->isFloatingPoint() ||
- isa<VectorType>(B.getType()),
- "Arithmetic operators must have integer, fp, or vector type!", &B);
break;
+ default:
+ assert(0 && "Unknown BinaryOperator opcode!");
}
visitInstruction(B);
diff --git a/test/Analysis/ScalarEvolution/sext-iv-0.ll b/test/Analysis/ScalarEvolution/sext-iv-0.ll
index 4b2fcea..17f2dff 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-0.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-0.ll
@@ -18,7 +18,7 @@ bb1: ; preds = %bb1, %bb1.thread
%2 = sext i9 %1 to i64 ; <i64> [#uses=1]
%3 = getelementptr double* %x, i64 %2 ; <double*> [#uses=1]
%4 = load double* %3, align 8 ; <double> [#uses=1]
- %5 = mul double %4, 3.900000e+00 ; <double> [#uses=1]
+ %5 = fmul double %4, 3.900000e+00 ; <double> [#uses=1]
%6 = sext i8 %0 to i64 ; <i64> [#uses=1]
%7 = getelementptr double* %x, i64 %6 ; <double*> [#uses=1]
store double %5, double* %7, align 8
diff --git a/test/Analysis/ScalarEvolution/sext-iv-1.ll b/test/Analysis/ScalarEvolution/sext-iv-1.ll
index a9175c3..ca6ad0a 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-1.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-1.ll
@@ -18,7 +18,7 @@ bb1: ; preds = %bb1, %bb1.thread
%2 = sext i9 %1 to i64 ; <i64> [#uses=1]
%3 = getelementptr double* %x, i64 %2 ; <double*> [#uses=1]
%4 = load double* %3, align 8 ; <double> [#uses=1]
- %5 = mul double %4, 3.900000e+00 ; <double> [#uses=1]
+ %5 = fmul double %4, 3.900000e+00 ; <double> [#uses=1]
%6 = sext i7 %0 to i64 ; <i64> [#uses=1]
%7 = getelementptr double* %x, i64 %6 ; <double*> [#uses=1]
store double %5, double* %7, align 8
@@ -41,7 +41,7 @@ bb1: ; preds = %bb1, %bb1.thread
%2 = sext i9 %1 to i64 ; <i64> [#uses=1]
%3 = getelementptr double* %x, i64 %2 ; <double*> [#uses=1]
%4 = load double* %3, align 8 ; <double> [#uses=1]
- %5 = mul double %4, 3.900000e+00 ; <double> [#uses=1]
+ %5 = fmul double %4, 3.900000e+00 ; <double> [#uses=1]
%6 = sext i8 %0 to i64 ; <i64> [#uses=1]
%7 = getelementptr double* %x, i64 %6 ; <double*> [#uses=1]
store double %5, double* %7, align 8
@@ -64,7 +64,7 @@ bb1: ; preds = %bb1, %bb1.thread
%2 = sext i9 %1 to i64 ; <i64> [#uses=1]
%3 = getelementptr double* %x, i64 %2 ; <double*> [#uses=1]
%4 = load double* %3, align 8 ; <double> [#uses=1]
- %5 = mul double %4, 3.900000e+00 ; <double> [#uses=1]
+ %5 = fmul double %4, 3.900000e+00 ; <double> [#uses=1]
%6 = sext i8 %0 to i64 ; <i64> [#uses=1]
%7 = getelementptr double* %x, i64 %6 ; <double*> [#uses=1]
store double %5, double* %7, align 8
@@ -87,7 +87,7 @@ bb1: ; preds = %bb1, %bb1.thread
%2 = sext i9 %1 to i64 ; <i64> [#uses=1]
%3 = getelementptr double* %x, i64 %2 ; <double*> [#uses=1]
%4 = load double* %3, align 8 ; <double> [#uses=1]
- %5 = mul double %4, 3.900000e+00 ; <double> [#uses=1]
+ %5 = fmul double %4, 3.900000e+00 ; <double> [#uses=1]
%6 = sext i8 %0 to i64 ; <i64> [#uses=1]
%7 = getelementptr double* %x, i64 %6 ; <double*> [#uses=1]
store double %5, double* %7, align 8
diff --git a/test/Analysis/ScalarEvolution/trip-count4.ll b/test/Analysis/ScalarEvolution/trip-count4.ll
index a61d5da..49c4e13 100644
--- a/test/Analysis/ScalarEvolution/trip-count4.ll
+++ b/test/Analysis/ScalarEvolution/trip-count4.ll
@@ -13,7 +13,7 @@ loop: ; preds = %loop, %entry
%indvar.i8 = ashr i64 %s0, 8 ; <i64> [#uses=1]
%t0 = getelementptr double* %d, i64 %indvar.i8 ; <double*> [#uses=2]
%t1 = load double* %t0 ; <double> [#uses=1]
- %t2 = mul double %t1, 1.000000e-01 ; <double> [#uses=1]
+ %t2 = fmul double %t1, 1.000000e-01 ; <double> [#uses=1]
store double %t2, double* %t0
%indvar.next = sub i64 %indvar, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %indvar.next, 10 ; <i1> [#uses=1]
diff --git a/test/Assembler/2002-04-07-HexFloatConstants.ll b/test/Assembler/2002-04-07-HexFloatConstants.ll
index b9860b3..5c54b39 100644
--- a/test/Assembler/2002-04-07-HexFloatConstants.ll
+++ b/test/Assembler/2002-04-07-HexFloatConstants.ll
@@ -11,6 +11,6 @@
; RUN: diff %t.1 %t.2
define double @test() {
- %tmp = mul double 7.200000e+101, 0x427F4000 ; <double> [#uses=1]
+ %tmp = fmul double 7.200000e+101, 0x427F4000 ; <double> [#uses=1]
ret double %tmp
}
diff --git a/test/Assembler/2002-04-07-InfConstant.ll b/test/Assembler/2002-04-07-InfConstant.ll
index 317b8f3..71837c9 100644
--- a/test/Assembler/2002-04-07-InfConstant.ll
+++ b/test/Assembler/2002-04-07-InfConstant.ll
@@ -3,7 +3,7 @@
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep 0x7FF0000000000000
define float @test() {
- %tmp = mul float 0x7FF0000000000000, 1.000000e+01 ; <float> [#uses=1]
+ %tmp = fmul float 0x7FF0000000000000, 1.000000e+01 ; <float> [#uses=1]
ret float %tmp
}
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 3661c4c..6e11b16 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -35,8 +35,8 @@ cond_next589: ; preds = %cond_next489
%tmp612 = load i32* null ; <i32> [#uses=1]
%tmp629 = load i32* null ; <i32> [#uses=1]
%tmp629a = sitofp i32 %tmp629 to double ; <double> [#uses=1]
- %tmp631 = mul double %tmp629a, 0.000000e+00 ; <double> [#uses=1]
- %tmp632 = add double 0.000000e+00, %tmp631 ; <double> [#uses=1]
+ %tmp631 = fmul double %tmp629a, 0.000000e+00 ; <double> [#uses=1]
+ %tmp632 = fadd double 0.000000e+00, %tmp631 ; <double> [#uses=1]
%tmp642 = call fastcc i32 @sign( i32 %tmp576, i32 %tmp561 ) ; <i32> [#uses=1]
%tmp650 = mul i32 %tmp606, %tmp642 ; <i32> [#uses=1]
%tmp656 = mul i32 %tmp650, %tmp612 ; <i32> [#uses=1]
@@ -46,8 +46,8 @@ cond_next589: ; preds = %cond_next489
%tmp666 = sub i32 %tmp660, %tmp496 ; <i32> [#uses=1]
%tmp667 = sitofp i32 %tmp666 to double ; <double> [#uses=2]
call void @levrun_linfo_inter( i32 %tmp576, i32 0, i32* null, i32* null )
- %tmp671 = mul double %tmp667, %tmp667 ; <double> [#uses=1]
- %tmp675 = add double %tmp671, 0.000000e+00 ; <double> [#uses=1]
+ %tmp671 = fmul double %tmp667, %tmp667 ; <double> [#uses=1]
+ %tmp675 = fadd double %tmp671, 0.000000e+00 ; <double> [#uses=1]
%tmp678 = fcmp oeq double %tmp632, %tmp675 ; <i1> [#uses=1]
br i1 %tmp678, label %cond_true679, label %cond_false693
diff --git a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
index 7b7ea6b..3f17a51 100644
--- a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 184
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164
%"struct.Adv5::Ekin<3>" = type <{ i8 }>
%"struct.Adv5::X::Energyflux<3>" = type { double }
diff --git a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
index 56e949f..bd5b719 100644
--- a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
@@ -11,7 +11,7 @@ bb.thread:
br label %bb52
bb32: ; preds = %bb52
- %0 = add double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %0 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
%1 = add i32 %j.1, 1 ; <i32> [#uses=1]
br label %bb52
@@ -29,14 +29,14 @@ bb53: ; preds = %bb52
bb55: ; preds = %bb53
%4 = load double* @a, align 4 ; <double> [#uses=10]
- %5 = add double %4, 0.000000e+00 ; <double> [#uses=16]
+ %5 = fadd double %4, 0.000000e+00 ; <double> [#uses=16]
%6 = fcmp ogt double %k.4, 0.000000e+00 ; <i1> [#uses=1]
- %.pn404 = mul double %4, %4 ; <double> [#uses=4]
- %.pn402 = mul double %5, %5 ; <double> [#uses=5]
+ %.pn404 = fmul double %4, %4 ; <double> [#uses=4]
+ %.pn402 = fmul double %5, %5 ; <double> [#uses=5]
%.pn165.in = load double* @N ; <double> [#uses=5]
- %.pn198 = mul double 0.000000e+00, %5 ; <double> [#uses=1]
- %.pn185 = sub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %.pn147 = sub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %.pn198 = fmul double 0.000000e+00, %5 ; <double> [#uses=1]
+ %.pn185 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %.pn147 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
%.pn141 = fdiv double 0.000000e+00, %4 ; <double> [#uses=1]
%.pn142 = fdiv double 0.000000e+00, %5 ; <double> [#uses=1]
%.pn136 = fdiv double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
@@ -47,178 +47,178 @@ bb55: ; preds = %bb53
%.pn117 = fdiv double 0.000000e+00, %4 ; <double> [#uses=1]
%.pn118 = fdiv double %.pn185, %5 ; <double> [#uses=1]
%.pn88 = fdiv double %.pn147, %5 ; <double> [#uses=1]
- %.pn81 = sub double %.pn141, %.pn142 ; <double> [#uses=1]
- %.pn77 = sub double 0.000000e+00, %.pn136 ; <double> [#uses=1]
- %.pn75 = sub double 0.000000e+00, %.pn132 ; <double> [#uses=1]
- %.pn69 = sub double %.pn123, %.pn124 ; <double> [#uses=1]
- %.pn67 = sub double 0.000000e+00, %.pn120 ; <double> [#uses=1]
- %.pn56 = sub double %.pn117, %.pn118 ; <double> [#uses=1]
- %.pn42 = sub double 0.000000e+00, %.pn88 ; <double> [#uses=1]
- %.pn60 = mul double %.pn81, 0.000000e+00 ; <double> [#uses=1]
- %.pn57 = add double %.pn77, 0.000000e+00 ; <double> [#uses=1]
- %.pn58 = mul double %.pn75, %.pn165.in ; <double> [#uses=1]
- %.pn32 = add double %.pn69, 0.000000e+00 ; <double> [#uses=1]
- %.pn33 = mul double %.pn67, %.pn165.in ; <double> [#uses=1]
- %.pn17 = sub double 0.000000e+00, %.pn60 ; <double> [#uses=1]
- %.pn9 = add double %.pn57, %.pn58 ; <double> [#uses=1]
- %.pn30 = mul double 0.000000e+00, %.pn56 ; <double> [#uses=1]
- %.pn24 = mul double 0.000000e+00, %.pn42 ; <double> [#uses=1]
- %.pn1 = add double %.pn32, %.pn33 ; <double> [#uses=1]
- %.pn28 = sub double %.pn30, 0.000000e+00 ; <double> [#uses=1]
- %.pn26 = add double %.pn28, 0.000000e+00 ; <double> [#uses=1]
- %.pn22 = sub double %.pn26, 0.000000e+00 ; <double> [#uses=1]
- %.pn20 = sub double %.pn24, 0.000000e+00 ; <double> [#uses=1]
- %.pn18 = add double %.pn22, 0.000000e+00 ; <double> [#uses=1]
- %.pn16 = add double %.pn20, 0.000000e+00 ; <double> [#uses=1]
- %.pn14 = sub double %.pn18, 0.000000e+00 ; <double> [#uses=1]
- %.pn12 = sub double %.pn16, %.pn17 ; <double> [#uses=1]
- %.pn10 = add double %.pn14, 0.000000e+00 ; <double> [#uses=1]
- %.pn8 = add double %.pn12, 0.000000e+00 ; <double> [#uses=1]
- %.pn6 = sub double %.pn10, 0.000000e+00 ; <double> [#uses=1]
- %.pn4 = sub double %.pn8, %.pn9 ; <double> [#uses=1]
- %.pn2 = add double %.pn6, 0.000000e+00 ; <double> [#uses=1]
- %.pn = add double %.pn4, 0.000000e+00 ; <double> [#uses=1]
- %N1.0 = sub double %.pn2, 0.000000e+00 ; <double> [#uses=2]
- %D1.0 = sub double %.pn, %.pn1 ; <double> [#uses=2]
+ %.pn81 = fsub double %.pn141, %.pn142 ; <double> [#uses=1]
+ %.pn77 = fsub double 0.000000e+00, %.pn136 ; <double> [#uses=1]
+ %.pn75 = fsub double 0.000000e+00, %.pn132 ; <double> [#uses=1]
+ %.pn69 = fsub double %.pn123, %.pn124 ; <double> [#uses=1]
+ %.pn67 = fsub double 0.000000e+00, %.pn120 ; <double> [#uses=1]
+ %.pn56 = fsub double %.pn117, %.pn118 ; <double> [#uses=1]
+ %.pn42 = fsub double 0.000000e+00, %.pn88 ; <double> [#uses=1]
+ %.pn60 = fmul double %.pn81, 0.000000e+00 ; <double> [#uses=1]
+ %.pn57 = fadd double %.pn77, 0.000000e+00 ; <double> [#uses=1]
+ %.pn58 = fmul double %.pn75, %.pn165.in ; <double> [#uses=1]
+ %.pn32 = fadd double %.pn69, 0.000000e+00 ; <double> [#uses=1]
+ %.pn33 = fmul double %.pn67, %.pn165.in ; <double> [#uses=1]
+ %.pn17 = fsub double 0.000000e+00, %.pn60 ; <double> [#uses=1]
+ %.pn9 = fadd double %.pn57, %.pn58 ; <double> [#uses=1]
+ %.pn30 = fmul double 0.000000e+00, %.pn56 ; <double> [#uses=1]
+ %.pn24 = fmul double 0.000000e+00, %.pn42 ; <double> [#uses=1]
+ %.pn1 = fadd double %.pn32, %.pn33 ; <double> [#uses=1]
+ %.pn28 = fsub double %.pn30, 0.000000e+00 ; <double> [#uses=1]
+ %.pn26 = fadd double %.pn28, 0.000000e+00 ; <double> [#uses=1]
+ %.pn22 = fsub double %.pn26, 0.000000e+00 ; <double> [#uses=1]
+ %.pn20 = fsub double %.pn24, 0.000000e+00 ; <double> [#uses=1]
+ %.pn18 = fadd double %.pn22, 0.000000e+00 ; <double> [#uses=1]
+ %.pn16 = fadd double %.pn20, 0.000000e+00 ; <double> [#uses=1]
+ %.pn14 = fsub double %.pn18, 0.000000e+00 ; <double> [#uses=1]
+ %.pn12 = fsub double %.pn16, %.pn17 ; <double> [#uses=1]
+ %.pn10 = fadd double %.pn14, 0.000000e+00 ; <double> [#uses=1]
+ %.pn8 = fadd double %.pn12, 0.000000e+00 ; <double> [#uses=1]
+ %.pn6 = fsub double %.pn10, 0.000000e+00 ; <double> [#uses=1]
+ %.pn4 = fsub double %.pn8, %.pn9 ; <double> [#uses=1]
+ %.pn2 = fadd double %.pn6, 0.000000e+00 ; <double> [#uses=1]
+ %.pn = fadd double %.pn4, 0.000000e+00 ; <double> [#uses=1]
+ %N1.0 = fsub double %.pn2, 0.000000e+00 ; <double> [#uses=2]
+ %D1.0 = fsub double %.pn, %.pn1 ; <double> [#uses=2]
br i1 %6, label %bb62, label %bb64
bb62: ; preds = %bb55
- %7 = mul double 0.000000e+00, %4 ; <double> [#uses=1]
- %8 = sub double -0.000000e+00, %7 ; <double> [#uses=3]
- %9 = mul double 0.000000e+00, %5 ; <double> [#uses=1]
- %10 = sub double -0.000000e+00, %9 ; <double> [#uses=3]
- %11 = mul double %.pn404, %4 ; <double> [#uses=5]
- %12 = mul double %.pn402, %5 ; <double> [#uses=5]
- %13 = mul double 0.000000e+00, -2.000000e+00 ; <double> [#uses=1]
+ %7 = fmul double 0.000000e+00, %4 ; <double> [#uses=1]
+ %8 = fsub double -0.000000e+00, %7 ; <double> [#uses=3]
+ %9 = fmul double 0.000000e+00, %5 ; <double> [#uses=1]
+ %10 = fsub double -0.000000e+00, %9 ; <double> [#uses=3]
+ %11 = fmul double %.pn404, %4 ; <double> [#uses=5]
+ %12 = fmul double %.pn402, %5 ; <double> [#uses=5]
+ %13 = fmul double 0.000000e+00, -2.000000e+00 ; <double> [#uses=1]
%14 = fdiv double 0.000000e+00, %.pn402 ; <double> [#uses=1]
- %15 = sub double 0.000000e+00, %14 ; <double> [#uses=1]
- %16 = mul double 0.000000e+00, %15 ; <double> [#uses=1]
- %17 = add double %13, %16 ; <double> [#uses=1]
- %18 = mul double %.pn165.in, -2.000000e+00 ; <double> [#uses=5]
- %19 = mul double %18, 0.000000e+00 ; <double> [#uses=1]
- %20 = add double %17, %19 ; <double> [#uses=1]
- %21 = mul double 0.000000e+00, %20 ; <double> [#uses=1]
- %22 = add double 0.000000e+00, %21 ; <double> [#uses=1]
+ %15 = fsub double 0.000000e+00, %14 ; <double> [#uses=1]
+ %16 = fmul double 0.000000e+00, %15 ; <double> [#uses=1]
+ %17 = fadd double %13, %16 ; <double> [#uses=1]
+ %18 = fmul double %.pn165.in, -2.000000e+00 ; <double> [#uses=5]
+ %19 = fmul double %18, 0.000000e+00 ; <double> [#uses=1]
+ %20 = fadd double %17, %19 ; <double> [#uses=1]
+ %21 = fmul double 0.000000e+00, %20 ; <double> [#uses=1]
+ %22 = fadd double 0.000000e+00, %21 ; <double> [#uses=1]
%23 = fdiv double 0.000000e+00, %12 ; <double> [#uses=1]
- %24 = sub double 0.000000e+00, %23 ; <double> [#uses=0]
- %25 = mul double %18, 0.000000e+00 ; <double> [#uses=1]
- %26 = add double 0.000000e+00, %25 ; <double> [#uses=1]
- %27 = mul double 0.000000e+00, %26 ; <double> [#uses=1]
- %28 = sub double %22, %27 ; <double> [#uses=1]
- %29 = mul double %11, %4 ; <double> [#uses=1]
- %30 = mul double %12, %5 ; <double> [#uses=3]
- %31 = mul double %.pn165.in, -4.000000e+00 ; <double> [#uses=1]
- %32 = mul double %.pn165.in, 0x3FF5555555555555 ; <double> [#uses=1]
- %33 = mul double %32, 0.000000e+00 ; <double> [#uses=2]
- %34 = add double %28, 0.000000e+00 ; <double> [#uses=1]
- %35 = sub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %24 = fsub double 0.000000e+00, %23 ; <double> [#uses=0]
+ %25 = fmul double %18, 0.000000e+00 ; <double> [#uses=1]
+ %26 = fadd double 0.000000e+00, %25 ; <double> [#uses=1]
+ %27 = fmul double 0.000000e+00, %26 ; <double> [#uses=1]
+ %28 = fsub double %22, %27 ; <double> [#uses=1]
+ %29 = fmul double %11, %4 ; <double> [#uses=1]
+ %30 = fmul double %12, %5 ; <double> [#uses=3]
+ %31 = fmul double %.pn165.in, -4.000000e+00 ; <double> [#uses=1]
+ %32 = fmul double %.pn165.in, 0x3FF5555555555555 ; <double> [#uses=1]
+ %33 = fmul double %32, 0.000000e+00 ; <double> [#uses=2]
+ %34 = fadd double %28, 0.000000e+00 ; <double> [#uses=1]
+ %35 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
%36 = fdiv double %35, %11 ; <double> [#uses=1]
%37 = fdiv double 0.000000e+00, %12 ; <double> [#uses=1]
- %38 = sub double %36, %37 ; <double> [#uses=1]
- %39 = mul double 0.000000e+00, %38 ; <double> [#uses=1]
- %40 = add double 0.000000e+00, %39 ; <double> [#uses=1]
- %41 = add double %40, 0.000000e+00 ; <double> [#uses=1]
- %42 = add double %41, 0.000000e+00 ; <double> [#uses=1]
- %43 = mul double %42, 0.000000e+00 ; <double> [#uses=1]
- %44 = sub double %34, %43 ; <double> [#uses=1]
+ %38 = fsub double %36, %37 ; <double> [#uses=1]
+ %39 = fmul double 0.000000e+00, %38 ; <double> [#uses=1]
+ %40 = fadd double 0.000000e+00, %39 ; <double> [#uses=1]
+ %41 = fadd double %40, 0.000000e+00 ; <double> [#uses=1]
+ %42 = fadd double %41, 0.000000e+00 ; <double> [#uses=1]
+ %43 = fmul double %42, 0.000000e+00 ; <double> [#uses=1]
+ %44 = fsub double %34, %43 ; <double> [#uses=1]
%45 = tail call double @llvm.exp.f64(double %8) nounwind ; <double> [#uses=1]
- %46 = sub double -0.000000e+00, %45 ; <double> [#uses=2]
+ %46 = fsub double -0.000000e+00, %45 ; <double> [#uses=2]
%47 = fdiv double %46, 0.000000e+00 ; <double> [#uses=1]
- %48 = mul double %30, %5 ; <double> [#uses=1]
+ %48 = fmul double %30, %5 ; <double> [#uses=1]
%49 = fdiv double 0.000000e+00, %48 ; <double> [#uses=1]
- %50 = sub double %47, %49 ; <double> [#uses=1]
- %51 = mul double %50, -4.000000e+00 ; <double> [#uses=1]
- %52 = add double %51, 0.000000e+00 ; <double> [#uses=1]
+ %50 = fsub double %47, %49 ; <double> [#uses=1]
+ %51 = fmul double %50, -4.000000e+00 ; <double> [#uses=1]
+ %52 = fadd double %51, 0.000000e+00 ; <double> [#uses=1]
%53 = fdiv double %46, %11 ; <double> [#uses=1]
- %54 = sub double %53, 0.000000e+00 ; <double> [#uses=1]
- %55 = mul double %31, %54 ; <double> [#uses=1]
- %56 = add double %52, %55 ; <double> [#uses=1]
- %57 = add double %56, 0.000000e+00 ; <double> [#uses=1]
- %58 = add double %44, %57 ; <double> [#uses=1]
- %59 = sub double %58, 0.000000e+00 ; <double> [#uses=1]
+ %54 = fsub double %53, 0.000000e+00 ; <double> [#uses=1]
+ %55 = fmul double %31, %54 ; <double> [#uses=1]
+ %56 = fadd double %52, %55 ; <double> [#uses=1]
+ %57 = fadd double %56, 0.000000e+00 ; <double> [#uses=1]
+ %58 = fadd double %44, %57 ; <double> [#uses=1]
+ %59 = fsub double %58, 0.000000e+00 ; <double> [#uses=1]
%60 = tail call double @llvm.exp.f64(double 0.000000e+00) nounwind ; <double> [#uses=1]
- %61 = sub double -0.000000e+00, %60 ; <double> [#uses=1]
+ %61 = fsub double -0.000000e+00, %60 ; <double> [#uses=1]
%62 = fdiv double 0.000000e+00, -6.000000e+00 ; <double> [#uses=1]
%63 = fdiv double %61, %5 ; <double> [#uses=1]
- %64 = sub double 0.000000e+00, %63 ; <double> [#uses=1]
- %65 = mul double %62, %64 ; <double> [#uses=1]
- %66 = sub double 0.000000e+00, %65 ; <double> [#uses=1]
- %67 = sub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=2]
+ %64 = fsub double 0.000000e+00, %63 ; <double> [#uses=1]
+ %65 = fmul double %62, %64 ; <double> [#uses=1]
+ %66 = fsub double 0.000000e+00, %65 ; <double> [#uses=1]
+ %67 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=2]
%68 = tail call double @llvm.exp.f64(double %10) nounwind ; <double> [#uses=1]
- %69 = sub double -0.000000e+00, %68 ; <double> [#uses=2]
+ %69 = fsub double -0.000000e+00, %68 ; <double> [#uses=2]
%70 = fdiv double %67, %.pn404 ; <double> [#uses=1]
%71 = fdiv double %69, %.pn402 ; <double> [#uses=1]
- %72 = sub double %70, %71 ; <double> [#uses=1]
- %73 = mul double %72, -5.000000e-01 ; <double> [#uses=1]
+ %72 = fsub double %70, %71 ; <double> [#uses=1]
+ %73 = fmul double %72, -5.000000e-01 ; <double> [#uses=1]
%74 = fdiv double %67, %4 ; <double> [#uses=1]
%75 = fdiv double %69, %5 ; <double> [#uses=1]
- %76 = sub double %74, %75 ; <double> [#uses=1]
- %77 = mul double %76, 0.000000e+00 ; <double> [#uses=1]
- %78 = add double %73, %77 ; <double> [#uses=1]
- %79 = mul double 0.000000e+00, %78 ; <double> [#uses=1]
- %80 = add double %66, %79 ; <double> [#uses=1]
+ %76 = fsub double %74, %75 ; <double> [#uses=1]
+ %77 = fmul double %76, 0.000000e+00 ; <double> [#uses=1]
+ %78 = fadd double %73, %77 ; <double> [#uses=1]
+ %79 = fmul double 0.000000e+00, %78 ; <double> [#uses=1]
+ %80 = fadd double %66, %79 ; <double> [#uses=1]
%81 = fdiv double 0.000000e+00, %.pn404 ; <double> [#uses=1]
%82 = fdiv double 0.000000e+00, %.pn402 ; <double> [#uses=1]
- %83 = sub double %81, %82 ; <double> [#uses=1]
- %84 = mul double %83, -5.000000e-01 ; <double> [#uses=1]
+ %83 = fsub double %81, %82 ; <double> [#uses=1]
+ %84 = fmul double %83, -5.000000e-01 ; <double> [#uses=1]
%85 = fdiv double 0.000000e+00, %4 ; <double> [#uses=1]
%86 = fdiv double 0.000000e+00, %5 ; <double> [#uses=1]
- %87 = sub double %85, %86 ; <double> [#uses=1]
- %88 = mul double %87, 0.000000e+00 ; <double> [#uses=1]
- %89 = add double %84, %88 ; <double> [#uses=1]
- %90 = mul double 0.000000e+00, %89 ; <double> [#uses=1]
- %91 = sub double %80, %90 ; <double> [#uses=1]
+ %87 = fsub double %85, %86 ; <double> [#uses=1]
+ %88 = fmul double %87, 0.000000e+00 ; <double> [#uses=1]
+ %89 = fadd double %84, %88 ; <double> [#uses=1]
+ %90 = fmul double 0.000000e+00, %89 ; <double> [#uses=1]
+ %91 = fsub double %80, %90 ; <double> [#uses=1]
%92 = tail call double @llvm.exp.f64(double %8) nounwind ; <double> [#uses=1]
- %93 = sub double -0.000000e+00, %92 ; <double> [#uses=1]
+ %93 = fsub double -0.000000e+00, %92 ; <double> [#uses=1]
%94 = tail call double @llvm.exp.f64(double %10) nounwind ; <double> [#uses=1]
- %95 = sub double -0.000000e+00, %94 ; <double> [#uses=3]
+ %95 = fsub double -0.000000e+00, %94 ; <double> [#uses=3]
%96 = fdiv double %95, %.pn402 ; <double> [#uses=1]
- %97 = sub double 0.000000e+00, %96 ; <double> [#uses=1]
- %98 = mul double 0.000000e+00, %97 ; <double> [#uses=1]
+ %97 = fsub double 0.000000e+00, %96 ; <double> [#uses=1]
+ %98 = fmul double 0.000000e+00, %97 ; <double> [#uses=1]
%99 = fdiv double %93, %11 ; <double> [#uses=1]
%100 = fdiv double %95, %12 ; <double> [#uses=1]
- %101 = sub double %99, %100 ; <double> [#uses=1]
- %102 = sub double %98, %101 ; <double> [#uses=1]
+ %101 = fsub double %99, %100 ; <double> [#uses=1]
+ %102 = fsub double %98, %101 ; <double> [#uses=1]
%103 = fdiv double %95, %5 ; <double> [#uses=1]
- %104 = sub double 0.000000e+00, %103 ; <double> [#uses=1]
- %105 = mul double %18, %104 ; <double> [#uses=1]
- %106 = add double %102, %105 ; <double> [#uses=1]
- %107 = mul double %106, %k.4 ; <double> [#uses=1]
- %108 = add double %91, %107 ; <double> [#uses=1]
- %109 = sub double %108, 0.000000e+00 ; <double> [#uses=1]
+ %104 = fsub double 0.000000e+00, %103 ; <double> [#uses=1]
+ %105 = fmul double %18, %104 ; <double> [#uses=1]
+ %106 = fadd double %102, %105 ; <double> [#uses=1]
+ %107 = fmul double %106, %k.4 ; <double> [#uses=1]
+ %108 = fadd double %91, %107 ; <double> [#uses=1]
+ %109 = fsub double %108, 0.000000e+00 ; <double> [#uses=1]
%110 = tail call double @llvm.exp.f64(double %8) nounwind ; <double> [#uses=1]
- %111 = sub double -0.000000e+00, %110 ; <double> [#uses=2]
+ %111 = fsub double -0.000000e+00, %110 ; <double> [#uses=2]
%112 = tail call double @llvm.exp.f64(double %10) nounwind ; <double> [#uses=1]
- %113 = sub double -0.000000e+00, %112 ; <double> [#uses=2]
+ %113 = fsub double -0.000000e+00, %112 ; <double> [#uses=2]
%114 = fdiv double %111, %11 ; <double> [#uses=1]
%115 = fdiv double %113, %12 ; <double> [#uses=1]
- %116 = sub double %114, %115 ; <double> [#uses=1]
- %117 = mul double 0.000000e+00, %116 ; <double> [#uses=1]
+ %116 = fsub double %114, %115 ; <double> [#uses=1]
+ %117 = fmul double 0.000000e+00, %116 ; <double> [#uses=1]
%118 = fdiv double %111, %29 ; <double> [#uses=1]
%119 = fdiv double %113, %30 ; <double> [#uses=1]
- %120 = sub double %118, %119 ; <double> [#uses=1]
- %121 = sub double %117, %120 ; <double> [#uses=1]
- %122 = mul double %18, 0.000000e+00 ; <double> [#uses=1]
- %123 = add double %121, %122 ; <double> [#uses=1]
- %124 = mul double %33, 0.000000e+00 ; <double> [#uses=1]
- %125 = add double %123, %124 ; <double> [#uses=1]
- %126 = add double %109, %125 ; <double> [#uses=1]
+ %120 = fsub double %118, %119 ; <double> [#uses=1]
+ %121 = fsub double %117, %120 ; <double> [#uses=1]
+ %122 = fmul double %18, 0.000000e+00 ; <double> [#uses=1]
+ %123 = fadd double %121, %122 ; <double> [#uses=1]
+ %124 = fmul double %33, 0.000000e+00 ; <double> [#uses=1]
+ %125 = fadd double %123, %124 ; <double> [#uses=1]
+ %126 = fadd double %109, %125 ; <double> [#uses=1]
%127 = tail call double @llvm.exp.f64(double 0.000000e+00) nounwind ; <double> [#uses=1]
- %128 = sub double -0.000000e+00, %127 ; <double> [#uses=2]
+ %128 = fsub double -0.000000e+00, %127 ; <double> [#uses=2]
%129 = fdiv double %128, %30 ; <double> [#uses=1]
- %130 = sub double 0.000000e+00, %129 ; <double> [#uses=1]
- %131 = sub double 0.000000e+00, %130 ; <double> [#uses=1]
+ %130 = fsub double 0.000000e+00, %129 ; <double> [#uses=1]
+ %131 = fsub double 0.000000e+00, %130 ; <double> [#uses=1]
%132 = fdiv double 0.000000e+00, %.pn404 ; <double> [#uses=1]
- %133 = sub double %132, 0.000000e+00 ; <double> [#uses=1]
- %134 = mul double %18, %133 ; <double> [#uses=1]
- %135 = add double %131, %134 ; <double> [#uses=1]
+ %133 = fsub double %132, 0.000000e+00 ; <double> [#uses=1]
+ %134 = fmul double %18, %133 ; <double> [#uses=1]
+ %135 = fadd double %131, %134 ; <double> [#uses=1]
%136 = fdiv double %128, %5 ; <double> [#uses=1]
- %137 = sub double 0.000000e+00, %136 ; <double> [#uses=1]
- %138 = mul double %33, %137 ; <double> [#uses=1]
- %139 = add double %135, %138 ; <double> [#uses=1]
- %140 = sub double %126, %139 ; <double> [#uses=1]
- %141 = add double %N1.0, %59 ; <double> [#uses=1]
- %142 = add double %D1.0, %140 ; <double> [#uses=1]
+ %137 = fsub double 0.000000e+00, %136 ; <double> [#uses=1]
+ %138 = fmul double %33, %137 ; <double> [#uses=1]
+ %139 = fadd double %135, %138 ; <double> [#uses=1]
+ %140 = fsub double %126, %139 ; <double> [#uses=1]
+ %141 = fadd double %N1.0, %59 ; <double> [#uses=1]
+ %142 = fadd double %D1.0, %140 ; <double> [#uses=1]
br label %bb64
bb64: ; preds = %bb62, %bb55
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 7556616..399ed30 100644
--- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -26,39 +26,39 @@ entry:
bb3: ; preds = %entry
%2 = fdiv double 1.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %3 = mul double 0.000000e+00, %2 ; <double> [#uses=2]
+ %3 = fmul double 0.000000e+00, %2 ; <double> [#uses=2]
%4 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind ; <double> [#uses=1]
%5 = fdiv double 1.000000e+00, %4 ; <double> [#uses=2]
- %6 = mul double %3, %5 ; <double> [#uses=2]
- %7 = mul double 0.000000e+00, %5 ; <double> [#uses=2]
- %8 = mul double %3, %7 ; <double> [#uses=1]
- %9 = sub double %8, 0.000000e+00 ; <double> [#uses=1]
- %10 = mul double 0.000000e+00, %6 ; <double> [#uses=1]
- %11 = sub double 0.000000e+00, %10 ; <double> [#uses=1]
- %12 = sub double -0.000000e+00, %11 ; <double> [#uses=1]
- %13 = mul double %0, %0 ; <double> [#uses=2]
- %14 = sub double %13, 0.000000e+00 ; <double> [#uses=1]
+ %6 = fmul double %3, %5 ; <double> [#uses=2]
+ %7 = fmul double 0.000000e+00, %5 ; <double> [#uses=2]
+ %8 = fmul double %3, %7 ; <double> [#uses=1]
+ %9 = fsub double %8, 0.000000e+00 ; <double> [#uses=1]
+ %10 = fmul double 0.000000e+00, %6 ; <double> [#uses=1]
+ %11 = fsub double 0.000000e+00, %10 ; <double> [#uses=1]
+ %12 = fsub double -0.000000e+00, %11 ; <double> [#uses=1]
+ %13 = fmul double %0, %0 ; <double> [#uses=2]
+ %14 = fsub double %13, 0.000000e+00 ; <double> [#uses=1]
%15 = call double @llvm.sqrt.f64(double %14) ; <double> [#uses=1]
- %16 = mul double 0.000000e+00, %15 ; <double> [#uses=1]
+ %16 = fmul double 0.000000e+00, %15 ; <double> [#uses=1]
%17 = fdiv double %16, %0 ; <double> [#uses=1]
- %18 = add double 0.000000e+00, %17 ; <double> [#uses=1]
+ %18 = fadd double 0.000000e+00, %17 ; <double> [#uses=1]
%19 = call double @acos(double %18) nounwind readonly ; <double> [#uses=1]
%20 = load double* null, align 4 ; <double> [#uses=1]
- %21 = mul double %20, 0x401921FB54442D18 ; <double> [#uses=1]
+ %21 = fmul double %20, 0x401921FB54442D18 ; <double> [#uses=1]
%22 = call double @sin(double %19) nounwind readonly ; <double> [#uses=2]
- %23 = mul double %22, 0.000000e+00 ; <double> [#uses=2]
- %24 = mul double %6, %23 ; <double> [#uses=1]
- %25 = mul double %7, %23 ; <double> [#uses=1]
+ %23 = fmul double %22, 0.000000e+00 ; <double> [#uses=2]
+ %24 = fmul double %6, %23 ; <double> [#uses=1]
+ %25 = fmul double %7, %23 ; <double> [#uses=1]
%26 = call double @sin(double %21) nounwind readonly ; <double> [#uses=1]
- %27 = mul double %22, %26 ; <double> [#uses=2]
- %28 = mul double %9, %27 ; <double> [#uses=1]
- %29 = mul double %27, %12 ; <double> [#uses=1]
- %30 = add double %24, %28 ; <double> [#uses=1]
- %31 = add double 0.000000e+00, %29 ; <double> [#uses=1]
- %32 = add double %25, 0.000000e+00 ; <double> [#uses=1]
- %33 = add double %30, 0.000000e+00 ; <double> [#uses=1]
- %34 = add double %31, 0.000000e+00 ; <double> [#uses=1]
- %35 = add double %32, 0.000000e+00 ; <double> [#uses=1]
+ %27 = fmul double %22, %26 ; <double> [#uses=2]
+ %28 = fmul double %9, %27 ; <double> [#uses=1]
+ %29 = fmul double %27, %12 ; <double> [#uses=1]
+ %30 = fadd double %24, %28 ; <double> [#uses=1]
+ %31 = fadd double 0.000000e+00, %29 ; <double> [#uses=1]
+ %32 = fadd double %25, 0.000000e+00 ; <double> [#uses=1]
+ %33 = fadd double %30, 0.000000e+00 ; <double> [#uses=1]
+ %34 = fadd double %31, 0.000000e+00 ; <double> [#uses=1]
+ %35 = fadd double %32, 0.000000e+00 ; <double> [#uses=1]
%36 = bitcast %struct.ggPoint3* %x to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.i32(i8* null, i8* %36, i32 24, i32 4) nounwind
store double %33, double* null, align 8
@@ -68,9 +68,9 @@ bb5.i.i.i: ; preds = %bb3
unreachable
_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit: ; preds = %bb3
- %37 = sub double %13, 0.000000e+00 ; <double> [#uses=0]
- %38 = sub double -0.000000e+00, %34 ; <double> [#uses=0]
- %39 = sub double -0.000000e+00, %35 ; <double> [#uses=0]
+ %37 = fsub double %13, 0.000000e+00 ; <double> [#uses=0]
+ %38 = fsub double -0.000000e+00, %34 ; <double> [#uses=0]
+ %39 = fsub double -0.000000e+00, %35 ; <double> [#uses=0]
ret i32 1
bb7: ; preds = %entry
diff --git a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
index 9dc3b34..f394847 100644
--- a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
+++ b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -4,8 +4,8 @@ define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %C
entry:
%input2 = load <4 x float>* null, align 16 ; <<4 x float>> [#uses=2]
%shuffle7 = shufflevector <4 x float> %input2, <4 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; <<4 x float>> [#uses=1]
- %mul1 = mul <4 x float> %shuffle7, zeroinitializer ; <<4 x float>> [#uses=1]
- %add2 = add <4 x float> %mul1, %input2 ; <<4 x float>> [#uses=1]
+ %mul1 = fmul <4 x float> %shuffle7, zeroinitializer ; <<4 x float>> [#uses=1]
+ %add2 = fadd <4 x float> %mul1, %input2 ; <<4 x float>> [#uses=1]
store <4 x float> %add2, <4 x float>* null, align 16
ret void
}
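The pattern running through these test updates is the IR opcode split this import brings in: add, sub, and mul on floating-point (and FP vector) types are now spelled fadd, fsub, and fmul, while the integer forms keep their old names — note the mul <4 x i64> left untouched in the hunk above. A minimal sketch of the new spelling (a hypothetical function, not part of the patch):

; hypothetical illustration, not from the patch
define float @scale_and_bias(float %x, i32 %n) {
entry:
  %s = fmul float %x, 2.000000e+00    ; was: mul float %x, 2.000000e+00
  %b = fadd float %s, 1.000000e+00    ; was: add float %s, 1.000000e+00
  %m = mul i32 %n, 3                  ; integer mul is unchanged ; <i32> [#uses=0]
  ret float %b
}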
diff --git a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
new file mode 100644
index 0000000..5eaae7a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
@@ -0,0 +1,263 @@
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6
+
+ %struct.anon = type { i16, i16 }
+ %struct.cab_archive = type { i32, i16, i16, i16, i16, i8, %struct.cab_folder*, %struct.cab_file* }
+ %struct.cab_file = type { i32, i16, i64, i8*, i32, i32, i32, %struct.cab_folder*, %struct.cab_file*, %struct.cab_archive*, %struct.cab_state* }
+ %struct.cab_folder = type { i16, i16, %struct.cab_archive*, i64, %struct.cab_folder* }
+ %struct.cab_state = type { i8*, i8*, [38912 x i8], i16, i16, i8*, i16 }
+ %struct.qtm_model = type { i32, i32, %struct.anon* }
+ %struct.qtm_stream = type { i32, i32, i8, i8*, i32, i32, i32, i16, i16, i16, i8, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i8, [42 x i32], [42 x i8], [27 x i8], [27 x i8], %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, [65 x %struct.anon], [65 x %struct.anon], [65 x %struct.anon], [65 x %struct.anon], [25 x %struct.anon], [37 x %struct.anon], [43 x %struct.anon], [28 x %struct.anon], [8 x %struct.anon], %struct.cab_file*, i32 (%struct.cab_file*, i8*, i32)* }
+
+declare fastcc i32 @qtm_read_input(%struct.qtm_stream* nocapture) nounwind
+
+define fastcc i32 @qtm_decompress(%struct.qtm_stream* %qtm, i64 %out_bytes) nounwind {
+entry:
+ br i1 undef, label %bb245, label %bb3
+
+bb3: ; preds = %entry
+ br i1 undef, label %bb5, label %bb4
+
+bb4: ; preds = %bb3
+ ret i32 undef
+
+bb5: ; preds = %bb3
+ br i1 undef, label %bb245, label %bb14
+
+bb14: ; preds = %bb5
+ br label %bb238
+
+bb28: ; preds = %bb215
+ br label %bb31
+
+bb29: ; preds = %bb31
+ br i1 undef, label %bb31, label %bb32
+
+bb31: ; preds = %bb29, %bb28
+ br i1 undef, label %bb29, label %bb32
+
+bb32: ; preds = %bb31, %bb29
+ br label %bb33
+
+bb33: ; preds = %bb33, %bb32
+ br i1 undef, label %bb34, label %bb33
+
+bb34: ; preds = %bb33
+ br i1 undef, label %bb35, label %bb36
+
+bb35: ; preds = %bb34
+ br label %bb36
+
+bb36: ; preds = %bb46, %bb35, %bb34
+ br i1 undef, label %bb40, label %bb37
+
+bb37: ; preds = %bb36
+ br i1 undef, label %bb77, label %bb60
+
+bb40: ; preds = %bb36
+ br i1 undef, label %bb46, label %bb41
+
+bb41: ; preds = %bb40
+ br i1 undef, label %bb45, label %bb42
+
+bb42: ; preds = %bb41
+ ret i32 undef
+
+bb45: ; preds = %bb41
+ br label %bb46
+
+bb46: ; preds = %bb45, %bb40
+ br label %bb36
+
+bb60: ; preds = %bb60, %bb37
+ br label %bb60
+
+bb77: ; preds = %bb37
+ switch i32 undef, label %bb197 [
+ i32 5, label %bb108
+ i32 6, label %bb138
+ ]
+
+bb108: ; preds = %bb77
+ br label %bb111
+
+bb109: ; preds = %bb111
+ br i1 undef, label %bb111, label %bb112
+
+bb111: ; preds = %bb109, %bb108
+ br i1 undef, label %bb109, label %bb112
+
+bb112: ; preds = %bb111, %bb109
+ br label %bb113
+
+bb113: ; preds = %bb113, %bb112
+ br i1 undef, label %bb114, label %bb113
+
+bb114: ; preds = %bb113
+ br i1 undef, label %bb115, label %bb116
+
+bb115: ; preds = %bb114
+ br label %bb116
+
+bb116: ; preds = %bb115, %bb114
+ br i1 undef, label %bb120, label %bb117
+
+bb117: ; preds = %bb116
+ br label %bb136
+
+bb120: ; preds = %bb116
+ ret i32 undef
+
+bb128: ; preds = %bb136
+ br i1 undef, label %bb134, label %bb129
+
+bb129: ; preds = %bb128
+ br i1 undef, label %bb133, label %bb130
+
+bb130: ; preds = %bb129
+ br i1 undef, label %bb132, label %bb131
+
+bb131: ; preds = %bb130
+ ret i32 undef
+
+bb132: ; preds = %bb130
+ br label %bb133
+
+bb133: ; preds = %bb132, %bb129
+ br label %bb134
+
+bb134: ; preds = %bb133, %bb128
+ br label %bb136
+
+bb136: ; preds = %bb134, %bb117
+ br i1 undef, label %bb198, label %bb128
+
+bb138: ; preds = %bb77
+ %0 = trunc i32 undef to i16 ; <i16> [#uses=1]
+ br label %bb141
+
+bb139: ; preds = %bb141
+ %scevgep441442881 = load i16* undef ; <i16> [#uses=1]
+ %1 = icmp ugt i16 %scevgep441442881, %0 ; <i1> [#uses=1]
+ br i1 %1, label %bb141, label %bb142
+
+bb141: ; preds = %bb139, %bb138
+ br i1 undef, label %bb139, label %bb142
+
+bb142: ; preds = %bb141, %bb139
+ br label %bb143
+
+bb143: ; preds = %bb143, %bb142
+ br i1 undef, label %bb144, label %bb143
+
+bb144: ; preds = %bb143
+ br i1 undef, label %bb145, label %bb146
+
+bb145: ; preds = %bb144
+ unreachable
+
+bb146: ; preds = %bb156, %bb144
+ br i1 undef, label %bb150, label %bb147
+
+bb147: ; preds = %bb146
+ br i1 undef, label %bb157, label %bb148
+
+bb148: ; preds = %bb147
+ br i1 undef, label %bb149, label %bb157
+
+bb149: ; preds = %bb148
+ br label %bb150
+
+bb150: ; preds = %bb149, %bb146
+ br i1 undef, label %bb156, label %bb152
+
+bb152: ; preds = %bb150
+ unreachable
+
+bb156: ; preds = %bb150
+ br label %bb146
+
+bb157: ; preds = %bb148, %bb147
+ br i1 undef, label %bb167, label %bb160
+
+bb160: ; preds = %bb157
+ ret i32 undef
+
+bb167: ; preds = %bb157
+ br label %bb170
+
+bb168: ; preds = %bb170
+ br i1 undef, label %bb170, label %bb171
+
+bb170: ; preds = %bb168, %bb167
+ br i1 undef, label %bb168, label %bb171
+
+bb171: ; preds = %bb170, %bb168
+ br label %bb172
+
+bb172: ; preds = %bb172, %bb171
+ br i1 undef, label %bb173, label %bb172
+
+bb173: ; preds = %bb172
+ br i1 undef, label %bb174, label %bb175
+
+bb174: ; preds = %bb173
+ unreachable
+
+bb175: ; preds = %bb179, %bb173
+ br i1 undef, label %bb179, label %bb176
+
+bb176: ; preds = %bb175
+ br i1 undef, label %bb186, label %bb177
+
+bb177: ; preds = %bb176
+ br i1 undef, label %bb178, label %bb186
+
+bb178: ; preds = %bb177
+ br label %bb179
+
+bb179: ; preds = %bb178, %bb175
+ br label %bb175
+
+bb186: ; preds = %bb177, %bb176
+ br label %bb195
+
+bb187: ; preds = %bb195
+ br i1 undef, label %bb193, label %bb189
+
+bb189: ; preds = %bb187
+ %2 = tail call fastcc i32 @qtm_read_input(%struct.qtm_stream* %qtm) nounwind ; <i32> [#uses=0]
+ ret i32 undef
+
+bb193: ; preds = %bb187
+ br label %bb195
+
+bb195: ; preds = %bb193, %bb186
+ br i1 undef, label %bb198, label %bb187
+
+bb197: ; preds = %bb77
+ ret i32 -124
+
+bb198: ; preds = %bb195, %bb136
+ br i1 undef, label %bb211.preheader, label %bb214
+
+bb211.preheader: ; preds = %bb198
+ br label %bb211
+
+bb211: ; preds = %bb211, %bb211.preheader
+ br i1 undef, label %bb214, label %bb211
+
+bb214: ; preds = %bb211, %bb198
+ br label %bb215
+
+bb215: ; preds = %bb238, %bb214
+ br i1 undef, label %bb28, label %bb216
+
+bb216: ; preds = %bb215
+ br label %bb238
+
+bb238: ; preds = %bb216, %bb14
+ br label %bb215
+
+bb245: ; preds = %bb5, %entry
+ ret i32 undef
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 3b499a4..4f4091a 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -16,7 +16,7 @@ bb28.i: ; preds = %bb28.i, %entry
br i1 false, label %bb502.loopexit.i, label %bb28.i
bb.nph53.i: ; preds = %bb502.loopexit.i
- %tmp354.i = sub double -0.000000e+00, %tmp10.i4 ; <double> [#uses=0]
+ %tmp354.i = fsub double -0.000000e+00, %tmp10.i4 ; <double> [#uses=0]
br label %bb244.i
bb244.i: ; preds = %bb244.i, %bb.nph53.i
diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll
index d3038b9..777a3d6 100644
--- a/test/CodeGen/ARM/fixunsdfdi.ll
+++ b/test/CodeGen/ARM/fixunsdfdi.ll
@@ -13,7 +13,7 @@ bb5: ; preds = %bb3
%u.in.mask = and i64 %x14, -4294967296 ; <i64> [#uses=1]
%.ins = or i64 0, %u.in.mask ; <i64> [#uses=1]
%0 = bitcast i64 %.ins to double ; <double> [#uses=1]
- %1 = sub double %x, %0 ; <double> [#uses=1]
+ %1 = fsub double %x, %0 ; <double> [#uses=1]
%2 = fptosi double %1 to i32 ; <i32> [#uses=1]
%3 = add i32 %2, 0 ; <i32> [#uses=1]
%4 = zext i32 %3 to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll
index 87a30c9..7bbda2d 100644
--- a/test/CodeGen/ARM/fnmul.ll
+++ b/test/CodeGen/ARM/fnmul.ll
@@ -4,8 +4,8 @@
define double @t1(double %a, double %b) {
entry:
- %tmp2 = sub double -0.000000e+00, %a ; <double> [#uses=1]
- %tmp4 = mul double %tmp2, %b ; <double> [#uses=1]
+ %tmp2 = fsub double -0.000000e+00, %a ; <double> [#uses=1]
+ %tmp4 = fmul double %tmp2, %b ; <double> [#uses=1]
ret double %tmp4
}
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index 11933d5..568a6c4 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -10,49 +10,49 @@
define float @f1(float %a, float %b) {
entry:
- %tmp = add float %a, %b ; <float> [#uses=1]
+ %tmp = fadd float %a, %b ; <float> [#uses=1]
ret float %tmp
}
define double @f2(double %a, double %b) {
entry:
- %tmp = add double %a, %b ; <double> [#uses=1]
+ %tmp = fadd double %a, %b ; <double> [#uses=1]
ret double %tmp
}
define float @f3(float %a, float %b) {
entry:
- %tmp = mul float %a, %b ; <float> [#uses=1]
+ %tmp = fmul float %a, %b ; <float> [#uses=1]
ret float %tmp
}
define double @f4(double %a, double %b) {
entry:
- %tmp = mul double %a, %b ; <double> [#uses=1]
+ %tmp = fmul double %a, %b ; <double> [#uses=1]
ret double %tmp
}
define float @f5(float %a, float %b) {
entry:
- %tmp = sub float %a, %b ; <float> [#uses=1]
+ %tmp = fsub float %a, %b ; <float> [#uses=1]
ret float %tmp
}
define double @f6(double %a, double %b) {
entry:
- %tmp = sub double %a, %b ; <double> [#uses=1]
+ %tmp = fsub double %a, %b ; <double> [#uses=1]
ret double %tmp
}
define float @f7(float %a) {
entry:
- %tmp1 = sub float -0.000000e+00, %a ; <float> [#uses=1]
+ %tmp1 = fsub float -0.000000e+00, %a ; <float> [#uses=1]
ret float %tmp1
}
define double @f8(double %a) {
entry:
- %tmp1 = sub double -0.000000e+00, %a ; <double> [#uses=1]
+ %tmp1 = fsub double -0.000000e+00, %a ; <double> [#uses=1]
ret double %tmp1
}
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 48204ec..13653bb 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -11,12 +11,12 @@ define float @f1(float %a) {
define float @f2(float* %v, float %u) {
%tmp = load float* %v ; <float> [#uses=1]
- %tmp1 = add float %tmp, %u ; <float> [#uses=1]
+ %tmp1 = fadd float %tmp, %u ; <float> [#uses=1]
ret float %tmp1
}
define void @f3(float %a, float %b, float* %v) {
- %tmp = add float %a, %b ; <float> [#uses=1]
+ %tmp = fadd float %a, %b ; <float> [#uses=1]
store float %tmp, float* %v
ret void
}
diff --git a/test/CodeGen/ARM/illegal-vector-bitcast.ll b/test/CodeGen/ARM/illegal-vector-bitcast.ll
index 79f9929..ad24eb5 100644
--- a/test/CodeGen/ARM/illegal-vector-bitcast.ll
+++ b/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -3,7 +3,7 @@
define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y)
{
%h = load <8 x float>* %f
- %i = mul <8 x float> %h, <float 0x3FF19999A0000000, float 0x400A666660000000, float 0x40119999A0000000, float 0x40159999A0000000, float 0.5, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000>
+ %i = fmul <8 x float> %h, <float 0x3FF19999A0000000, float 0x400A666660000000, float 0x40119999A0000000, float 0x40159999A0000000, float 0.5, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000>
%m = bitcast <8 x float> %i to <4 x i64>
%z = load <4 x i64>* %y
%n = mul <4 x i64> %z, %m
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
index 6db0d43..02902f2 100644
--- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep -F {str r2, \[r0, +r3, lsl #2\]}
+; RUN: llvm-as < %s | llc -march=arm | grep lsl | grep -F {lsl #2\]}
; Should use scaled addressing mode.
define void @sintzero(i32* %a) nounwind {
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 5d1beea..4bf0b4f 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,9 +1,7 @@
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldmia
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep stmia
; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrb
; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrh
-; This used to look for ldmia. But it's no longer lucky enough to
-; have the load / store instructions lined up just right after
-; scheduler change for pr3457. We'll look for a robust solution
-; later.
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
@src = external global %struct.x
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 247465f..e9f1945 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,6 +1,5 @@
; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {str.*\\!}
; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
-; XFAIL: *
@b = external global i64*
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 2acb33f..f58da44 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -39,10 +39,10 @@ define void @test_abs(float* %P, double* %D) {
define void @test_add(float* %P, double* %D) {
%a = load float* %P ; <float> [#uses=2]
- %b = add float %a, %a ; <float> [#uses=1]
+ %b = fadd float %a, %a ; <float> [#uses=1]
store float %b, float* %P
%A = load double* %D ; <double> [#uses=2]
- %B = add double %A, %A ; <double> [#uses=1]
+ %B = fadd double %A, %A ; <double> [#uses=1]
store double %B, double* %D
ret void
}
@@ -61,8 +61,8 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
%a1 = load float* %P1 ; <float> [#uses=1]
%a2 = load float* %P2 ; <float> [#uses=1]
%a3 = load float* %P3 ; <float> [#uses=1]
- %X = mul float %a1, %a2 ; <float> [#uses=1]
- %Y = sub float %X, %a3 ; <float> [#uses=1]
+ %X = fmul float %a1, %a2 ; <float> [#uses=1]
+ %Y = fsub float %X, %a3 ; <float> [#uses=1]
store float %Y, float* %P1
ret void
}
diff --git a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
index ca4e48e..f8393a3 100644
--- a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
+++ b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
@@ -23,7 +23,7 @@ define double @test4(i64 %L) {
define double @test5(double %D) {
%X = bitcast double %D to double ; <double> [#uses=1]
- %Y = add double %X, 2.000000e+00 ; <double> [#uses=1]
+ %Y = fadd double %X, 2.000000e+00 ; <double> [#uses=1]
%Z = bitcast double %Y to i64 ; <i64> [#uses=1]
%res = bitcast i64 %Z to double ; <double> [#uses=1]
ret double %res
@@ -31,7 +31,7 @@ define double @test5(double %D) {
define float @test6(float %F) {
%X = bitcast float %F to float ; <float> [#uses=1]
- %Y = add float %X, 2.000000e+00 ; <float> [#uses=1]
+ %Y = fadd float %X, 2.000000e+00 ; <float> [#uses=1]
%Z = bitcast float %Y to i32 ; <i32> [#uses=1]
%res = bitcast i32 %Z to float ; <float> [#uses=1]
ret float %res
diff --git a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
index afcac99..32d635a 100644
--- a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
+++ b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
@@ -20,7 +20,7 @@ entry:
br label %bb4
bb4: ; preds = %bb5.split, %bb4, %entry
- %0 = fcmp ogt ppc_fp128 0xM00000000000000000000000000000000, select (i1 fcmp olt (ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128), ppc_fp128 mul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000)), ppc_fp128 mul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000), ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128)) ; <i1> [#uses=1]
+ %0 = fcmp ogt ppc_fp128 0xM00000000000000000000000000000000, select (i1 fcmp olt (ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000)), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000), ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128)) ; <i1> [#uses=1]
br i1 %0, label %bb4, label %bb5.split
bb5.split: ; preds = %bb4
diff --git a/test/CodeGen/CBackend/fneg.ll b/test/CodeGen/CBackend/fneg.ll
new file mode 100644
index 0000000..68849b2
--- /dev/null
+++ b/test/CodeGen/CBackend/fneg.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llc -march=c
+
+define void @func() nounwind {
+ entry:
+ %0 = fsub double -0.0, undef
+ ret void
+}
diff --git a/test/CodeGen/CBackend/vectors.ll b/test/CodeGen/CBackend/vectors.ll
index de78975..d01e992 100644
--- a/test/CodeGen/CBackend/vectors.ll
+++ b/test/CodeGen/CBackend/vectors.ll
@@ -14,7 +14,7 @@ define i32 @test2(<4 x i32> %a, i32 %b) {
}
define <4 x float> @test3(<4 x float> %Y) {
- %Z = add <4 x float> %Y, %Y
+ %Z = fadd <4 x float> %Y, %Y
%X = shufflevector <4 x float> zeroinitializer, <4 x float> %Z, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >
ret <4 x float> %X
}
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll
index 2579a40..d4802ae 100644
--- a/test/CodeGen/CellSPU/dp_farith.ll
+++ b/test/CodeGen/CellSPU/dp_farith.ll
@@ -11,88 +11,88 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i
target triple = "spu"
define double @fadd(double %arg1, double %arg2) {
- %A = add double %arg1, %arg2
+ %A = fadd double %arg1, %arg2
ret double %A
}
define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) {
- %A = add <2 x double> %arg1, %arg2
+ %A = fadd <2 x double> %arg1, %arg2
ret <2 x double> %A
}
define double @fsub(double %arg1, double %arg2) {
- %A = sub double %arg1, %arg2
+ %A = fsub double %arg1, %arg2
ret double %A
}
define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) {
- %A = sub <2 x double> %arg1, %arg2
+ %A = fsub <2 x double> %arg1, %arg2
ret <2 x double> %A
}
define double @fmul(double %arg1, double %arg2) {
- %A = mul double %arg1, %arg2
+ %A = fmul double %arg1, %arg2
ret double %A
}
define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) {
- %A = mul <2 x double> %arg1, %arg2
+ %A = fmul <2 x double> %arg1, %arg2
ret <2 x double> %A
}
define double @fma(double %arg1, double %arg2, double %arg3) {
- %A = mul double %arg1, %arg2
- %B = add double %A, %arg3
+ %A = fmul double %arg1, %arg2
+ %B = fadd double %A, %arg3
ret double %B
}
define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = mul <2 x double> %arg1, %arg2
- %B = add <2 x double> %A, %arg3
+ %A = fmul <2 x double> %arg1, %arg2
+ %B = fadd <2 x double> %A, %arg3
ret <2 x double> %B
}
define double @fms(double %arg1, double %arg2, double %arg3) {
- %A = mul double %arg1, %arg2
- %B = sub double %A, %arg3
+ %A = fmul double %arg1, %arg2
+ %B = fsub double %A, %arg3
ret double %B
}
define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = mul <2 x double> %arg1, %arg2
- %B = sub <2 x double> %A, %arg3
+ %A = fmul <2 x double> %arg1, %arg2
+ %B = fsub <2 x double> %A, %arg3
ret <2 x double> %B
}
; - (a * b - c)
define double @d_fnms_1(double %arg1, double %arg2, double %arg3) {
- %A = mul double %arg1, %arg2
- %B = sub double %A, %arg3
- %C = sub double -0.000000e+00, %B ; <double> [#uses=1]
+ %A = fmul double %arg1, %arg2
+ %B = fsub double %A, %arg3
+ %C = fsub double -0.000000e+00, %B ; <double> [#uses=1]
ret double %C
}
; Another way of getting fnms
; - ( a * b ) + c => c - (a * b)
define double @d_fnms_2(double %arg1, double %arg2, double %arg3) {
- %A = mul double %arg1, %arg2
- %B = sub double %arg3, %A
+ %A = fmul double %arg1, %arg2
+ %B = fsub double %arg3, %A
ret double %B
}
; FNMS: - (a * b - c) => c - (a * b)
define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = mul <2 x double> %arg1, %arg2
- %B = sub <2 x double> %arg3, %A ;
+ %A = fmul <2 x double> %arg1, %arg2
+ %B = fsub <2 x double> %arg3, %A ;
ret <2 x double> %B
}
; Another way to get fnms using a constant vector
; - ( a * b - c)
define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = mul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1]
- %B = sub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1]
- %C = sub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B
+ %A = fmul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1]
+ %B = fsub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1]
+ %C = fsub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B
ret <2 x double> %C
}
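The fnms rewrites described in the comments above are plain sign algebra; the identity both d_fnms variants rely on is, in LaTeX:

$-(a \cdot b - c) = c - a \cdot b$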
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
index 4c6fbb9..5bd66f4 100644
--- a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ b/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -7,22 +7,22 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i
target triple = "spu"
define double @fneg_dp(double %X) {
- %Y = sub double -0.000000e+00, %X
+ %Y = fsub double -0.000000e+00, %X
ret double %Y
}
define <2 x double> @fneg_dp_vec(<2 x double> %X) {
- %Y = sub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X
+ %Y = fsub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X
ret <2 x double> %Y
}
define float @fneg_sp(float %X) {
- %Y = sub float -0.000000e+00, %X
+ %Y = fsub float -0.000000e+00, %X
ret float %Y
}
define <4 x float> @fneg_sp_vec(<4 x float> %X) {
- %Y = sub <4 x float> <float -0.000000e+00, float -0.000000e+00,
+ %Y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00,
float -0.000000e+00, float -0.000000e+00>, %X
ret <4 x float> %Y
}
diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll
index df3baef..d77dd92 100644
--- a/test/CodeGen/CellSPU/sp_farith.ll
+++ b/test/CodeGen/CellSPU/sp_farith.ll
@@ -12,79 +12,79 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i
target triple = "spu"
define float @fp_add(float %arg1, float %arg2) {
- %A = add float %arg1, %arg2 ; <float> [#uses=1]
+ %A = fadd float %arg1, %arg2 ; <float> [#uses=1]
ret float %A
}
define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) {
- %A = add <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
+ %A = fadd <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
ret <4 x float> %A
}
define float @fp_sub(float %arg1, float %arg2) {
- %A = sub float %arg1, %arg2 ; <float> [#uses=1]
+ %A = fsub float %arg1, %arg2 ; <float> [#uses=1]
ret float %A
}
define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) {
- %A = sub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
+ %A = fsub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
ret <4 x float> %A
}
define float @fp_mul(float %arg1, float %arg2) {
- %A = mul float %arg1, %arg2 ; <float> [#uses=1]
+ %A = fmul float %arg1, %arg2 ; <float> [#uses=1]
ret float %A
}
define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) {
- %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
+ %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
ret <4 x float> %A
}
define float @fp_mul_add(float %arg1, float %arg2, float %arg3) {
- %A = mul float %arg1, %arg2 ; <float> [#uses=1]
- %B = add float %A, %arg3 ; <float> [#uses=1]
+ %A = fmul float %arg1, %arg2 ; <float> [#uses=1]
+ %B = fadd float %A, %arg3 ; <float> [#uses=1]
ret float %B
}
define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
- %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- %B = add <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1]
+ %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
+ %B = fadd <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1]
ret <4 x float> %B
}
define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) {
- %A = mul float %arg1, %arg2 ; <float> [#uses=1]
- %B = sub float %A, %arg3 ; <float> [#uses=1]
+ %A = fmul float %arg1, %arg2 ; <float> [#uses=1]
+ %B = fsub float %A, %arg3 ; <float> [#uses=1]
ret float %B
}
define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
- %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- %B = sub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1]
+ %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
+ %B = fsub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1]
ret <4 x float> %B
}
; Test the straightforward way of getting fnms
; c - a * b
define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) {
- %A = mul float %arg1, %arg2
- %B = sub float %arg3, %A
+ %A = fmul float %arg1, %arg2
+ %B = fsub float %arg3, %A
ret float %B
}
; Test another way of getting fnms
; - ( a * b - c ) = c - a * b
define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) {
- %A = mul float %arg1, %arg2
- %B = sub float %A, %arg3
- %C = sub float -0.0, %B
+ %A = fmul float %arg1, %arg2
+ %B = fsub float %A, %arg3
+ %C = fsub float -0.0, %B
ret float %C
}
define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
- %A = mul <4 x float> %arg1, %arg2
- %B = sub <4 x float> %A, %arg3
- %D = sub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B
+ %A = fmul <4 x float> %arg1, %arg2
+ %B = fsub <4 x float> %A, %arg3
+ %D = fsub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B
ret <4 x float> %D
}
diff --git a/test/CodeGen/Generic/2006-07-03-schedulers.ll b/test/CodeGen/Generic/2006-07-03-schedulers.ll
index 4c4481c..597ee56 100644
--- a/test/CodeGen/Generic/2006-07-03-schedulers.ll
+++ b/test/CodeGen/Generic/2006-07-03-schedulers.ll
@@ -12,13 +12,13 @@ define i32 @testissue(i32 %i, float %x, float %y) {
br label %bb1
bb1: ; preds = %bb1, %0
- %x1 = mul float %x, %y ; <float> [#uses=1]
- %y1 = mul float %y, 7.500000e-01 ; <float> [#uses=1]
- %z1 = add float %x1, %y1 ; <float> [#uses=1]
- %x2 = mul float %x, 5.000000e-01 ; <float> [#uses=1]
- %y2 = mul float %y, 0x3FECCCCCC0000000 ; <float> [#uses=1]
- %z2 = add float %x2, %y2 ; <float> [#uses=1]
- %z3 = add float %z1, %z2 ; <float> [#uses=1]
+ %x1 = fmul float %x, %y ; <float> [#uses=1]
+ %y1 = fmul float %y, 7.500000e-01 ; <float> [#uses=1]
+ %z1 = fadd float %x1, %y1 ; <float> [#uses=1]
+ %x2 = fmul float %x, 5.000000e-01 ; <float> [#uses=1]
+ %y2 = fmul float %y, 0x3FECCCCCC0000000 ; <float> [#uses=1]
+ %z2 = fadd float %x2, %y2 ; <float> [#uses=1]
+ %z3 = fadd float %z1, %z2 ; <float> [#uses=1]
%i1 = shl i32 %i, 3 ; <i32> [#uses=1]
%j1 = add i32 %i, 7 ; <i32> [#uses=1]
%m1 = add i32 %i1, %j1 ; <i32> [#uses=2]
diff --git a/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll b/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
index 7495795..a61108a 100644
--- a/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
+++ b/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
@@ -71,10 +71,10 @@ cond_next159.i: ; preds = %cond_true356.i.preheader
%tmp178.i = add i32 %tmp116117.i, -128 ; <i32> [#uses=2]
%tmp181.i = mul i32 %tmp178.i, %tmp178.i ; <i32> [#uses=1]
%tmp181182.i = sitofp i32 %tmp181.i to float ; <float> [#uses=1]
- %tmp199200.pn.in.i = mul float %tmp181182.i, 0.000000e+00 ; <float> [#uses=1]
+ %tmp199200.pn.in.i = fmul float %tmp181182.i, 0.000000e+00 ; <float> [#uses=1]
%tmp199200.pn.i = fpext float %tmp199200.pn.in.i to double ; <double> [#uses=1]
- %tmp201.pn.i = sub double 1.000000e+00, %tmp199200.pn.i ; <double> [#uses=1]
- %factor.2.in.i = mul double 0.000000e+00, %tmp201.pn.i ; <double> [#uses=1]
+ %tmp201.pn.i = fsub double 1.000000e+00, %tmp199200.pn.i ; <double> [#uses=1]
+ %factor.2.in.i = fmul double 0.000000e+00, %tmp201.pn.i ; <double> [#uses=1]
%factor.2.i = fptrunc double %factor.2.in.i to float ; <float> [#uses=1]
br i1 false, label %cond_next312.i, label %cond_false222.i
diff --git a/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll b/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
index 1cf822b..9acb852 100644
--- a/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
+++ b/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
@@ -5,7 +5,7 @@ entry:
br label %bb15
bb15: ; preds = %bb15, %entry
- %tmp21 = add <8 x double> zeroinitializer, zeroinitializer ; <<8 x double>> [#uses=1]
+ %tmp21 = fadd <8 x double> zeroinitializer, zeroinitializer ; <<8 x double>> [#uses=1]
br i1 false, label %bb30, label %bb15
bb30: ; preds = %bb15
diff --git a/test/CodeGen/Generic/2008-02-25-NegateZero.ll b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
index e5a5274..0169307 100644
--- a/test/CodeGen/Generic/2008-02-25-NegateZero.ll
+++ b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
@@ -5,8 +5,8 @@ define void @test() {
entry:
%tmp98 = load float* null, align 4 ; <float> [#uses=1]
%tmp106 = load float* null, align 4 ; <float> [#uses=1]
- %tmp113 = add float %tmp98, %tmp106 ; <float> [#uses=1]
- %tmp119 = sub float %tmp113, 0.000000e+00 ; <float> [#uses=1]
+ %tmp113 = fadd float %tmp98, %tmp106 ; <float> [#uses=1]
+ %tmp119 = fsub float %tmp113, 0.000000e+00 ; <float> [#uses=1]
call void (i32, ...)* @foo( i32 0, float 0.000000e+00, float %tmp119 ) nounwind
ret void
}
diff --git a/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll b/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
index 7fe19d9..b2112f3 100644
--- a/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
+++ b/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
@@ -30,16 +30,16 @@ bb.nph1770: ; preds = %bb429
br i1 false, label %bb471, label %bb505
bb471: ; preds = %bb471, %bb.nph1770
- %tmp487 = add double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %tmp487 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
br i1 false, label %bb505, label %bb471
bb505: ; preds = %bb471, %bb.nph1770
%xy.0.lcssa = phi double [ 0.000000e+00, %bb.nph1770 ], [ %tmp487, %bb471 ] ; <double> [#uses=1]
- %tmp507 = sub double -0.000000e+00, %xy.0.lcssa ; <double> [#uses=1]
+ %tmp507 = fsub double -0.000000e+00, %xy.0.lcssa ; <double> [#uses=1]
%tmp509 = fdiv double %tmp507, 0.000000e+00 ; <double> [#uses=1]
- %tmp510 = mul double %tmp509, 1.024000e+03 ; <double> [#uses=1]
+ %tmp510 = fmul double %tmp509, 1.024000e+03 ; <double> [#uses=1]
%tmp516 = fdiv double %tmp510, 0.000000e+00 ; <double> [#uses=1]
- %tmp517 = add double %tmp516, 5.000000e-01 ; <double> [#uses=1]
+ %tmp517 = fadd double %tmp516, 5.000000e-01 ; <double> [#uses=1]
%tmp518 = tail call double @floor( double %tmp517 ) nounwind readnone ; <double> [#uses=0]
ret i32 0
diff --git a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
new file mode 100644
index 0000000..59e7d0c
--- /dev/null
+++ b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc
+; PR4317
+
+declare i32 @b()
+
+define void @a() {
+entry:
+ ret void
+
+dummy:
+ invoke i32 @b() to label %reg unwind label %reg
+
+reg:
+ ret void
+}
diff --git a/test/CodeGen/Generic/fneg-fabs.ll b/test/CodeGen/Generic/fneg-fabs.ll
index f9580b1..2709fa1 100644
--- a/test/CodeGen/Generic/fneg-fabs.ll
+++ b/test/CodeGen/Generic/fneg-fabs.ll
@@ -1,12 +1,12 @@
; RUN: llvm-as < %s | llc
define double @fneg(double %X) {
- %Y = sub double -0.000000e+00, %X ; <double> [#uses=1]
+ %Y = fsub double -0.000000e+00, %X ; <double> [#uses=1]
ret double %Y
}
define float @fnegf(float %X) {
- %Y = sub float -0.000000e+00, %X ; <float> [#uses=1]
+ %Y = fsub float -0.000000e+00, %X ; <float> [#uses=1]
ret float %Y
}
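Both functions use the standard pre-fneg negation idiom: subtracting from negative zero rather than positive zero, since 0.0 - 0.0 yields +0.0 while -0.0 - 0.0 yields -0.0, so only the -0.0 form negates signed zeros correctly. A minimal sketch (hypothetical function name):

; hypothetical illustration, not from the patch
define double @neg(double %x) {
  %r = fsub double -0.000000e+00, %x  ; equals -x for all inputs, including +/-0.0
  ret double %r
}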
diff --git a/test/CodeGen/Generic/print-arith-fp.ll b/test/CodeGen/Generic/print-arith-fp.ll
index 87aa1a0..1e27061 100644
--- a/test/CodeGen/Generic/print-arith-fp.ll
+++ b/test/CodeGen/Generic/print-arith-fp.ll
@@ -24,9 +24,9 @@ define i32 @main() {
%b_s = getelementptr [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
call i32 (i8*, ...)* @printf( i8* %a_s, double %a ) ; <i32>:1 [#uses=0]
call i32 (i8*, ...)* @printf( i8* %b_s, double %b ) ; <i32>:2 [#uses=0]
- %add_r = add double %a, %b ; <double> [#uses=1]
- %sub_r = sub double %a, %b ; <double> [#uses=1]
- %mul_r = mul double %a, %b ; <double> [#uses=1]
+ %add_r = fadd double %a, %b ; <double> [#uses=1]
+ %sub_r = fsub double %a, %b ; <double> [#uses=1]
+ %mul_r = fmul double %a, %b ; <double> [#uses=1]
%div_r = fdiv double %b, %a ; <double> [#uses=1]
%rem_r = frem double %b, %a ; <double> [#uses=1]
%add_s = getelementptr [12 x i8]* @add_str, i64 0, i64 0 ; <i8*> [#uses=1]
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index fc573f3..a532703 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -9,8 +9,8 @@ define void @testConsts(i32 %N, float %X) {
%a = add i32 %N, 1 ; <i32> [#uses=0]
%i = add i32 %N, 12345678 ; <i32> [#uses=0]
%b = add i16 4, 3 ; <i16> [#uses=0]
- %c = add float %X, 0.000000e+00 ; <float> [#uses=0]
- %d = add float %X, 0x400921CAC0000000 ; <float> [#uses=0]
+ %c = fadd float %X, 0.000000e+00 ; <float> [#uses=0]
+ %d = fadd float %X, 0x400921CAC0000000 ; <float> [#uses=0]
%f = add i32 -1, 10 ; <i32> [#uses=0]
%g = add i16 20, -1 ; <i16> [#uses=0]
%j = add i16 -1, 30 ; <i16> [#uses=0]
@@ -126,8 +126,8 @@ define void @testfloatbool(float %x, float %y) {
br label %Top
Top: ; preds = %Top, %0
- %p = add float %x, %y ; <float> [#uses=1]
- %z = sub float %x, %y ; <float> [#uses=1]
+ %p = fadd float %x, %y ; <float> [#uses=1]
+ %z = fsub float %x, %y ; <float> [#uses=1]
%b = fcmp ole float %p, %z ; <i1> [#uses=2]
%c = xor i1 %b, true ; <i1> [#uses=0]
br i1 %b, label %Top, label %goon
diff --git a/test/CodeGen/Generic/storetrunc-fp.ll b/test/CodeGen/Generic/storetrunc-fp.ll
index 710a990..0f7bb0b 100644
--- a/test/CodeGen/Generic/storetrunc-fp.ll
+++ b/test/CodeGen/Generic/storetrunc-fp.ll
@@ -1,7 +1,7 @@
; RUN: llvm-as < %s | llc
define void @foo(double %a, double %b, float* %fp) {
- %c = add double %a, %b
+ %c = fadd double %a, %b
%d = fptrunc double %c to float
store float %d, float* %fp
ret void
diff --git a/test/CodeGen/Generic/v-split.ll b/test/CodeGen/Generic/v-split.ll
index a312492..44601d0 100644
--- a/test/CodeGen/Generic/v-split.ll
+++ b/test/CodeGen/Generic/v-split.ll
@@ -4,7 +4,7 @@
define void @test_f8(%f8 *%P, %f8* %Q, %f8 *%S) {
%p = load %f8* %P
%q = load %f8* %Q
- %R = add %f8 %p, %q
+ %R = fadd %f8 %p, %q
store %f8 %R, %f8 *%S
ret void
}
diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll
index 9105757..f283256 100644
--- a/test/CodeGen/Generic/vector.ll
+++ b/test/CodeGen/Generic/vector.ll
@@ -14,7 +14,7 @@
define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
%p = load %f1* %P ; <%f1> [#uses=1]
%q = load %f1* %Q ; <%f1> [#uses=1]
- %R = add %f1 %p, %q ; <%f1> [#uses=1]
+ %R = fadd %f1 %p, %q ; <%f1> [#uses=1]
store %f1 %R, %f1* %S
ret void
}
@@ -22,7 +22,7 @@ define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
%p = load %f2* %P ; <%f2> [#uses=1]
%q = load %f2* %Q ; <%f2> [#uses=1]
- %R = add %f2 %p, %q ; <%f2> [#uses=1]
+ %R = fadd %f2 %p, %q ; <%f2> [#uses=1]
store %f2 %R, %f2* %S
ret void
}
@@ -30,7 +30,7 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
%q = load %f4* %Q ; <%f4> [#uses=1]
- %R = add %f4 %p, %q ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, %q ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
@@ -38,7 +38,7 @@ define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
%p = load %f8* %P ; <%f8> [#uses=1]
%q = load %f8* %Q ; <%f8> [#uses=1]
- %R = add %f8 %p, %q ; <%f8> [#uses=1]
+ %R = fadd %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
@@ -46,7 +46,7 @@ define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
%p = load %f8* %P ; <%f8> [#uses=1]
%q = load %f8* %Q ; <%f8> [#uses=1]
- %R = mul %f8 %p, %q ; <%f8> [#uses=1]
+ %R = fmul %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
@@ -64,21 +64,21 @@ define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
define void @test_cst(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
- %R = add %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_zero(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
- %R = add %f4 %p, zeroinitializer ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, zeroinitializer ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_undef(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
- %R = add %f4 %p, undef ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, undef ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
@@ -115,7 +115,7 @@ define double @test_extract_elt2(%d8* %P) {
define void @test_cast_1(%f4* %b, %i4* %a) {
%tmp = load %f4* %b ; <%f4> [#uses=1]
- %tmp2 = add %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1]
+ %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1]
%tmp3 = bitcast %f4 %tmp2 to %i4 ; <%i4> [#uses=1]
%tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 > ; <%i4> [#uses=1]
store %i4 %tmp4, %i4* %a
@@ -137,7 +137,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) {
%tmp4 = insertelement %f4 %tmp2, float %X, i32 2 ; <%f4> [#uses=1]
%tmp6 = insertelement %f4 %tmp4, float %X, i32 3 ; <%f4> [#uses=1]
%q = load %f4* %Q ; <%f4> [#uses=1]
- %R = add %f4 %q, %tmp6 ; <%f4> [#uses=1]
+ %R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1]
store %f4 %R, %f4* %P
ret void
}
diff --git a/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll b/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
index 20050e9..70f1d99 100644
--- a/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
+++ b/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
@@ -2,7 +2,7 @@
define i16 @test(double %d) nounwind {
entry:
- %add = add double %d, 1.000000e+00
+ %add = fadd double %d, 1.000000e+00
%call = tail call i16 @funct(double %add) nounwind
ret i16 %call
}
diff --git a/test/CodeGen/Mips/2008-07-06-fadd64.ll b/test/CodeGen/Mips/2008-07-06-fadd64.ll
index 95792ff..f8eca85 100644
--- a/test/CodeGen/Mips/2008-07-06-fadd64.ll
+++ b/test/CodeGen/Mips/2008-07-06-fadd64.ll
@@ -5,6 +5,6 @@ target triple = "mipsallegrexel-psp-elf"
define double @dofloat(double %a, double %b) nounwind {
entry:
- add double %a, %b ; <double>:0 [#uses=1]
+ fadd double %a, %b ; <double>:0 [#uses=1]
ret double %0
}
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 99eccf5..2af7ab1 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -6,7 +6,7 @@ target triple = "mipsallegrexel-psp-elf"
define float @F(float %a) nounwind {
entry:
- add float %a, 0x4011333340000000 ; <float>:0 [#uses=1]
- add float %0, 0x4010666660000000 ; <float>:1 [#uses=1]
+ fadd float %a, 0x4011333340000000 ; <float>:0 [#uses=1]
+ fadd float %0, 0x4010666660000000 ; <float>:1 [#uses=1]
ret float %1
}
diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
index 7bc1f42..4580215 100644
--- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll
+++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
@@ -11,7 +11,7 @@ entry:
br i1 %0, label %bb, label %bb2
bb: ; preds = %entry
- add float %a, 1.000000e+00 ; <float>:1 [#uses=1]
+ fadd float %a, 1.000000e+00 ; <float>:1 [#uses=1]
ret float %1
bb2: ; preds = %entry
diff --git a/test/CodeGen/Mips/2008-08-03-fabs64.ll b/test/CodeGen/Mips/2008-08-03-fabs64.ll
index 8495bfe..9d18f47 100644
--- a/test/CodeGen/Mips/2008-08-03-fabs64.ll
+++ b/test/CodeGen/Mips/2008-08-03-fabs64.ll
@@ -9,7 +9,7 @@ define double @A(double %c, double %d) nounwind readnone {
entry:
tail call double @fabs( double %c ) nounwind readnone ; <double>:0 [#uses=1]
tail call double @fabs( double %d ) nounwind readnone ; <double>:0 [#uses=1]
- add double %0, %1
+ fadd double %0, %1
ret double %2
}
diff --git a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index c9ee2cf..1f7440a 100644
--- a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -21,12 +21,12 @@ entry:
load i16* %3, align 2 ; <i16>:4 [#uses=1]
uitofp i16 %4 to double ; <double>:5 [#uses=1]
tail call double @ldexp( double %5, i32 -32 ) nounwind ; <double>:6 [#uses=1]
- add double %2, %6 ; <double>:7 [#uses=1]
+ fadd double %2, %6 ; <double>:7 [#uses=1]
getelementptr i16* %xseed, i32 2 ; <i16*>:8 [#uses=1]
load i16* %8, align 2 ; <i16>:9 [#uses=1]
uitofp i16 %9 to double ; <double>:10 [#uses=1]
tail call double @ldexp( double %10, i32 -16 ) nounwind ; <double>:11 [#uses=1]
- add double %7, %11 ; <double>:12 [#uses=1]
+ fadd double %7, %11 ; <double>:12 [#uses=1]
ret double %12
}
@@ -45,11 +45,11 @@ entry:
load i16* %4, align 2 ; <i16>:5 [#uses=1]
uitofp i16 %5 to double ; <double>:6 [#uses=1]
tail call double @ldexp( double %6, i32 -32 ) nounwind ; <double>:7 [#uses=1]
- add double %3, %7 ; <double>:8 [#uses=1]
+ fadd double %3, %7 ; <double>:8 [#uses=1]
getelementptr i16* %xseed, i32 2 ; <i16*>:9 [#uses=1]
load i16* %9, align 2 ; <i16>:10 [#uses=1]
uitofp i16 %10 to double ; <double>:11 [#uses=1]
tail call double @ldexp( double %11, i32 -16 ) nounwind ; <double>:12 [#uses=1]
- add double %8, %12 ; <double>:13 [#uses=1]
+ fadd double %8, %12 ; <double>:13 [#uses=1]
ret double %13
}
diff --git a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
index e2f06f5..1b3bde8 100644
--- a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
+++ b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
@@ -5,6 +5,6 @@ target triple = "powerpc-apple-darwin8.2.0"
; Dead argument should reserve an FP register.
define double @bar(double %DEAD, double %X, double %Y) {
- %tmp.2 = add double %X, %Y ; <double> [#uses=1]
+ %tmp.2 = fadd double %X, %Y ; <double> [#uses=1]
ret double %tmp.2
}
diff --git a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
index a58cd16..7a65c00 100644
--- a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
+++ b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
@@ -9,15 +9,15 @@ define void @offset(%struct.Point* %pt, double %x, double %y, double %z) {
entry:
%tmp = getelementptr %struct.Point* %pt, i32 0, i32 0 ; <double*> [#uses=2]
%tmp.upgrd.1 = load double* %tmp ; <double> [#uses=1]
- %tmp2 = add double %tmp.upgrd.1, %x ; <double> [#uses=1]
+ %tmp2 = fadd double %tmp.upgrd.1, %x ; <double> [#uses=1]
store double %tmp2, double* %tmp
%tmp6 = getelementptr %struct.Point* %pt, i32 0, i32 1 ; <double*> [#uses=2]
%tmp7 = load double* %tmp6 ; <double> [#uses=1]
- %tmp9 = add double %tmp7, %y ; <double> [#uses=1]
+ %tmp9 = fadd double %tmp7, %y ; <double> [#uses=1]
store double %tmp9, double* %tmp6
%tmp13 = getelementptr %struct.Point* %pt, i32 0, i32 2 ; <double*> [#uses=2]
%tmp14 = load double* %tmp13 ; <double> [#uses=1]
- %tmp16 = add double %tmp14, %z ; <double> [#uses=1]
+ %tmp16 = fadd double %tmp14, %z ; <double> [#uses=1]
store double %tmp16, double* %tmp13
ret void
}
diff --git a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
index 04ca3bb..637208b 100644
--- a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
+++ b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
@@ -604,10 +604,10 @@ xPIF.exit: ; preds = %.critedge7898, %xOperationInitMasks.exit
shufflevector <4 x float> %583, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:589 [#uses=1]
shufflevector <4 x float> %585, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:590 [#uses=1]
shufflevector <4 x float> %588, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:591 [#uses=1]
- mul <4 x float> zeroinitializer, %589 ; <<4 x float>>:592 [#uses=0]
- mul <4 x float> zeroinitializer, %590 ; <<4 x float>>:593 [#uses=0]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:594 [#uses=1]
- mul <4 x float> zeroinitializer, %591 ; <<4 x float>>:595 [#uses=0]
+ fmul <4 x float> zeroinitializer, %589 ; <<4 x float>>:592 [#uses=0]
+ fmul <4 x float> zeroinitializer, %590 ; <<4 x float>>:593 [#uses=0]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:594 [#uses=1]
+ fmul <4 x float> zeroinitializer, %591 ; <<4 x float>>:595 [#uses=0]
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:596 [#uses=2]
load <4 x float>* %596 ; <<4 x float>>:597 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* %596
@@ -621,8 +621,8 @@ xPIF.exit: ; preds = %.critedge7898, %xOperationInitMasks.exit
load <4 x float>* null ; <<4 x float>>:604 [#uses=1]
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:605 [#uses=1]
load <4 x float>* %605 ; <<4 x float>>:606 [#uses=1]
- sub <4 x float> zeroinitializer, %604 ; <<4 x float>>:607 [#uses=2]
- sub <4 x float> zeroinitializer, %606 ; <<4 x float>>:608 [#uses=2]
+ fsub <4 x float> zeroinitializer, %604 ; <<4 x float>>:607 [#uses=2]
+ fsub <4 x float> zeroinitializer, %606 ; <<4 x float>>:608 [#uses=2]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:609 [#uses=0]
br i1 false, label %617, label %610
@@ -672,21 +672,21 @@ xST.exit400: ; preds = %633, %625, %610
load <4 x float>* null ; <<4 x float>>:638 [#uses=2]
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:639 [#uses=0]
load <4 x float>* null ; <<4 x float>>:640 [#uses=2]
- mul <4 x float> %638, %638 ; <<4 x float>>:641 [#uses=1]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:642 [#uses=0]
- mul <4 x float> %640, %640 ; <<4 x float>>:643 [#uses=2]
+ fmul <4 x float> %638, %638 ; <<4 x float>>:641 [#uses=1]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:642 [#uses=0]
+ fmul <4 x float> %640, %640 ; <<4 x float>>:643 [#uses=2]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>>:644 [#uses=0]
shufflevector <4 x float> %643, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>>:645 [#uses=1]
- add <4 x float> %645, %643 ; <<4 x float>>:646 [#uses=0]
+ fadd <4 x float> %645, %643 ; <<4 x float>>:646 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:647 [#uses=1]
shufflevector <4 x float> %641, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:648 [#uses=1]
- add <4 x float> zeroinitializer, %647 ; <<4 x float>>:649 [#uses=2]
- add <4 x float> zeroinitializer, %648 ; <<4 x float>>:650 [#uses=0]
- add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:651 [#uses=2]
+ fadd <4 x float> zeroinitializer, %647 ; <<4 x float>>:649 [#uses=2]
+ fadd <4 x float> zeroinitializer, %648 ; <<4 x float>>:650 [#uses=0]
+ fadd <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:651 [#uses=2]
call <4 x float> @llvm.ppc.altivec.vrsqrtefp( <4 x float> %649 ) ; <<4 x float>>:652 [#uses=1]
- mul <4 x float> %652, %649 ; <<4 x float>>:653 [#uses=1]
+ fmul <4 x float> %652, %649 ; <<4 x float>>:653 [#uses=1]
call <4 x float> @llvm.ppc.altivec.vrsqrtefp( <4 x float> %651 ) ; <<4 x float>>:654 [#uses=1]
- mul <4 x float> %654, %651 ; <<4 x float>>:655 [#uses=0]
+ fmul <4 x float> %654, %651 ; <<4 x float>>:655 [#uses=0]
icmp eq i32 0, 0 ; <i1>:656 [#uses=1]
br i1 %656, label %665, label %657
@@ -721,9 +721,9 @@ xST.exit402: ; preds = %669, %657
load <4 x float>* null ; <<4 x float>>:676 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:677 [#uses=1]
shufflevector <4 x float> %675, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:678 [#uses=1]
- mul <4 x float> zeroinitializer, %677 ; <<4 x float>>:679 [#uses=0]
- mul <4 x float> zeroinitializer, %678 ; <<4 x float>>:680 [#uses=0]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:681 [#uses=1]
+ fmul <4 x float> zeroinitializer, %677 ; <<4 x float>>:679 [#uses=0]
+ fmul <4 x float> zeroinitializer, %678 ; <<4 x float>>:680 [#uses=0]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:681 [#uses=1]
icmp eq i32 0, 0 ; <i1>:682 [#uses=1]
br i1 %682, label %689, label %683
@@ -750,7 +750,7 @@ xST.exit405: ; preds = %689, %683
load <4 x float>* null ; <<4 x float>>:698 [#uses=0]
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:699 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:700 [#uses=1]
- add <4 x float> zeroinitializer, %700 ; <<4 x float>>:701 [#uses=0]
+ fadd <4 x float> zeroinitializer, %700 ; <<4 x float>>:701 [#uses=0]
load <4 x i32>* %.sub7896 ; <<4 x i32>>:702 [#uses=1]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %702, <4 x i32> zeroinitializer ) ; <i32>:703 [#uses=0]
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:704 [#uses=2]
@@ -769,7 +769,7 @@ xST.exit405: ; preds = %689, %683
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:714 [#uses=1]
load <4 x float>* %714 ; <<4 x float>>:715 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:716 [#uses=0]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:717 [#uses=1]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:717 [#uses=1]
load <4 x i32>* %.sub7896 ; <<4 x i32>>:718 [#uses=0]
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 0 ; <<4 x float>*>:719 [#uses=1]
store <4 x float> zeroinitializer, <4 x float>* %719
@@ -791,10 +791,10 @@ xST.exit405: ; preds = %689, %683
load <4 x float>* %732 ; <<4 x float>>:733 [#uses=0]
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:734 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:735 [#uses=1]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:736 [#uses=1]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:737 [#uses=1]
- mul <4 x float> zeroinitializer, %735 ; <<4 x float>>:738 [#uses=1]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:739 [#uses=1]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:736 [#uses=1]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:737 [#uses=1]
+ fmul <4 x float> zeroinitializer, %735 ; <<4 x float>>:738 [#uses=1]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:739 [#uses=1]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:740 [#uses=1]
icmp eq i32 %740, 0 ; <i1>:741 [#uses=0]
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:742 [#uses=2]
@@ -821,9 +821,9 @@ xST.exit405: ; preds = %689, %683
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:761 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:762 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:763 [#uses=1]
- add <4 x float> %757, zeroinitializer ; <<4 x float>>:764 [#uses=0]
- add <4 x float> %758, %763 ; <<4 x float>>:765 [#uses=0]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:766 [#uses=1]
+ fadd <4 x float> %757, zeroinitializer ; <<4 x float>>:764 [#uses=0]
+ fadd <4 x float> %758, %763 ; <<4 x float>>:765 [#uses=0]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:766 [#uses=1]
br i1 false, label %773, label %767
; <label>:767 ; preds = %xST.exit405
@@ -841,7 +841,7 @@ xST.exit405: ; preds = %689, %683
xST.exit422: ; preds = %773, %767
%.07267 = phi <4 x float> [ %766, %767 ], [ undef, %773 ] ; <<4 x float>> [#uses=0]
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:774 [#uses=0]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:775 [#uses=0]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:775 [#uses=0]
icmp eq i32 0, 0 ; <i1>:776 [#uses=1]
br i1 %776, label %780, label %777
@@ -1295,7 +1295,7 @@ xST.exit469: ; preds = %1027, %1025, %1005
%.07489 = phi <4 x float> [ %1002, %1005 ], [ %.17490, %1027 ], [ %.17490, %1025 ] ; <<4 x float>> [#uses=1]
load <4 x float>* null ; <<4 x float>>:1029 [#uses=0]
load <4 x float>* null ; <<4 x float>>:1030 [#uses=0]
- sub <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1031 [#uses=1]
+ fsub <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1031 [#uses=1]
br i1 false, label %1037, label %1032
; <label>:1032 ; preds = %xST.exit469
@@ -1368,8 +1368,8 @@ xST.exit472: ; preds = %1050, %1048, %1032
xST.exit474: ; preds = %1059, %1058, %1051
load <4 x float>* null ; <<4 x float>>:1060 [#uses=1]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1061 [#uses=1]
- mul <4 x float> %1060, zeroinitializer ; <<4 x float>>:1062 [#uses=2]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1061 [#uses=1]
+ fmul <4 x float> %1060, zeroinitializer ; <<4 x float>>:1062 [#uses=2]
br i1 false, label %1065, label %1063
; <label>:1063 ; preds = %xST.exit474
@@ -1556,8 +1556,8 @@ xST.exit489: ; preds = %1109, %1108, %1101
xST.exit492: ; preds = %1118, %1117, %1110
load <4 x float>* null ; <<4 x float>>:1119 [#uses=1]
- mul <4 x float> %1119, zeroinitializer ; <<4 x float>>:1120 [#uses=1]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1121 [#uses=1]
+ fmul <4 x float> %1119, zeroinitializer ; <<4 x float>>:1120 [#uses=1]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1121 [#uses=1]
br i1 false, label %1123, label %1122
; <label>:1122 ; preds = %xST.exit492
@@ -1591,8 +1591,8 @@ xST.exit495: ; preds = %1130, %1129, %1122
%.07582 = phi <4 x float> [ %1121, %1122 ], [ %.17583, %1130 ], [ %.17583, %1129 ] ; <<4 x float>> [#uses=1]
%.07590 = phi <4 x float> [ %1120, %1122 ], [ %.17591, %1130 ], [ %.17591, %1129 ] ; <<4 x float>> [#uses=1]
load <4 x float>* null ; <<4 x float>>:1131 [#uses=1]
- add <4 x float> %1131, zeroinitializer ; <<4 x float>>:1132 [#uses=1]
- add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1133 [#uses=1]
+ fadd <4 x float> %1131, zeroinitializer ; <<4 x float>>:1132 [#uses=1]
+ fadd <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1133 [#uses=1]
br i1 false, label %1135, label %1134
; <label>:1134 ; preds = %xST.exit495
@@ -1633,10 +1633,10 @@ xST.exit498: ; preds = %1142, %1141, %1134
shufflevector <4 x float> %1143, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1148 [#uses=1]
shufflevector <4 x float> %1145, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1149 [#uses=1]
shufflevector <4 x float> %1147, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1150 [#uses=1]
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1151 [#uses=1]
- mul <4 x float> zeroinitializer, %1148 ; <<4 x float>>:1152 [#uses=1]
- mul <4 x float> zeroinitializer, %1149 ; <<4 x float>>:1153 [#uses=1]
- mul <4 x float> zeroinitializer, %1150 ; <<4 x float>>:1154 [#uses=1]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1151 [#uses=1]
+ fmul <4 x float> zeroinitializer, %1148 ; <<4 x float>>:1152 [#uses=1]
+ fmul <4 x float> zeroinitializer, %1149 ; <<4 x float>>:1153 [#uses=1]
+ fmul <4 x float> zeroinitializer, %1150 ; <<4 x float>>:1154 [#uses=1]
br i1 false, label %1156, label %1155
; <label>:1155 ; preds = %xST.exit498
@@ -1676,10 +1676,10 @@ xST.exit501: ; preds = %1163, %1162, %1155
load <4 x float>* %1165 ; <<4 x float>>:1166 [#uses=1]
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1167 [#uses=1]
load <4 x float>* %1167 ; <<4 x float>>:1168 [#uses=1]
- add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1169 [#uses=1]
- add <4 x float> zeroinitializer, %1164 ; <<4 x float>>:1170 [#uses=1]
- add <4 x float> zeroinitializer, %1166 ; <<4 x float>>:1171 [#uses=1]
- add <4 x float> zeroinitializer, %1168 ; <<4 x float>>:1172 [#uses=1]
+ fadd <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1169 [#uses=1]
+ fadd <4 x float> zeroinitializer, %1164 ; <<4 x float>>:1170 [#uses=1]
+ fadd <4 x float> zeroinitializer, %1166 ; <<4 x float>>:1171 [#uses=1]
+ fadd <4 x float> zeroinitializer, %1168 ; <<4 x float>>:1172 [#uses=1]
br i1 false, label %1174, label %1173
; <label>:1173 ; preds = %xST.exit501
@@ -1714,7 +1714,7 @@ xST.exit504: ; preds = %1181, %1180, %1173
%.07726 = phi <4 x float> [ %1171, %1173 ], [ %.17727, %1181 ], [ %.17727, %1180 ] ; <<4 x float>> [#uses=1]
%.07730 = phi <4 x float> [ %1170, %1173 ], [ %.17731, %1181 ], [ %.17731, %1180 ] ; <<4 x float>> [#uses=1]
%.07734 = phi <4 x float> [ %1169, %1173 ], [ %.17735, %1181 ], [ %.17735, %1180 ] ; <<4 x float>> [#uses=1]
- add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1182 [#uses=1]
+ fadd <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1182 [#uses=1]
br i1 false, label %1184, label %1183
; <label>:1183 ; preds = %xST.exit504
diff --git a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
index 5cccd31..aca0faa 100644
--- a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
+++ b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
@@ -9,8 +9,8 @@ entry:
%input2 = load <4 x float>* null, align 16 ; <<4 x float>>
%shuffle7 = shufflevector <4 x float> %input2, <4 x float> < float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>> [#uses=1]
- %mul1 = mul <4 x float> %shuffle7, zeroinitializer ; <<4 x
- %add2 = add <4 x float> %mul1, %input2 ; <<4 x float>>
+ %mul1 = fmul <4 x float> %shuffle7, zeroinitializer ; <<4 x
+ %add2 = fadd <4 x float> %mul1, %input2 ; <<4 x float>>
store <4 x float> %add2, <4 x float>* null, align 16
ret void
}
diff --git a/test/CodeGen/PowerPC/2008-07-15-Fabs.ll b/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
index 7d86434..f55ffac 100644
--- a/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
@@ -7,11 +7,11 @@ entry:
call ppc_fp128 @fabsl( ppc_fp128 %d ) nounwind readnone ; <ppc_fp128>:0 [#uses=1]
fcmp olt ppc_fp128 0xM00000000000000000000000000000000, %0 ; <i1>:1 [#uses=1]
%.pn106 = select i1 %1, ppc_fp128 %a, ppc_fp128 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1]
- %.pn = sub ppc_fp128 0xM00000000000000000000000000000000, %.pn106 ; <ppc_fp128> [#uses=1]
+ %.pn = fsub ppc_fp128 0xM00000000000000000000000000000000, %.pn106 ; <ppc_fp128> [#uses=1]
%y.0 = fdiv ppc_fp128 %.pn, 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1]
- mul ppc_fp128 %y.0, 0xM3FF00000000000000000000000000000 ; <ppc_fp128>:2 [#uses=1]
- add ppc_fp128 %2, mul (ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000) ; <ppc_fp128>:3 [#uses=1]
- %tmpi = add ppc_fp128 %3, 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1]
+ fmul ppc_fp128 %y.0, 0xM3FF00000000000000000000000000000 ; <ppc_fp128>:2 [#uses=1]
+ fadd ppc_fp128 %2, fmul (ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000) ; <ppc_fp128>:3 [#uses=1]
+ %tmpi = fadd ppc_fp128 %3, 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmpi, ppc_fp128* null, align 16
ret i256 0
}
diff --git a/test/CodeGen/PowerPC/2008-07-17-Fneg.ll b/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
index 54bb4b3..a7f8181 100644
--- a/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
+++ b/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
@@ -7,7 +7,7 @@ entry:
br i1 false, label %bb3, label %bb4
bb3: ; preds = %entry
- sub ppc_fp128 0xM80000000000000000000000000000000, 0xM00000000000000000000000000000000 ; <ppc_fp128>:0 [#uses=1]
+ fsub ppc_fp128 0xM80000000000000000000000000000000, 0xM00000000000000000000000000000000 ; <ppc_fp128>:0 [#uses=1]
fptoui ppc_fp128 %0 to i32 ; <i32>:1 [#uses=1]
zext i32 %1 to i64 ; <i64>:2 [#uses=1]
sub i64 0, %2 ; <i64>:3 [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
index c181b1c..b625ceb 100644
--- a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
@@ -29,10 +29,10 @@ bb2217: ; preds = %bb2326
%10 = load float* %9, align 4 ; <float> [#uses=1]
%11 = getelementptr float* null, i32 3 ; <float*> [#uses=1]
%12 = load float* %11, align 4 ; <float> [#uses=1]
- %13 = mul float %10, 6.553500e+04 ; <float> [#uses=1]
- %14 = add float %13, 5.000000e-01 ; <float> [#uses=1]
- %15 = mul float %12, 6.553500e+04 ; <float> [#uses=1]
- %16 = add float %15, 5.000000e-01 ; <float> [#uses=3]
+ %13 = fmul float %10, 6.553500e+04 ; <float> [#uses=1]
+ %14 = fadd float %13, 5.000000e-01 ; <float> [#uses=1]
+ %15 = fmul float %12, 6.553500e+04 ; <float> [#uses=1]
+ %16 = fadd float %15, 5.000000e-01 ; <float> [#uses=3]
%17 = fcmp olt float %14, 0.000000e+00 ; <i1> [#uses=0]
%18 = fcmp olt float %16, 0.000000e+00 ; <i1> [#uses=1]
br i1 %18, label %bb2265, label %bb2262
@@ -68,10 +68,10 @@ bb2265: ; preds = %bb2264, %bb2262, %bb2217
%37 = load float* %36, align 4 ; <float> [#uses=1]
%38 = getelementptr float* %36, i32 1 ; <float*> [#uses=1]
%39 = load float* %38, align 4 ; <float> [#uses=1]
- %40 = mul float %37, 6.553500e+04 ; <float> [#uses=1]
- %41 = add float %40, 5.000000e-01 ; <float> [#uses=1]
- %42 = mul float %39, 6.553500e+04 ; <float> [#uses=1]
- %43 = add float %42, 5.000000e-01 ; <float> [#uses=3]
+ %40 = fmul float %37, 6.553500e+04 ; <float> [#uses=1]
+ %41 = fadd float %40, 5.000000e-01 ; <float> [#uses=1]
+ %42 = fmul float %39, 6.553500e+04 ; <float> [#uses=1]
+ %43 = fadd float %42, 5.000000e-01 ; <float> [#uses=3]
%44 = fcmp olt float %41, 0.000000e+00 ; <i1> [#uses=0]
%45 = fcmp olt float %43, 0.000000e+00 ; <i1> [#uses=1]
br i1 %45, label %bb2277, label %bb2274
@@ -88,10 +88,10 @@ bb2277: ; preds = %bb2274, %bb2265
%50 = load float* %49, align 4 ; <float> [#uses=1]
%51 = getelementptr float* %36, i32 3 ; <float*> [#uses=1]
%52 = load float* %51, align 4 ; <float> [#uses=1]
- %53 = mul float %50, 6.553500e+04 ; <float> [#uses=1]
- %54 = add float %53, 5.000000e-01 ; <float> [#uses=1]
- %55 = mul float %52, 6.553500e+04 ; <float> [#uses=1]
- %56 = add float %55, 5.000000e-01 ; <float> [#uses=1]
+ %53 = fmul float %50, 6.553500e+04 ; <float> [#uses=1]
+ %54 = fadd float %53, 5.000000e-01 ; <float> [#uses=1]
+ %55 = fmul float %52, 6.553500e+04 ; <float> [#uses=1]
+ %56 = fadd float %55, 5.000000e-01 ; <float> [#uses=1]
%57 = fcmp olt float %54, 0.000000e+00 ; <i1> [#uses=0]
%58 = fcmp olt float %56, 0.000000e+00 ; <i1> [#uses=0]
%59 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1]
@@ -111,10 +111,10 @@ bb2277: ; preds = %bb2274, %bb2265
%73 = load float* %72, align 4 ; <float> [#uses=1]
%74 = getelementptr float* %72, i32 1 ; <float*> [#uses=1]
%75 = load float* %74, align 4 ; <float> [#uses=1]
- %76 = mul float %73, 6.553500e+04 ; <float> [#uses=1]
- %77 = add float %76, 5.000000e-01 ; <float> [#uses=3]
- %78 = mul float %75, 6.553500e+04 ; <float> [#uses=1]
- %79 = add float %78, 5.000000e-01 ; <float> [#uses=1]
+ %76 = fmul float %73, 6.553500e+04 ; <float> [#uses=1]
+ %77 = fadd float %76, 5.000000e-01 ; <float> [#uses=3]
+ %78 = fmul float %75, 6.553500e+04 ; <float> [#uses=1]
+ %79 = fadd float %78, 5.000000e-01 ; <float> [#uses=1]
%80 = fcmp olt float %77, 0.000000e+00 ; <i1> [#uses=1]
br i1 %80, label %bb2295, label %bb2292
@@ -134,10 +134,10 @@ bb2295: ; preds = %bb2294, %bb2292, %bb2277
%86 = load float* %85, align 4 ; <float> [#uses=1]
%87 = getelementptr float* %72, i32 3 ; <float*> [#uses=1]
%88 = load float* %87, align 4 ; <float> [#uses=1]
- %89 = mul float %86, 6.553500e+04 ; <float> [#uses=1]
- %90 = add float %89, 5.000000e-01 ; <float> [#uses=1]
- %91 = mul float %88, 6.553500e+04 ; <float> [#uses=1]
- %92 = add float %91, 5.000000e-01 ; <float> [#uses=1]
+ %89 = fmul float %86, 6.553500e+04 ; <float> [#uses=1]
+ %90 = fadd float %89, 5.000000e-01 ; <float> [#uses=1]
+ %91 = fmul float %88, 6.553500e+04 ; <float> [#uses=1]
+ %92 = fadd float %91, 5.000000e-01 ; <float> [#uses=1]
%93 = fcmp olt float %90, 0.000000e+00 ; <i1> [#uses=0]
%94 = fcmp olt float %92, 0.000000e+00 ; <i1> [#uses=0]
%95 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
index 0283082..c760b41 100644
--- a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
@@ -3,9 +3,9 @@
define void @__divtc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind {
entry:
%imag59 = load ppc_fp128* null, align 8 ; <ppc_fp128> [#uses=1]
- %0 = mul ppc_fp128 0xM00000000000000000000000000000000, %imag59 ; <ppc_fp128> [#uses=1]
- %1 = mul ppc_fp128 0xM00000000000000000000000000000000, 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1]
- %2 = add ppc_fp128 %0, %1 ; <ppc_fp128> [#uses=1]
+ %0 = fmul ppc_fp128 0xM00000000000000000000000000000000, %imag59 ; <ppc_fp128> [#uses=1]
+ %1 = fmul ppc_fp128 0xM00000000000000000000000000000000, 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1]
+ %2 = fadd ppc_fp128 %0, %1 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %2, ppc_fp128* null, align 16
unreachable
}
diff --git a/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
index 4db5773..071c788 100644
--- a/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -6,17 +6,17 @@ entry:
br i1 %0, label %bb5, label %bb1
bb1: ; preds = %entry
- %1 = mul ppc_fp128 %a, 0xM3DF00000000000000000000000000000 ; <ppc_fp128> [#uses=1]
+ %1 = fmul ppc_fp128 %a, 0xM3DF00000000000000000000000000000 ; <ppc_fp128> [#uses=1]
%2 = fptoui ppc_fp128 %1 to i32 ; <i32> [#uses=1]
%3 = zext i32 %2 to i64 ; <i64> [#uses=1]
%4 = shl i64 %3, 32 ; <i64> [#uses=3]
%5 = uitofp i64 %4 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %6 = sub ppc_fp128 %a, %5 ; <ppc_fp128> [#uses=3]
+ %6 = fsub ppc_fp128 %a, %5 ; <ppc_fp128> [#uses=3]
%7 = fcmp olt ppc_fp128 %6, 0xM00000000000000000000000000000000 ; <i1> [#uses=1]
br i1 %7, label %bb2, label %bb3
bb2: ; preds = %bb1
- %8 = sub ppc_fp128 0xM80000000000000000000000000000000, %6 ; <ppc_fp128> [#uses=1]
+ %8 = fsub ppc_fp128 0xM80000000000000000000000000000000, %6 ; <ppc_fp128> [#uses=1]
%9 = fptoui ppc_fp128 %8 to i32 ; <i32> [#uses=1]
%10 = zext i32 %9 to i64 ; <i64> [#uses=1]
%11 = sub i64 %4, %10 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index 66428c7..20ff3db 100644
--- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -11,7 +11,7 @@
define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
%tmp = load <4 x float>* %P3 ; <<4 x float>> [#uses=1]
%tmp3 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
- %tmp4 = mul <4 x float> %tmp, %tmp3 ; <<4 x float>> [#uses=1]
+ %tmp4 = fmul <4 x float> %tmp, %tmp3 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp4, <4 x float>* %P3
store <4 x float> zeroinitializer, <4 x float>* %P1
store <4 x i32> zeroinitializer, <4 x i32>* %P2
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index fd9bd74..4a6fe70 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -2,53 +2,53 @@
; RUN: egrep {fn?madd|fn?msub} | count 8
define double @test_FMADD1(double %A, double %B, double %C) {
- %D = mul double %A, %B ; <double> [#uses=1]
- %E = add double %D, %C ; <double> [#uses=1]
+ %D = fmul double %A, %B ; <double> [#uses=1]
+ %E = fadd double %D, %C ; <double> [#uses=1]
ret double %E
}
define double @test_FMADD2(double %A, double %B, double %C) {
- %D = mul double %A, %B ; <double> [#uses=1]
- %E = add double %D, %C ; <double> [#uses=1]
+ %D = fmul double %A, %B ; <double> [#uses=1]
+ %E = fadd double %D, %C ; <double> [#uses=1]
ret double %E
}
define double @test_FMSUB(double %A, double %B, double %C) {
- %D = mul double %A, %B ; <double> [#uses=1]
- %E = sub double %D, %C ; <double> [#uses=1]
+ %D = fmul double %A, %B ; <double> [#uses=1]
+ %E = fsub double %D, %C ; <double> [#uses=1]
ret double %E
}
define double @test_FNMADD1(double %A, double %B, double %C) {
- %D = mul double %A, %B ; <double> [#uses=1]
- %E = add double %D, %C ; <double> [#uses=1]
- %F = sub double -0.000000e+00, %E ; <double> [#uses=1]
+ %D = fmul double %A, %B ; <double> [#uses=1]
+ %E = fadd double %D, %C ; <double> [#uses=1]
+ %F = fsub double -0.000000e+00, %E ; <double> [#uses=1]
ret double %F
}
define double @test_FNMADD2(double %A, double %B, double %C) {
- %D = mul double %A, %B ; <double> [#uses=1]
- %E = add double %C, %D ; <double> [#uses=1]
- %F = sub double -0.000000e+00, %E ; <double> [#uses=1]
+ %D = fmul double %A, %B ; <double> [#uses=1]
+ %E = fadd double %C, %D ; <double> [#uses=1]
+ %F = fsub double -0.000000e+00, %E ; <double> [#uses=1]
ret double %F
}
define double @test_FNMSUB1(double %A, double %B, double %C) {
- %D = mul double %A, %B ; <double> [#uses=1]
- %E = sub double %C, %D ; <double> [#uses=1]
+ %D = fmul double %A, %B ; <double> [#uses=1]
+ %E = fsub double %C, %D ; <double> [#uses=1]
ret double %E
}
define double @test_FNMSUB2(double %A, double %B, double %C) {
- %D = mul double %A, %B ; <double> [#uses=1]
- %E = sub double %D, %C ; <double> [#uses=1]
- %F = sub double -0.000000e+00, %E ; <double> [#uses=1]
+ %D = fmul double %A, %B ; <double> [#uses=1]
+ %E = fsub double %D, %C ; <double> [#uses=1]
+ %F = fsub double -0.000000e+00, %E ; <double> [#uses=1]
ret double %F
}
define float @test_FNMSUBS(float %A, float %B, float %C) {
- %D = mul float %A, %B ; <float> [#uses=1]
- %E = sub float %D, %C ; <float> [#uses=1]
- %F = sub float -0.000000e+00, %E ; <float> [#uses=1]
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fsub float %D, %C ; <float> [#uses=1]
+ %F = fsub float -0.000000e+00, %E ; <float> [#uses=1]
ret float %F
}
diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll
index b9517de..6c10dfb 100644
--- a/test/CodeGen/PowerPC/fnabs.ll
+++ b/test/CodeGen/PowerPC/fnabs.ll
@@ -4,7 +4,7 @@ declare double @fabs(double)
define double @test(double %X) {
%Y = call double @fabs( double %X ) ; <double> [#uses=1]
- %Z = sub double -0.000000e+00, %Y ; <double> [#uses=1]
+ %Z = fsub double -0.000000e+00, %Y ; <double> [#uses=1]
ret double %Z
}
diff --git a/test/CodeGen/PowerPC/fneg.ll b/test/CodeGen/PowerPC/fneg.ll
index a4f49f7..9579a74 100644
--- a/test/CodeGen/PowerPC/fneg.ll
+++ b/test/CodeGen/PowerPC/fneg.ll
@@ -2,10 +2,10 @@
define double @test1(double %a, double %b, double %c, double %d) {
entry:
- %tmp2 = sub double -0.000000e+00, %c ; <double> [#uses=1]
- %tmp4 = mul double %tmp2, %d ; <double> [#uses=1]
- %tmp7 = mul double %a, %b ; <double> [#uses=1]
- %tmp9 = sub double %tmp7, %tmp4 ; <double> [#uses=1]
+ %tmp2 = fsub double -0.000000e+00, %c ; <double> [#uses=1]
+ %tmp4 = fmul double %tmp2, %d ; <double> [#uses=1]
+ %tmp7 = fmul double %a, %b ; <double> [#uses=1]
+ %tmp9 = fsub double %tmp7, %tmp4 ; <double> [#uses=1]
ret double %tmp9
}
diff --git a/test/CodeGen/PowerPC/int-fp-conv-1.ll b/test/CodeGen/PowerPC/int-fp-conv-1.ll
index 3d66675..583408c 100644
--- a/test/CodeGen/PowerPC/int-fp-conv-1.ll
+++ b/test/CodeGen/PowerPC/int-fp-conv-1.ll
@@ -3,7 +3,7 @@
define i64 @__fixunstfdi(ppc_fp128 %a) nounwind {
entry:
%tmp1213 = uitofp i64 0 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %tmp15 = sub ppc_fp128 %a, %tmp1213 ; <ppc_fp128> [#uses=1]
+ %tmp15 = fsub ppc_fp128 %a, %tmp1213 ; <ppc_fp128> [#uses=1]
%tmp2829 = fptoui ppc_fp128 %tmp15 to i32 ; <i32> [#uses=1]
%tmp282930 = zext i32 %tmp2829 to i64 ; <i64> [#uses=1]
%tmp32 = add i64 %tmp282930, 0 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/itofp128.ll b/test/CodeGen/PowerPC/itofp128.ll
index 91119e9..4d74511 100644
--- a/test/CodeGen/PowerPC/itofp128.ll
+++ b/test/CodeGen/PowerPC/itofp128.ll
@@ -6,7 +6,7 @@ target triple = "powerpc64-apple-darwin9.2.0"
define i128 @__fixunstfti(ppc_fp128 %a) nounwind {
entry:
%tmp1213 = uitofp i128 0 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %tmp15 = sub ppc_fp128 %a, %tmp1213 ; <ppc_fp128> [#uses=1]
+ %tmp15 = fsub ppc_fp128 %a, %tmp1213 ; <ppc_fp128> [#uses=1]
%tmp2829 = fptoui ppc_fp128 %tmp15 to i64 ; <i64> [#uses=1]
%tmp282930 = zext i64 %tmp2829 to i128 ; <i128> [#uses=1]
%tmp32 = add i128 %tmp282930, 0 ; <i128> [#uses=1]
diff --git a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
index d5484bd..fd0e1d4 100644
--- a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
+++ b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
@@ -9,9 +9,9 @@ define void @func(<4 x float>* %a, <4 x float>* %b) {
%tmp = load <4 x float>* %tmp1 ; <<4 x float>> [#uses=1]
%tmp3 = getelementptr <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1]
%tmp4 = load <4 x float>* %tmp3 ; <<4 x float>> [#uses=1]
- %tmp5 = mul <4 x float> %tmp, %tmp4 ; <<4 x float>> [#uses=1]
+ %tmp5 = fmul <4 x float> %tmp, %tmp4 ; <<4 x float>> [#uses=1]
%tmp8 = load <4 x float>* %b ; <<4 x float>> [#uses=1]
- %tmp9 = add <4 x float> %tmp5, %tmp8 ; <<4 x float>> [#uses=1]
+ %tmp9 = fadd <4 x float> %tmp5, %tmp8 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp9, <4 x float>* %a
ret void
}
diff --git a/test/CodeGen/PowerPC/multiple-return-values.ll b/test/CodeGen/PowerPC/multiple-return-values.ll
index b72b148..3f75f7d 100644
--- a/test/CodeGen/PowerPC/multiple-return-values.ll
+++ b/test/CodeGen/PowerPC/multiple-return-values.ll
@@ -3,7 +3,7 @@
define {i64, float} @bar(i64 %a, float %b) {
%y = add i64 %a, 7
- %z = add float %b, 7.0
+ %z = fadd float %b, 7.0
ret i64 %y, float %z
}
diff --git a/test/CodeGen/PowerPC/ppcf128-1-opt.ll b/test/CodeGen/PowerPC/ppcf128-1-opt.ll
index 5c059b4..e3c5ab1 100644
--- a/test/CodeGen/PowerPC/ppcf128-1-opt.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1-opt.ll
@@ -5,19 +5,19 @@ target triple = "powerpc-apple-darwin8"
define ppc_fp128 @plus(ppc_fp128 %x, ppc_fp128 %y) {
entry:
- %tmp3 = add ppc_fp128 %x, %y ; <ppc_fp128> [#uses=1]
+ %tmp3 = fadd ppc_fp128 %x, %y ; <ppc_fp128> [#uses=1]
ret ppc_fp128 %tmp3
}
define ppc_fp128 @minus(ppc_fp128 %x, ppc_fp128 %y) {
entry:
- %tmp3 = sub ppc_fp128 %x, %y ; <ppc_fp128> [#uses=1]
+ %tmp3 = fsub ppc_fp128 %x, %y ; <ppc_fp128> [#uses=1]
ret ppc_fp128 %tmp3
}
define ppc_fp128 @times(ppc_fp128 %x, ppc_fp128 %y) {
entry:
- %tmp3 = mul ppc_fp128 %x, %y ; <ppc_fp128> [#uses=1]
+ %tmp3 = fmul ppc_fp128 %x, %y ; <ppc_fp128> [#uses=1]
ret ppc_fp128 %tmp3
}
diff --git a/test/CodeGen/PowerPC/ppcf128-1.ll b/test/CodeGen/PowerPC/ppcf128-1.ll
index ea8dd37..a487de7 100644
--- a/test/CodeGen/PowerPC/ppcf128-1.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1.ll
@@ -14,7 +14,7 @@ entry:
store ppc_fp128 %y, ppc_fp128* %y_addr
%tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
%tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
- %tmp3 = add ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1]
+ %tmp3 = fadd ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
%tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
@@ -36,7 +36,7 @@ entry:
store ppc_fp128 %y, ppc_fp128* %y_addr
%tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
%tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
- %tmp3 = sub ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1]
+ %tmp3 = fsub ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
%tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
@@ -58,7 +58,7 @@ entry:
store ppc_fp128 %y, ppc_fp128* %y_addr
%tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
%tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
- %tmp3 = mul ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1]
+ %tmp3 = fmul ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
%tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
diff --git a/test/CodeGen/PowerPC/ppcf128-2.ll b/test/CodeGen/PowerPC/ppcf128-2.ll
index b4f61f8..4318226 100644
--- a/test/CodeGen/PowerPC/ppcf128-2.ll
+++ b/test/CodeGen/PowerPC/ppcf128-2.ll
@@ -4,7 +4,7 @@ define i64 @__fixtfdi(ppc_fp128 %a) nounwind {
entry:
br i1 false, label %bb, label %bb8
bb: ; preds = %entry
- %tmp5 = sub ppc_fp128 0xM80000000000000000000000000000000, %a ; <ppc_fp128> [#uses=1]
+ %tmp5 = fsub ppc_fp128 0xM80000000000000000000000000000000, %a ; <ppc_fp128> [#uses=1]
%tmp6 = tail call i64 @__fixunstfdi( ppc_fp128 %tmp5 ) nounwind ; <i64> [#uses=0]
ret i64 0
bb8: ; preds = %entry
diff --git a/test/CodeGen/PowerPC/ppcf128-4.ll b/test/CodeGen/PowerPC/ppcf128-4.ll
index 8921dfc..16d6178 100644
--- a/test/CodeGen/PowerPC/ppcf128-4.ll
+++ b/test/CodeGen/PowerPC/ppcf128-4.ll
@@ -2,9 +2,9 @@
define ppc_fp128 @__floatditf(i64 %u) nounwind {
entry:
- %tmp6 = mul ppc_fp128 0xM00000000000000000000000000000000, 0xM41F00000000000000000000000000000
+ %tmp6 = fmul ppc_fp128 0xM00000000000000000000000000000000, 0xM41F00000000000000000000000000000
%tmp78 = trunc i64 %u to i32
%tmp789 = uitofp i32 %tmp78 to ppc_fp128
- %tmp11 = add ppc_fp128 %tmp789, %tmp6
+ %tmp11 = fadd ppc_fp128 %tmp789, %tmp6
ret ppc_fp128 %tmp11
}
diff --git a/test/CodeGen/PowerPC/return-val-i128.ll b/test/CodeGen/PowerPC/return-val-i128.ll
index 6e68ee3..27a5004 100644
--- a/test/CodeGen/PowerPC/return-val-i128.ll
+++ b/test/CodeGen/PowerPC/return-val-i128.ll
@@ -14,7 +14,7 @@ entry:
br i1 %toBool, label %bb, label %bb8
bb: ; preds = %entry
%tmp4 = load float* %a_addr, align 4 ; <float> [#uses=1]
- %tmp5 = sub float -0.000000e+00, %tmp4 ; <float> [#uses=1]
+ %tmp5 = fsub float -0.000000e+00, %tmp4 ; <float> [#uses=1]
%tmp6 = call i128 @__fixunssfDI( float %tmp5 ) nounwind ; <i128> [#uses=1]
%tmp7 = sub i128 0, %tmp6 ; <i128> [#uses=1]
store i128 %tmp7, i128* %tmp, align 16
diff --git a/test/CodeGen/PowerPC/unsafe-math.ll b/test/CodeGen/PowerPC/unsafe-math.ll
index 3d52d0c..d211b3b 100644
--- a/test/CodeGen/PowerPC/unsafe-math.ll
+++ b/test/CodeGen/PowerPC/unsafe-math.ll
@@ -3,8 +3,8 @@
; RUN: grep fmul | count 1
define double @foo(double %X) {
- %tmp1 = mul double %X, 1.23
- %tmp2 = mul double %tmp1, 4.124
+ %tmp1 = fmul double %X, 1.23
+ %tmp2 = fmul double %tmp1, 4.124
ret double %tmp2
}
diff --git a/test/CodeGen/PowerPC/vec_fneg.ll b/test/CodeGen/PowerPC/vec_fneg.ll
index 2ef2099..9fdbffd 100644
--- a/test/CodeGen/PowerPC/vec_fneg.ll
+++ b/test/CodeGen/PowerPC/vec_fneg.ll
@@ -2,7 +2,7 @@
define void @t(<4 x float>* %A) {
%tmp2 = load <4 x float>* %A
- %tmp3 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp2
+ %tmp3 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp2
store <4 x float> %tmp3, <4 x float>* %A
ret void
}
diff --git a/test/CodeGen/PowerPC/vec_splat.ll b/test/CodeGen/PowerPC/vec_splat.ll
index a631137..7b7e4fe 100644
--- a/test/CodeGen/PowerPC/vec_splat.ll
+++ b/test/CodeGen/PowerPC/vec_splat.ll
@@ -15,7 +15,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind {
%tmp4 = insertelement %f4 %tmp2, float %X, i32 2 ; <%f4> [#uses=1]
%tmp6 = insertelement %f4 %tmp4, float %X, i32 3 ; <%f4> [#uses=1]
%q = load %f4* %Q ; <%f4> [#uses=1]
- %R = add %f4 %q, %tmp6 ; <%f4> [#uses=1]
+ %R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1]
store %f4 %R, %f4* %P
ret void
}
diff --git a/test/CodeGen/PowerPC/vec_zero.ll b/test/CodeGen/PowerPC/vec_zero.ll
index 8d06a7d..7350e91 100644
--- a/test/CodeGen/PowerPC/vec_zero.ll
+++ b/test/CodeGen/PowerPC/vec_zero.ll
@@ -2,7 +2,7 @@
define void @foo(<4 x float>* %P) {
%T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
- %S = add <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1]
+ %S = fadd <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1]
store <4 x float> %S, <4 x float>* %P
ret void
}
diff --git a/test/CodeGen/PowerPC/vector.ll b/test/CodeGen/PowerPC/vector.ll
index 679e69e..a6c17b4 100644
--- a/test/CodeGen/PowerPC/vector.ll
+++ b/test/CodeGen/PowerPC/vector.ll
@@ -14,7 +14,7 @@
define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
%p = load %f1* %P ; <%f1> [#uses=1]
%q = load %f1* %Q ; <%f1> [#uses=1]
- %R = add %f1 %p, %q ; <%f1> [#uses=1]
+ %R = fadd %f1 %p, %q ; <%f1> [#uses=1]
store %f1 %R, %f1* %S
ret void
}
@@ -22,7 +22,7 @@ define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
%p = load %f2* %P ; <%f2> [#uses=1]
%q = load %f2* %Q ; <%f2> [#uses=1]
- %R = add %f2 %p, %q ; <%f2> [#uses=1]
+ %R = fadd %f2 %p, %q ; <%f2> [#uses=1]
store %f2 %R, %f2* %S
ret void
}
@@ -30,7 +30,7 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
%q = load %f4* %Q ; <%f4> [#uses=1]
- %R = add %f4 %p, %q ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, %q ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
@@ -38,7 +38,7 @@ define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
%p = load %f8* %P ; <%f8> [#uses=1]
%q = load %f8* %Q ; <%f8> [#uses=1]
- %R = add %f8 %p, %q ; <%f8> [#uses=1]
+ %R = fadd %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
@@ -46,7 +46,7 @@ define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
%p = load %f8* %P ; <%f8> [#uses=1]
%q = load %f8* %Q ; <%f8> [#uses=1]
- %R = mul %f8 %p, %q ; <%f8> [#uses=1]
+ %R = fmul %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
@@ -63,7 +63,7 @@ define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
define void @test_cst(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
- %R = add %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float
+ %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float
2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
@@ -71,14 +71,14 @@ define void @test_cst(%f4* %P, %f4* %S) {
define void @test_zero(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
- %R = add %f4 %p, zeroinitializer ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, zeroinitializer ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_undef(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
- %R = add %f4 %p, undef ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, undef ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
@@ -116,7 +116,7 @@ define double @test_extract_elt2(%d8* %P) {
define void @test_cast_1(%f4* %b, %i4* %a) {
%tmp = load %f4* %b ; <%f4> [#uses=1]
- %tmp2 = add %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float
+ %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float
3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1]
%tmp3 = bitcast %f4 %tmp2 to %i4 ; <%i4> [#uses=1]
%tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 >
@@ -140,7 +140,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) {
%tmp4 = insertelement %f4 %tmp2, float %X, i32 2
%tmp6 = insertelement %f4 %tmp4, float %X, i32 3
%q = load %f4* %Q ; <%f4> [#uses=1]
- %R = add %f4 %q, %tmp6 ; <%f4> [#uses=1]
+ %R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1]
store %f4 %R, %f4* %P
ret void
}
diff --git a/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll b/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
index 15af046..76f140c 100644
--- a/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
+++ b/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
@@ -2,8 +2,8 @@
define void @execute_list() {
%tmp.33.i = fdiv float 0.000000e+00, 0.000000e+00 ; <float> [#uses=1]
- %tmp.37.i = mul float 0.000000e+00, %tmp.33.i ; <float> [#uses=1]
- %tmp.42.i = add float %tmp.37.i, 0.000000e+00 ; <float> [#uses=1]
+ %tmp.37.i = fmul float 0.000000e+00, %tmp.33.i ; <float> [#uses=1]
+ %tmp.42.i = fadd float %tmp.37.i, 0.000000e+00 ; <float> [#uses=1]
call void @gl_EvalCoord1f( float %tmp.42.i )
ret void
}
diff --git a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
index b5d215b..04035ac 100644
--- a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
+++ b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
@@ -13,7 +13,7 @@ no_exit.16: ; preds = %no_exit.16, %no_exit.16.preheader
loopexit.16.loopexit: ; preds = %no_exit.16
br label %no_exit.18
no_exit.18: ; preds = %loopexit.20, %loopexit.16.loopexit
- %tmp.882 = add float 0.000000e+00, 0.000000e+00 ; <float> [#uses=2]
+ %tmp.882 = fadd float 0.000000e+00, 0.000000e+00 ; <float> [#uses=2]
br i1 false, label %loopexit.19, label %no_exit.19.preheader
no_exit.19.preheader: ; preds = %no_exit.18
ret void
@@ -21,9 +21,9 @@ loopexit.19: ; preds = %no_exit.18
br i1 false, label %loopexit.20, label %no_exit.20
no_exit.20: ; preds = %loopexit.21, %loopexit.19
%ai2.1122.tmp.3 = phi float [ %tmp.958, %loopexit.21 ], [ %tmp.882, %loopexit.19 ] ; <float> [#uses=1]
- %tmp.950 = mul float %tmp.882, %ai2.1122.tmp.3 ; <float> [#uses=1]
- %tmp.951 = sub float 0.000000e+00, %tmp.950 ; <float> [#uses=1]
- %tmp.958 = add float 0.000000e+00, 0.000000e+00 ; <float> [#uses=1]
+ %tmp.950 = fmul float %tmp.882, %ai2.1122.tmp.3 ; <float> [#uses=1]
+ %tmp.951 = fsub float 0.000000e+00, %tmp.950 ; <float> [#uses=1]
+ %tmp.958 = fadd float 0.000000e+00, 0.000000e+00 ; <float> [#uses=1]
br i1 false, label %loopexit.21, label %no_exit.21.preheader
no_exit.21.preheader: ; preds = %no_exit.20
ret void
diff --git a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
index c9a0049..78838d1 100644
--- a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
@@ -11,7 +11,7 @@ cond_next33: ; preds = %0
%tmp58.i = or i32 0, %tmp61.i.upgrd.1 ; <i32> [#uses=1]
%tmp62.i = or i32 %tmp58.i, 0 ; <i32> [#uses=1]
%tmp62.i.upgrd.2 = sitofp i32 %tmp62.i to double ; <double> [#uses=1]
- %tmp64.i = add double %tmp62.i.upgrd.2, %tmp44.i ; <double> [#uses=1]
+ %tmp64.i = fadd double %tmp62.i.upgrd.2, %tmp44.i ; <double> [#uses=1]
%tmp68.i = call double @foo( double %tmp64.i, i32 0 ) ; <double> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 811e9ac..3b365f3 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -3,12 +3,12 @@
; RUN: %prcontext {mulss LCPI1_3} 1 | grep mulss | count 1
define float @foo(float %x) {
- %tmp1 = mul float %x, 3.000000e+00
- %tmp3 = mul float %x, 5.000000e+00
- %tmp5 = mul float %x, 7.000000e+00
- %tmp7 = mul float %x, 1.100000e+01
- %tmp10 = add float %tmp1, %tmp3
- %tmp12 = add float %tmp10, %tmp5
- %tmp14 = add float %tmp12, %tmp7
+ %tmp1 = fmul float %x, 3.000000e+00
+ %tmp3 = fmul float %x, 5.000000e+00
+ %tmp5 = fmul float %x, 7.000000e+00
+ %tmp7 = fmul float %x, 1.100000e+01
+ %tmp10 = fadd float %tmp1, %tmp3
+ %tmp12 = fadd float %tmp10, %tmp5
+ %tmp14 = fadd float %tmp12, %tmp7
ret float %tmp14
}
diff --git a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index d1d0ea8..c03d982 100644
--- a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -139,7 +139,7 @@ b341:
%r353 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r352
%r354 = load float* %r353
%r362 = load float* bitcast ([128 x i64]* @i6000 to float*)
- %r363 = add float 0.000000e+00, %r362
+ %r363 = fadd float 0.000000e+00, %r362
%r370 = load float* bitcast ([128 x i64]* @i6000 to float*)
%r376 = icmp slt i64 %r16, 0
br i1 %r376, label %b377, label %a35b
@@ -155,11 +155,11 @@ a35b:
%e785 = shl i64 %w1865, 0
%b1877 = mul i64 %w1865, 0
%s795 = add i64 %b1877, 0
- %r399 = add float %r354, 0.000000e+00
- %r402 = add float %r370, 0.000000e+00
- %r403 = add float %r348, 0.000000e+00
+ %r399 = fadd float %r354, 0.000000e+00
+ %r402 = fadd float %r370, 0.000000e+00
+ %r403 = fadd float %r348, 0.000000e+00
%r411 = add i64 %s795, 0
- %r431 = add float %r362, 0.000000e+00
+ %r431 = fadd float %r362, 0.000000e+00
%r454 = add i64 %e785, 0
%r457 = add i64 %e785, 0
%r459 = icmp slt i64 %r457, 0
@@ -230,21 +230,21 @@ a45b714:
%r750 = add i64 %r717, 0
%r751 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r750
%r752 = load float* %r751
- %r753 = add float %r752, %r746
- %r754 = add float %r728, %r722
- %r755 = add float %r734, %r754
- %r756 = add float %r755, %r740
- %r757 = add float %r753, %r756
- %r759 = add float %r757, %r540
+ %r753 = fadd float %r752, %r746
+ %r754 = fadd float %r728, %r722
+ %r755 = fadd float %r734, %r754
+ %r756 = fadd float %r755, %r740
+ %r757 = fadd float %r753, %r756
+ %r759 = fadd float %r757, %r540
%r770 = add i64 %r717, 0
%r771 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r770
%r772 = load float* %r771
%r776 = add i64 %r717, 0
%r777 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r776
%r778 = load float* %r777
- %r781 = add float %r363, %r772
- %r782 = add float %r781, %r778
- %r783 = add float %r551, %r782
+ %r781 = fadd float %r363, %r772
+ %r782 = fadd float %r781, %r778
+ %r783 = fadd float %r551, %r782
br label %b712
a57b:
br i1 %r335, label %a66b, label %b1086
@@ -310,10 +310,10 @@ a53b1019:
%r1035 = load float* %r1034
%r1037 = bitcast i8* %c22010 to float*
%r1040 = getelementptr float* %r1037, i64 %r1025
- %r1044 = add float %r864, %r1035
- %r1046 = add float %r870, %r1027
- %r1047 = add float %r1044, %r1046
- %r1048 = add float %r851, %r1047
+ %r1044 = fadd float %r864, %r1035
+ %r1046 = fadd float %r870, %r1027
+ %r1047 = fadd float %r1044, %r1046
+ %r1048 = fadd float %r851, %r1047
%v1886 = add i64 %w1885, 0
%u1890 = icmp slt i64 %v1886, %b1889
br i1 %u1890, label %b1016, label %a53b1019
@@ -341,7 +341,7 @@ b1117:
%r1132 = bitcast i8* %c22012 to float*
%r1134 = getelementptr float* %r1132, i64 %w1915
%r1135 = load float* %r1134
- %r1136 = add float %r1123, %r1135
+ %r1136 = fadd float %r1123, %r1135
%r1138 = icmp slt i64 %r1114, 0
br i1 %r1138, label %b1139, label %a63b
b1139:
@@ -387,7 +387,7 @@ b1263:
a63b1266:
%w1944 = phi i64 [ 0, %a63b1266q ], [ %v1945, %a63b1266 ]
%s1377 = phi i64 [ %s1374, %a63b1266q ], [ %r1297, %a63b1266 ]
- %r1282 = add float %r1136, 0.000000e+00
+ %r1282 = fadd float %r1136, 0.000000e+00
%r1297 = add i64 %s1377, 0
%v1945 = add i64 %w1944, 0
%u1949 = icmp slt i64 %v1945, %b1948
@@ -418,7 +418,7 @@ a74b:
%r1379 = add i64 %s1543, 0
%r1403 = add i64 %r1355, 0
%r1422 = add i64 %r1348, 0
- %r1526 = add float %r1372, 0.000000e+00
+ %r1526 = fadd float %r1372, 0.000000e+00
%r1573 = add i64 %w1958, 0
%r1581 = icmp slt i64 %r1573, 0
%v1959 = add i64 %w1958, 0
@@ -448,10 +448,10 @@ a97b:
%r1763 = load float* %r1762
%r1767 = add i64 %r1622, 0
%r1768 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1767
- %r1772 = add float %r1763, 0.000000e+00
- %r1773 = add float %r1772, 0.000000e+00
- %r1809 = add float %r1757, 0.000000e+00
- %r1810 = add float %r1773, %r1809
+ %r1772 = fadd float %r1763, 0.000000e+00
+ %r1773 = fadd float %r1772, 0.000000e+00
+ %r1809 = fadd float %r1757, 0.000000e+00
+ %r1810 = fadd float %r1773, %r1809
store float %r1810, float* %r1768
%r1818 = add i64 %w1970, 0
%r1826 = icmp slt i64 %r1818, 0
diff --git a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
index d4176f1..721b6e7 100644
--- a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
+++ b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
@@ -3,12 +3,12 @@
define void @test() nounwind {
test.exit:
- mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:0 [#uses=4]
+ fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:0 [#uses=4]
load <4 x float>* null ; <<4 x float>>:1 [#uses=1]
shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:2 [#uses=1]
- mul <4 x float> %0, %2 ; <<4 x float>>:3 [#uses=1]
- sub <4 x float> zeroinitializer, %3 ; <<4 x float>>:4 [#uses=1]
- mul <4 x float> %4, zeroinitializer ; <<4 x float>>:5 [#uses=2]
+ fmul <4 x float> %0, %2 ; <<4 x float>>:3 [#uses=1]
+ fsub <4 x float> zeroinitializer, %3 ; <<4 x float>>:4 [#uses=1]
+ fmul <4 x float> %4, zeroinitializer ; <<4 x float>>:5 [#uses=2]
bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:6 [#uses=1]
and <4 x i32> %6, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 > ; <<4 x i32>>:7 [#uses=1]
bitcast <4 x i32> %7 to <4 x float> ; <<4 x float>>:8 [#uses=2]
@@ -23,13 +23,13 @@ test.exit:
br i1 false, label %19, label %13
; <label>:13 ; preds = %12
- sub float -0.000000e+00, 0.000000e+00 ; <float>:14 [#uses=1]
+ fsub float -0.000000e+00, 0.000000e+00 ; <float>:14 [#uses=1]
%tmp207 = extractelement <4 x float> zeroinitializer, i32 0 ; <float> [#uses=1]
%tmp208 = extractelement <4 x float> zeroinitializer, i32 2 ; <float> [#uses=1]
- sub float -0.000000e+00, %tmp208 ; <float>:15 [#uses=1]
+ fsub float -0.000000e+00, %tmp208 ; <float>:15 [#uses=1]
%tmp155 = extractelement <4 x float> zeroinitializer, i32 0 ; <float> [#uses=1]
%tmp156 = extractelement <4 x float> zeroinitializer, i32 2 ; <float> [#uses=1]
- sub float -0.000000e+00, %tmp156 ; <float>:16 [#uses=1]
+ fsub float -0.000000e+00, %tmp156 ; <float>:16 [#uses=1]
br label %19
; <label>:17 ; preds = %11
@@ -54,7 +54,7 @@ test.exit:
insertelement <4 x float> %31, float %25, i32 2 ; <<4 x float>>:32 [#uses=1]
insertelement <4 x float> %32, float %25, i32 3 ; <<4 x float>>:33 [#uses=1]
fdiv <4 x float> %33, zeroinitializer ; <<4 x float>>:34 [#uses=1]
- mul <4 x float> %34, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 > ; <<4 x float>>:35 [#uses=1]
+ fmul <4 x float> %34, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 > ; <<4 x float>>:35 [#uses=1]
insertelement <4 x float> undef, float %22, i32 0 ; <<4 x float>>:36 [#uses=1]
insertelement <4 x float> %36, float %21, i32 1 ; <<4 x float>>:37 [#uses=0]
br i1 false, label %foo.exit, label %38
@@ -64,17 +64,17 @@ test.exit:
fcmp ogt float %39, 0.000000e+00 ; <i1>:40 [#uses=1]
extractelement <4 x float> %0, i32 2 ; <float>:41 [#uses=1]
extractelement <4 x float> %0, i32 1 ; <float>:42 [#uses=1]
- sub float -0.000000e+00, %42 ; <float>:43 [#uses=2]
+ fsub float -0.000000e+00, %42 ; <float>:43 [#uses=2]
%tmp189 = extractelement <4 x float> %5, i32 2 ; <float> [#uses=1]
br i1 %40, label %44, label %46
; <label>:44 ; preds = %38
- sub float -0.000000e+00, %tmp189 ; <float>:45 [#uses=0]
+ fsub float -0.000000e+00, %tmp189 ; <float>:45 [#uses=0]
br label %foo.exit
; <label>:46 ; preds = %38
%tmp192 = extractelement <4 x float> %5, i32 1 ; <float> [#uses=1]
- sub float -0.000000e+00, %tmp192 ; <float>:47 [#uses=1]
+ fsub float -0.000000e+00, %tmp192 ; <float>:47 [#uses=1]
br label %foo.exit
foo.exit: ; preds = %46, %44, %19
diff --git a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
index ed5a194..514d665 100644
--- a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
+++ b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
@@ -11,7 +11,7 @@ bb: ; preds = %bb, %cond_true10
%tmp52 = bitcast <4 x float> %tmp49 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp53 = call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp52, <4 x i32> < i32 8, i32 undef, i32 undef, i32 undef > ) ; <<4 x i32>> [#uses=1]
%tmp105 = bitcast <4 x i32> %tmp53 to <4 x float> ; <<4 x float>> [#uses=1]
- %tmp108 = sub <4 x float> zeroinitializer, %tmp105 ; <<4 x float>> [#uses=0]
+ %tmp108 = fsub <4 x float> zeroinitializer, %tmp105 ; <<4 x float>> [#uses=0]
br label %bb
return: ; preds = %entry
diff --git a/test/CodeGen/X86/2007-04-24-VectorCrash.ll b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
index ce23da0..3e08e50 100644
--- a/test/CodeGen/X86/2007-04-24-VectorCrash.ll
+++ b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
@@ -8,8 +8,8 @@ define void @test(float* %P) {
entry:
or <4 x i32> zeroinitializer, and (<4 x i32> bitcast (<4 x float> shufflevector (<4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer) to <4 x i32>), <4 x i32> < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 >) ; <<4 x i32>>:0 [#uses=1]
bitcast <4 x i32> %0 to <4 x float> ; <<4 x float>>:1 [#uses=1]
- sub <4 x float> %1, zeroinitializer ; <<4 x float>>:2 [#uses=1]
- sub <4 x float> shufflevector (<4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer), %2 ; <<4 x float>>:3 [#uses=1]
+ fsub <4 x float> %1, zeroinitializer ; <<4 x float>>:2 [#uses=1]
+ fsub <4 x float> shufflevector (<4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer), %2 ; <<4 x float>>:3 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %3, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:4 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %4, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:5 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %5, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:6 [#uses=1]
@@ -29,19 +29,19 @@ entry:
shufflevector <4 x float> zeroinitializer, <4 x float> %19, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:20 [#uses=1]
shufflevector <4 x float> %20, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:21 [#uses=1]
shufflevector <4 x float> %21, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:22 [#uses=1]
- mul <4 x float> %22, zeroinitializer ; <<4 x float>>:23 [#uses=1]
+ fmul <4 x float> %22, zeroinitializer ; <<4 x float>>:23 [#uses=1]
shufflevector <4 x float> %23, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:24 [#uses=1]
call <4 x float> @llvm.x86.sse.add.ss( <4 x float> zeroinitializer, <4 x float> %24 ) ; <<4 x float>>:25 [#uses=1]
shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:26 [#uses=1]
shufflevector <4 x float> %26, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:27 [#uses=1]
shufflevector <4 x float> %27, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 1, i32 6, i32 7 > ; <<4 x float>>:28 [#uses=1]
- mul <4 x float> zeroinitializer, %28 ; <<4 x float>>:29 [#uses=1]
- add <4 x float> %29, zeroinitializer ; <<4 x float>>:30 [#uses=1]
- mul <4 x float> zeroinitializer, %30 ; <<4 x float>>:31 [#uses=1]
+ fmul <4 x float> zeroinitializer, %28 ; <<4 x float>>:29 [#uses=1]
+ fadd <4 x float> %29, zeroinitializer ; <<4 x float>>:30 [#uses=1]
+ fmul <4 x float> zeroinitializer, %30 ; <<4 x float>>:31 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %31, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:32 [#uses=1]
- mul <4 x float> zeroinitializer, %32 ; <<4 x float>>:33 [#uses=1]
+ fmul <4 x float> zeroinitializer, %32 ; <<4 x float>>:33 [#uses=1]
shufflevector <4 x float> %33, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:34 [#uses=1]
- mul <4 x float> zeroinitializer, %34 ; <<4 x float>>:35 [#uses=1]
+ fmul <4 x float> zeroinitializer, %34 ; <<4 x float>>:35 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %35, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>>:36 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %36, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:37 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %37, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:38 [#uses=1]
@@ -56,7 +56,7 @@ entry:
shufflevector <4 x float> zeroinitializer, <4 x float> %46, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:47 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %47, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:48 [#uses=1]
shufflevector <4 x float> %48, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>>:49 [#uses=1]
- add <4 x float> %49, zeroinitializer ; <<4 x float>>:50 [#uses=1]
+ fadd <4 x float> %49, zeroinitializer ; <<4 x float>>:50 [#uses=1]
%tmp5845 = extractelement <4 x float> %50, i32 2 ; <float> [#uses=1]
store float %tmp5845, float* %P
ret void
diff --git a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
index 11fb8e3..66a58c7 100644
--- a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
+++ b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
@@ -2,10 +2,10 @@
define void @test(<4 x float>* %arg) {
%tmp89 = getelementptr <4 x float>* %arg, i64 3
- %tmp1144 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, zeroinitializer
+ %tmp1144 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, zeroinitializer
store <4 x float> %tmp1144, <4 x float>* null
%tmp1149 = load <4 x float>* %tmp89
- %tmp1150 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1149
+ %tmp1150 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1149
store <4 x float> %tmp1150, <4 x float>* %tmp89
ret void
}
diff --git a/test/CodeGen/X86/2007-07-10-StackerAssert.ll b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
index 120284f..7f09b52 100644
--- a/test/CodeGen/X86/2007-07-10-StackerAssert.ll
+++ b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
@@ -27,7 +27,7 @@ bb383: ; preds = %bb164
cond_true425: ; preds = %bb383
%tmp430 = load float* null ; <float> [#uses=1]
- %tmp432 = sub float %tmp430, %tmp408 ; <float> [#uses=1]
+ %tmp432 = fsub float %tmp430, %tmp408 ; <float> [#uses=1]
%tmp432433 = fpext float %tmp432 to double ; <double> [#uses=1]
%tmp434435 = fpext float %tmp408 to double ; <double> [#uses=1]
call void (i8*, ...)* @PR_LogPrint( i8* getelementptr ([56 x i8]* @.str97, i32 0, i32 0), double 0.000000e+00, double %tmp434435, double %tmp432433 )
diff --git a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
index 142bcd3..835e4ca 100644
--- a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
+++ b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
@@ -22,7 +22,7 @@ entry:
%tmp1406.i1367.i = shufflevector <4 x float> %tmp2723.i1170.i, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>> [#uses=1]
%tmp84.i1413.i = load <4 x float>* %.sub6235.i ; <<4 x float>> [#uses=1]
- %tmp89.i1415.i = mul <4 x float> %tmp84.i1413.i, %tmp1406.i1367.i ; <<4 x float>> [#uses=1]
+ %tmp89.i1415.i = fmul <4 x float> %tmp84.i1413.i, %tmp1406.i1367.i ; <<4 x float>> [#uses=1]
store <4 x float> %tmp89.i1415.i, <4 x float>* %.sub.i
ret i16 0
}
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
index 3a3c113..fd914a1 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
@@ -5,38 +5,38 @@ entry:
br i1 true, label %bb171.preheader, label %bb431
bb171.preheader: ; preds = %entry
- %tmp176 = add float 0.000000e+00, 1.000000e+00 ; <float> [#uses=2]
+ %tmp176 = fadd float 0.000000e+00, 1.000000e+00 ; <float> [#uses=2]
%gi.1 = getelementptr float* %fz, i32 0 ; <float*> [#uses=2]
%tmp240 = load float* %gi.1, align 4 ; <float> [#uses=1]
- %tmp242 = sub float %tmp240, 0.000000e+00 ; <float> [#uses=2]
+ %tmp242 = fsub float %tmp240, 0.000000e+00 ; <float> [#uses=2]
%tmp251 = getelementptr float* %fz, i32 0 ; <float*> [#uses=1]
%tmp252 = load float* %tmp251, align 4 ; <float> [#uses=1]
%tmp258 = getelementptr float* %fz, i32 0 ; <float*> [#uses=2]
%tmp259 = load float* %tmp258, align 4 ; <float> [#uses=2]
- %tmp261 = mul float %tmp259, %tmp176 ; <float> [#uses=1]
- %tmp262 = sub float 0.000000e+00, %tmp261 ; <float> [#uses=2]
- %tmp269 = mul float %tmp252, %tmp176 ; <float> [#uses=1]
- %tmp276 = mul float %tmp259, 0.000000e+00 ; <float> [#uses=1]
- %tmp277 = add float %tmp269, %tmp276 ; <float> [#uses=2]
+ %tmp261 = fmul float %tmp259, %tmp176 ; <float> [#uses=1]
+ %tmp262 = fsub float 0.000000e+00, %tmp261 ; <float> [#uses=2]
+ %tmp269 = fmul float %tmp252, %tmp176 ; <float> [#uses=1]
+ %tmp276 = fmul float %tmp259, 0.000000e+00 ; <float> [#uses=1]
+ %tmp277 = fadd float %tmp269, %tmp276 ; <float> [#uses=2]
%tmp281 = getelementptr float* %fz, i32 0 ; <float*> [#uses=1]
%tmp282 = load float* %tmp281, align 4 ; <float> [#uses=2]
- %tmp284 = sub float %tmp282, %tmp277 ; <float> [#uses=1]
- %tmp291 = add float %tmp282, %tmp277 ; <float> [#uses=1]
- %tmp298 = sub float 0.000000e+00, %tmp262 ; <float> [#uses=1]
- %tmp305 = add float 0.000000e+00, %tmp262 ; <float> [#uses=1]
- %tmp315 = mul float 0.000000e+00, %tmp291 ; <float> [#uses=1]
- %tmp318 = mul float 0.000000e+00, %tmp298 ; <float> [#uses=1]
- %tmp319 = add float %tmp315, %tmp318 ; <float> [#uses=1]
- %tmp329 = add float 0.000000e+00, %tmp319 ; <float> [#uses=1]
+ %tmp284 = fsub float %tmp282, %tmp277 ; <float> [#uses=1]
+ %tmp291 = fadd float %tmp282, %tmp277 ; <float> [#uses=1]
+ %tmp298 = fsub float 0.000000e+00, %tmp262 ; <float> [#uses=1]
+ %tmp305 = fadd float 0.000000e+00, %tmp262 ; <float> [#uses=1]
+ %tmp315 = fmul float 0.000000e+00, %tmp291 ; <float> [#uses=1]
+ %tmp318 = fmul float 0.000000e+00, %tmp298 ; <float> [#uses=1]
+ %tmp319 = fadd float %tmp315, %tmp318 ; <float> [#uses=1]
+ %tmp329 = fadd float 0.000000e+00, %tmp319 ; <float> [#uses=1]
store float %tmp329, float* null, align 4
- %tmp336 = sub float %tmp242, 0.000000e+00 ; <float> [#uses=1]
+ %tmp336 = fsub float %tmp242, 0.000000e+00 ; <float> [#uses=1]
store float %tmp336, float* %tmp258, align 4
- %tmp343 = add float %tmp242, 0.000000e+00 ; <float> [#uses=1]
+ %tmp343 = fadd float %tmp242, 0.000000e+00 ; <float> [#uses=1]
store float %tmp343, float* null, align 4
- %tmp355 = mul float 0.000000e+00, %tmp305 ; <float> [#uses=1]
- %tmp358 = mul float 0.000000e+00, %tmp284 ; <float> [#uses=1]
- %tmp359 = add float %tmp355, %tmp358 ; <float> [#uses=1]
- %tmp369 = add float 0.000000e+00, %tmp359 ; <float> [#uses=1]
+ %tmp355 = fmul float 0.000000e+00, %tmp305 ; <float> [#uses=1]
+ %tmp358 = fmul float 0.000000e+00, %tmp284 ; <float> [#uses=1]
+ %tmp359 = fadd float %tmp355, %tmp358 ; <float> [#uses=1]
+ %tmp369 = fadd float 0.000000e+00, %tmp359 ; <float> [#uses=1]
store float %tmp369, float* %gi.1, align 4
ret void
diff --git a/test/CodeGen/X86/2007-11-02-BadAsm.ll b/test/CodeGen/X86/2007-11-02-BadAsm.ll
index 7fe8eaf..4ae4d2f 100644
--- a/test/CodeGen/X86/2007-11-02-BadAsm.ll
+++ b/test/CodeGen/X86/2007-11-02-BadAsm.ll
@@ -45,7 +45,7 @@ cond_true.i34.i: ; preds = %xit.i
cond_next.i79.i: ; preds = %xit.i
%phitmp167.i = fptosi double 0.000000e+00 to i64 ; <i64> [#uses=1]
%tmp142143.i = fpext float %tmp6162.i.i to double ; <double> [#uses=1]
- %tmp2.i139.i = add double %tmp142143.i, 5.000000e-01 ; <double> [#uses=1]
+ %tmp2.i139.i = fadd double %tmp142143.i, 5.000000e-01 ; <double> [#uses=1]
%tmp23.i140.i = fptosi double %tmp2.i139.i to i64 ; <i64> [#uses=1]
br i1 false, label %cond_true.i143.i, label %round_coord.exit148.i
@@ -60,7 +60,7 @@ round_coord.exit148.i: ; preds = %cond_true.i143.i, %cond_next.i79.i
%tmp144149.i = phi i32 [ 32767, %cond_next.i79.i ], [ -32767, %cond_true.i143.i ] ; <i32> [#uses=1]
store i32 %tmp144149.i, i32* null, align 8
%tmp147148.i = fpext float %tmp67.i15.i to double ; <double> [#uses=1]
- %tmp2.i128.i = add double %tmp147148.i, 5.000000e-01 ; <double> [#uses=1]
+ %tmp2.i128.i = fadd double %tmp147148.i, 5.000000e-01 ; <double> [#uses=1]
%tmp23.i129.i = fptosi double %tmp2.i128.i to i64 ; <i64> [#uses=2]
%tmp5.i130.i = icmp slt i64 %tmp23.i129.i, 32768 ; <i1> [#uses=1]
br i1 %tmp5.i130.i, label %cond_true.i132.i, label %round_coord.exit137.i
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index 605fb55..a4e44e1 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -13,8 +13,8 @@ bb18: ; preds = %bb18, %entry
%tmp45 = sitofp i32 %tmp4 to float ; <float> [#uses=1]
%tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0 ; <float*> [#uses=1]
%tmp9 = load float* %tmp8, align 4 ; <float> [#uses=1]
- %tmp11 = mul float %tmp9, %tmp45 ; <float> [#uses=1]
- %tmp14 = add float %tmp11, %res.0.reg2mem.0 ; <float> [#uses=2]
+ %tmp11 = fmul float %tmp9, %tmp45 ; <float> [#uses=1]
+ %tmp14 = fadd float %tmp11, %res.0.reg2mem.0 ; <float> [#uses=2]
%tmp17 = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2]
%tmp21 = icmp ult i32 %tmp17, %c ; <i1> [#uses=1]
br i1 %tmp21, label %bb18, label %bb23
diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 1b36fce..46422bc 100644
--- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -41,8 +41,8 @@ bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
%x.0.i21.i = select i1 %tmp4.i19.i, i32 %tmp1.i18.i, i32 0 ; <i32> [#uses=1]
%tmp41.sum.i = add i32 %j.0.reg2mem.0.i16.i, 2 ; <i32> [#uses=0]
%tmp1213.i23.i = sitofp i32 %x.0.i21.i to double ; <double> [#uses=1]
- %tmp15.i24.i = sub double 0.000000e+00, %tmp1213.i23.i ; <double> [#uses=1]
- %tmp16.i25.i = mul double 0.000000e+00, %tmp15.i24.i ; <double> [#uses=1]
+ %tmp15.i24.i = fsub double 0.000000e+00, %tmp1213.i23.i ; <double> [#uses=1]
+ %tmp16.i25.i = fmul double 0.000000e+00, %tmp15.i24.i ; <double> [#uses=1]
%indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 2 ; <i32> [#uses=2]
%exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i ; <i1> [#uses=1]
br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i
diff --git a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
index 84229cf..cb7a3dc 100644
--- a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
+++ b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
@@ -288,7 +288,7 @@ invcont1640: ; preds = %tmp9.i3799.noexc
to label %invcont1642 unwind label %lpad3845 ; <i8*> [#uses=0]
invcont1642: ; preds = %invcont1640
- %tmp18.i3770 = sub double %tmp3.i3778, 0.000000e+00 ; <double> [#uses=0]
+ %tmp18.i3770 = fsub double %tmp3.i3778, 0.000000e+00 ; <double> [#uses=0]
invoke fastcc void @_ZN7mrScene9AddObjectEP9mrSurfaceRK8ggStringS4_i( %struct.mrScene* %this, %struct.ggBRDF* null, %struct.ggString* null, %struct.ggString* null, i32 0 )
to label %bb3743 unwind label %lpad3845
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
index 83ca3e3..38020c1 100644
--- a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -4,29 +4,29 @@ define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %soluti
entry:
%tmp71 = load x86_fp80* null, align 16 ; <x86_fp80> [#uses=1]
%tmp72 = fdiv x86_fp80 %tmp71, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1]
- %tmp73 = add x86_fp80 0xK00000000000000000000, %tmp72 ; <x86_fp80> [#uses=1]
+ %tmp73 = fadd x86_fp80 0xK00000000000000000000, %tmp72 ; <x86_fp80> [#uses=1]
%tmp7374 = fptrunc x86_fp80 %tmp73 to double ; <double> [#uses=1]
store double %tmp7374, double* null, align 8
%tmp81 = load double* null, align 8 ; <double> [#uses=1]
- %tmp82 = add double %tmp81, 0x401921FB54442D18 ; <double> [#uses=1]
+ %tmp82 = fadd double %tmp81, 0x401921FB54442D18 ; <double> [#uses=1]
%tmp83 = fdiv double %tmp82, 3.000000e+00 ; <double> [#uses=1]
%tmp84 = call double @cos( double %tmp83 ) ; <double> [#uses=1]
- %tmp85 = mul double 0.000000e+00, %tmp84 ; <double> [#uses=1]
+ %tmp85 = fmul double 0.000000e+00, %tmp84 ; <double> [#uses=1]
%tmp8586 = fpext double %tmp85 to x86_fp80 ; <x86_fp80> [#uses=1]
%tmp87 = load x86_fp80* null, align 16 ; <x86_fp80> [#uses=1]
%tmp88 = fdiv x86_fp80 %tmp87, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1]
- %tmp89 = add x86_fp80 %tmp8586, %tmp88 ; <x86_fp80> [#uses=1]
+ %tmp89 = fadd x86_fp80 %tmp8586, %tmp88 ; <x86_fp80> [#uses=1]
%tmp8990 = fptrunc x86_fp80 %tmp89 to double ; <double> [#uses=1]
store double %tmp8990, double* null, align 8
%tmp97 = load double* null, align 8 ; <double> [#uses=1]
- %tmp98 = add double %tmp97, 0x402921FB54442D18 ; <double> [#uses=1]
+ %tmp98 = fadd double %tmp97, 0x402921FB54442D18 ; <double> [#uses=1]
%tmp99 = fdiv double %tmp98, 3.000000e+00 ; <double> [#uses=1]
%tmp100 = call double @cos( double %tmp99 ) ; <double> [#uses=1]
- %tmp101 = mul double 0.000000e+00, %tmp100 ; <double> [#uses=1]
+ %tmp101 = fmul double 0.000000e+00, %tmp100 ; <double> [#uses=1]
%tmp101102 = fpext double %tmp101 to x86_fp80 ; <x86_fp80> [#uses=1]
%tmp103 = load x86_fp80* null, align 16 ; <x86_fp80> [#uses=1]
%tmp104 = fdiv x86_fp80 %tmp103, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1]
- %tmp105 = add x86_fp80 %tmp101102, %tmp104 ; <x86_fp80> [#uses=1]
+ %tmp105 = fadd x86_fp80 %tmp101102, %tmp104 ; <x86_fp80> [#uses=1]
%tmp105106 = fptrunc x86_fp80 %tmp105 to double ; <double> [#uses=1]
store double %tmp105106, double* null, align 8
ret void
diff --git a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
index f1300fa..6db6537 100644
--- a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
@@ -3,13 +3,13 @@
define void @casin({ double, double }* sret %agg.result, double %z.0, double %z.1) nounwind {
entry:
%memtmp = alloca { double, double }, align 8 ; <{ double, double }*> [#uses=3]
- %tmp4 = sub double -0.000000e+00, %z.1 ; <double> [#uses=1]
+ %tmp4 = fsub double -0.000000e+00, %z.1 ; <double> [#uses=1]
call void @casinh( { double, double }* sret %memtmp, double %tmp4, double %z.0 ) nounwind
%tmp19 = getelementptr { double, double }* %memtmp, i32 0, i32 0 ; <double*> [#uses=1]
%tmp20 = load double* %tmp19, align 8 ; <double> [#uses=1]
%tmp22 = getelementptr { double, double }* %memtmp, i32 0, i32 1 ; <double*> [#uses=1]
%tmp23 = load double* %tmp22, align 8 ; <double> [#uses=1]
- %tmp32 = sub double -0.000000e+00, %tmp20 ; <double> [#uses=1]
+ %tmp32 = fsub double -0.000000e+00, %tmp20 ; <double> [#uses=1]
%tmp37 = getelementptr { double, double }* %agg.result, i32 0, i32 0 ; <double*> [#uses=1]
store double %tmp23, double* %tmp37, align 8
%tmp40 = getelementptr { double, double }* %agg.result, i32 0, i32 1 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
index b3fe9ab..230af57 100644
--- a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
@@ -33,23 +33,23 @@ bb5.i.i31: ; preds = %bb3.i27
br i1 %tmp10.i.i30, label %bb13.i.i37, label %bb30.i.i43
bb13.i.i37: ; preds = %bb5.i.i31
- %tmp15.i.i32 = sub double -0.000000e+00, %tmp22.i25 ; <double> [#uses=1]
+ %tmp15.i.i32 = fsub double -0.000000e+00, %tmp22.i25 ; <double> [#uses=1]
%tmp17.i.i33 = fdiv double %tmp15.i.i32, %tmp12.i23 ; <double> [#uses=3]
- %tmp20.i4.i = mul double %tmp17.i.i33, %tmp17.i.i33 ; <double> [#uses=1]
- %tmp21.i.i34 = add double %tmp20.i4.i, 1.000000e+00 ; <double> [#uses=1]
+ %tmp20.i4.i = fmul double %tmp17.i.i33, %tmp17.i.i33 ; <double> [#uses=1]
+ %tmp21.i.i34 = fadd double %tmp20.i4.i, 1.000000e+00 ; <double> [#uses=1]
%tmp22.i.i35 = call double @llvm.sqrt.f64( double %tmp21.i.i34 ) nounwind ; <double> [#uses=1]
%tmp23.i5.i = fdiv double 1.000000e+00, %tmp22.i.i35 ; <double> [#uses=2]
- %tmp28.i.i36 = mul double %tmp23.i5.i, %tmp17.i.i33 ; <double> [#uses=1]
+ %tmp28.i.i36 = fmul double %tmp23.i5.i, %tmp17.i.i33 ; <double> [#uses=1]
br label %Givens.exit.i49
bb30.i.i43: ; preds = %bb5.i.i31
- %tmp32.i.i38 = sub double -0.000000e+00, %tmp12.i23 ; <double> [#uses=1]
+ %tmp32.i.i38 = fsub double -0.000000e+00, %tmp12.i23 ; <double> [#uses=1]
%tmp34.i.i39 = fdiv double %tmp32.i.i38, %tmp22.i25 ; <double> [#uses=3]
- %tmp37.i6.i = mul double %tmp34.i.i39, %tmp34.i.i39 ; <double> [#uses=1]
- %tmp38.i.i40 = add double %tmp37.i6.i, 1.000000e+00 ; <double> [#uses=1]
+ %tmp37.i6.i = fmul double %tmp34.i.i39, %tmp34.i.i39 ; <double> [#uses=1]
+ %tmp38.i.i40 = fadd double %tmp37.i6.i, 1.000000e+00 ; <double> [#uses=1]
%tmp39.i7.i = call double @llvm.sqrt.f64( double %tmp38.i.i40 ) nounwind ; <double> [#uses=1]
%tmp40.i.i41 = fdiv double 1.000000e+00, %tmp39.i7.i ; <double> [#uses=2]
- %tmp45.i.i42 = mul double %tmp40.i.i41, %tmp34.i.i39 ; <double> [#uses=1]
+ %tmp45.i.i42 = fmul double %tmp40.i.i41, %tmp34.i.i39 ; <double> [#uses=1]
br label %Givens.exit.i49
Givens.exit.i49: ; preds = %bb3.i27.Givens.exit.i49_crit_edge, %bb30.i.i43, %bb13.i.i37
diff --git a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
index 96ac7a7..fe0ee8a 100644
--- a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
+++ b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
@@ -24,20 +24,20 @@ bb35: ; preds = %bb24, %entry
%tmp42 = sdiv i32 %i, 9 ; <i32> [#uses=1]
%tmp43 = add i32 %tmp42, -1 ; <i32> [#uses=1]
%tmp4344 = sitofp i32 %tmp43 to double ; <double> [#uses=1]
- %tmp17.i76 = mul double %tmp4344, 0.000000e+00 ; <double> [#uses=1]
+ %tmp17.i76 = fmul double %tmp4344, 0.000000e+00 ; <double> [#uses=1]
%tmp48 = sdiv i32 %i, 3 ; <i32> [#uses=1]
%tmp49 = srem i32 %tmp48, 3 ; <i32> [#uses=1]
%tmp50 = add i32 %tmp49, -1 ; <i32> [#uses=1]
%tmp5051 = sitofp i32 %tmp50 to double ; <double> [#uses=1]
- %tmp17.i63 = mul double %tmp5051, 0.000000e+00 ; <double> [#uses=1]
+ %tmp17.i63 = fmul double %tmp5051, 0.000000e+00 ; <double> [#uses=1]
%tmp55 = srem i32 %i, 3 ; <i32> [#uses=1]
%tmp56 = add i32 %tmp55, -1 ; <i32> [#uses=1]
%tmp5657 = sitofp i32 %tmp56 to double ; <double> [#uses=1]
%tmp15.i49 = getelementptr %struct.Lattice* %this, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
%tmp16.i50 = load double* %tmp15.i49, align 4 ; <double> [#uses=1]
- %tmp17.i = mul double %tmp5657, %tmp16.i50 ; <double> [#uses=1]
- %tmp20.i39 = add double %tmp17.i, %tmp17.i63 ; <double> [#uses=1]
- %tmp20.i23 = add double %tmp20.i39, %tmp17.i76 ; <double> [#uses=1]
+ %tmp17.i = fmul double %tmp5657, %tmp16.i50 ; <double> [#uses=1]
+ %tmp20.i39 = fadd double %tmp17.i, %tmp17.i63 ; <double> [#uses=1]
+ %tmp20.i23 = fadd double %tmp20.i39, %tmp17.i76 ; <double> [#uses=1]
br i1 false, label %bb58.preheader, label %bb81
bb58.preheader: ; preds = %bb35
@@ -55,7 +55,7 @@ bb58: ; preds = %bb58, %bb58.preheader
%tmp95.i = and i32 %tmp88.i, -268435456 ; <i32> [#uses=1]
%tmp97.i = or i32 0, %tmp95.i ; <i32> [#uses=1]
store i32 %tmp97.i, i32* %tmp25.i, align 1
- %tmp6.i = add double 0.000000e+00, %tmp20.i23 ; <double> [#uses=0]
+ %tmp6.i = fadd double 0.000000e+00, %tmp20.i23 ; <double> [#uses=0]
%exitcond96 = icmp eq i32 0, %smax ; <i1> [#uses=1]
br i1 %exitcond96, label %bb81, label %bb58
diff --git a/test/CodeGen/X86/2008-02-27-PEICrash.ll b/test/CodeGen/X86/2008-02-27-PEICrash.ll
index b644d8f..055eabb 100644
--- a/test/CodeGen/X86/2008-02-27-PEICrash.ll
+++ b/test/CodeGen/X86/2008-02-27-PEICrash.ll
@@ -13,21 +13,21 @@ bb56: ; preds = %bb33, %entry
%a.pn = phi float [ %a, %bb33 ], [ %b, %entry ] ; <float> [#uses=1]
%tmp41.pn508 = phi float [ 0.000000e+00, %bb33 ], [ 0.000000e+00, %entry ] ; <float> [#uses=1]
%tmp51.pn = phi float [ 0.000000e+00, %bb33 ], [ %a, %entry ] ; <float> [#uses=1]
- %tmp44.pn = mul float %tmp36.pn, %b.pn509 ; <float> [#uses=1]
- %tmp46.pn = add float %tmp44.pn, %a.pn ; <float> [#uses=1]
- %tmp53.pn = sub float 0.000000e+00, %tmp51.pn ; <float> [#uses=1]
+ %tmp44.pn = fmul float %tmp36.pn, %b.pn509 ; <float> [#uses=1]
+ %tmp46.pn = fadd float %tmp44.pn, %a.pn ; <float> [#uses=1]
+ %tmp53.pn = fsub float 0.000000e+00, %tmp51.pn ; <float> [#uses=1]
%x.0 = fdiv float %tmp46.pn, %tmp41.pn508 ; <float> [#uses=1]
%y.0 = fdiv float %tmp53.pn, 0.000000e+00 ; <float> [#uses=1]
br i1 false, label %bb433, label %bb98
bb98: ; preds = %bb56
- %tmp102 = mul float 0.000000e+00, %a ; <float> [#uses=1]
- %tmp106 = mul float 0.000000e+00, %b ; <float> [#uses=1]
+ %tmp102 = fmul float 0.000000e+00, %a ; <float> [#uses=1]
+ %tmp106 = fmul float 0.000000e+00, %b ; <float> [#uses=1]
br label %bb433
bb433: ; preds = %bb98, %bb56
%x.1 = phi float [ %tmp102, %bb98 ], [ %x.0, %bb56 ] ; <float> [#uses=0]
%y.1 = phi float [ %tmp106, %bb98 ], [ %y.0, %bb56 ] ; <float> [#uses=1]
- %tmp460 = add float %y.1, 0.000000e+00 ; <float> [#uses=0]
+ %tmp460 = fadd float %y.1, 0.000000e+00 ; <float> [#uses=0]
ret i64 0
}
diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
index c3b4a25..4b6758d 100644
--- a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
@@ -14,7 +14,7 @@ entry:
%tmp30 = icmp sgt i32 %delta, 0 ; <i1> [#uses=1]
br i1 %tmp30, label %bb33, label %bb87.preheader
bb33: ; preds = %entry
- %tmp28 = add float 0.000000e+00, %tmp24 ; <float> [#uses=1]
+ %tmp28 = fadd float 0.000000e+00, %tmp24 ; <float> [#uses=1]
%tmp35 = fcmp ogt float %tmp28, 1.800000e+01 ; <i1> [#uses=1]
br i1 %tmp35, label %bb38, label %bb87.preheader
bb38: ; preds = %bb33
@@ -24,7 +24,7 @@ bb43: ; preds = %bb38
store i32 %tmp53, i32* null, align 4
ret void
bb50: ; preds = %bb38
- %tmp56 = sub float 1.800000e+01, %tmp24 ; <float> [#uses=1]
+ %tmp56 = fsub float 1.800000e+01, %tmp24 ; <float> [#uses=1]
%tmp57 = fcmp ugt float 0.000000e+00, %tmp56 ; <i1> [#uses=1]
br i1 %tmp57, label %bb64, label %bb87.preheader
bb64: ; preds = %bb50
diff --git a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
index 1e5ab99..53bb054 100644
--- a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
+++ b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
@@ -3,10 +3,10 @@
define void @t() {
entry:
%tmp455 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 1, i32 0, i32 3, i32 2 > ; <<4 x float>> [#uses=1]
- %tmp457 = mul <4 x float> zeroinitializer, %tmp455 ; <<4 x float>> [#uses=2]
+ %tmp457 = fmul <4 x float> zeroinitializer, %tmp455 ; <<4 x float>> [#uses=2]
%tmp461 = shufflevector <4 x float> %tmp457, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
%tmp465 = shufflevector <4 x float> %tmp457, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>> [#uses=1]
- %tmp466 = sub <4 x float> %tmp461, %tmp465 ; <<4 x float>> [#uses=1]
+ %tmp466 = fsub <4 x float> %tmp461, %tmp465 ; <<4 x float>> [#uses=1]
%tmp536 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp466, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
%tmp542 = shufflevector <4 x float> %tmp536, <4 x float> zeroinitializer, <4 x i32> < i32 6, i32 7, i32 2, i32 3 > ; <<4 x float>> [#uses=1]
%tmp580 = bitcast <4 x float> %tmp542 to <4 x i32> ; <<4 x i32>> [#uses=1]
@@ -15,10 +15,10 @@ entry:
%tmp592 = bitcast <4 x i32> %tmp591 to <4 x float> ; <<4 x float>> [#uses=1]
%tmp609 = fdiv <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, %tmp592 ; <<4 x float>> [#uses=1]
%tmp652 = shufflevector <4 x float> %tmp609, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>> [#uses=1]
- %tmp662 = mul <4 x float> zeroinitializer, %tmp652 ; <<4 x float>> [#uses=1]
+ %tmp662 = fmul <4 x float> zeroinitializer, %tmp652 ; <<4 x float>> [#uses=1]
%tmp678 = shufflevector <4 x float> %tmp662, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>> [#uses=1]
- %tmp753 = mul <4 x float> zeroinitializer, %tmp678 ; <<4 x float>> [#uses=1]
- %tmp754 = sub <4 x float> zeroinitializer, %tmp753 ; <<4 x float>> [#uses=1]
+ %tmp753 = fmul <4 x float> zeroinitializer, %tmp678 ; <<4 x float>> [#uses=1]
+ %tmp754 = fsub <4 x float> zeroinitializer, %tmp753 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp754, <4 x float>* null, align 16
unreachable
}
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index 8800357..ae30385 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -70,567 +70,567 @@ define void @""() {
load <4 x float>* @29, align 1 ; <<4 x float>>:30 [#uses=31]
load <4 x float>* @30, align 1 ; <<4 x float>>:31 [#uses=32]
load <4 x float>* @31, align 1 ; <<4 x float>>:32 [#uses=33]
- mul <4 x float> %1, %1 ; <<4 x float>>:33 [#uses=1]
- mul <4 x float> %33, %2 ; <<4 x float>>:34 [#uses=1]
- mul <4 x float> %34, %3 ; <<4 x float>>:35 [#uses=1]
- mul <4 x float> %35, %4 ; <<4 x float>>:36 [#uses=1]
- mul <4 x float> %36, %5 ; <<4 x float>>:37 [#uses=1]
- mul <4 x float> %37, %6 ; <<4 x float>>:38 [#uses=1]
- mul <4 x float> %38, %7 ; <<4 x float>>:39 [#uses=1]
- mul <4 x float> %39, %8 ; <<4 x float>>:40 [#uses=1]
- mul <4 x float> %40, %9 ; <<4 x float>>:41 [#uses=1]
- mul <4 x float> %41, %10 ; <<4 x float>>:42 [#uses=1]
- mul <4 x float> %42, %11 ; <<4 x float>>:43 [#uses=1]
- mul <4 x float> %43, %12 ; <<4 x float>>:44 [#uses=1]
- mul <4 x float> %44, %13 ; <<4 x float>>:45 [#uses=1]
- mul <4 x float> %45, %14 ; <<4 x float>>:46 [#uses=1]
- mul <4 x float> %46, %15 ; <<4 x float>>:47 [#uses=1]
- mul <4 x float> %47, %16 ; <<4 x float>>:48 [#uses=1]
- mul <4 x float> %48, %17 ; <<4 x float>>:49 [#uses=1]
- mul <4 x float> %49, %18 ; <<4 x float>>:50 [#uses=1]
- mul <4 x float> %50, %19 ; <<4 x float>>:51 [#uses=1]
- mul <4 x float> %51, %20 ; <<4 x float>>:52 [#uses=1]
- mul <4 x float> %52, %21 ; <<4 x float>>:53 [#uses=1]
- mul <4 x float> %53, %22 ; <<4 x float>>:54 [#uses=1]
- mul <4 x float> %54, %23 ; <<4 x float>>:55 [#uses=1]
- mul <4 x float> %55, %24 ; <<4 x float>>:56 [#uses=1]
- mul <4 x float> %56, %25 ; <<4 x float>>:57 [#uses=1]
- mul <4 x float> %57, %26 ; <<4 x float>>:58 [#uses=1]
- mul <4 x float> %58, %27 ; <<4 x float>>:59 [#uses=1]
- mul <4 x float> %59, %28 ; <<4 x float>>:60 [#uses=1]
- mul <4 x float> %60, %29 ; <<4 x float>>:61 [#uses=1]
- mul <4 x float> %61, %30 ; <<4 x float>>:62 [#uses=1]
- mul <4 x float> %62, %31 ; <<4 x float>>:63 [#uses=1]
- mul <4 x float> %63, %32 ; <<4 x float>>:64 [#uses=3]
- mul <4 x float> %2, %2 ; <<4 x float>>:65 [#uses=1]
- mul <4 x float> %65, %3 ; <<4 x float>>:66 [#uses=1]
- mul <4 x float> %66, %4 ; <<4 x float>>:67 [#uses=1]
- mul <4 x float> %67, %5 ; <<4 x float>>:68 [#uses=1]
- mul <4 x float> %68, %6 ; <<4 x float>>:69 [#uses=1]
- mul <4 x float> %69, %7 ; <<4 x float>>:70 [#uses=1]
- mul <4 x float> %70, %8 ; <<4 x float>>:71 [#uses=1]
- mul <4 x float> %71, %9 ; <<4 x float>>:72 [#uses=1]
- mul <4 x float> %72, %10 ; <<4 x float>>:73 [#uses=1]
- mul <4 x float> %73, %11 ; <<4 x float>>:74 [#uses=1]
- mul <4 x float> %74, %12 ; <<4 x float>>:75 [#uses=1]
- mul <4 x float> %75, %13 ; <<4 x float>>:76 [#uses=1]
- mul <4 x float> %76, %14 ; <<4 x float>>:77 [#uses=1]
- mul <4 x float> %77, %15 ; <<4 x float>>:78 [#uses=1]
- mul <4 x float> %78, %16 ; <<4 x float>>:79 [#uses=1]
- mul <4 x float> %79, %17 ; <<4 x float>>:80 [#uses=1]
- mul <4 x float> %80, %18 ; <<4 x float>>:81 [#uses=1]
- mul <4 x float> %81, %19 ; <<4 x float>>:82 [#uses=1]
- mul <4 x float> %82, %20 ; <<4 x float>>:83 [#uses=1]
- mul <4 x float> %83, %21 ; <<4 x float>>:84 [#uses=1]
- mul <4 x float> %84, %22 ; <<4 x float>>:85 [#uses=1]
- mul <4 x float> %85, %23 ; <<4 x float>>:86 [#uses=1]
- mul <4 x float> %86, %24 ; <<4 x float>>:87 [#uses=1]
- mul <4 x float> %87, %25 ; <<4 x float>>:88 [#uses=1]
- mul <4 x float> %88, %26 ; <<4 x float>>:89 [#uses=1]
- mul <4 x float> %89, %27 ; <<4 x float>>:90 [#uses=1]
- mul <4 x float> %90, %28 ; <<4 x float>>:91 [#uses=1]
- mul <4 x float> %91, %29 ; <<4 x float>>:92 [#uses=1]
- mul <4 x float> %92, %30 ; <<4 x float>>:93 [#uses=1]
- mul <4 x float> %93, %31 ; <<4 x float>>:94 [#uses=1]
- mul <4 x float> %94, %32 ; <<4 x float>>:95 [#uses=1]
- mul <4 x float> %3, %3 ; <<4 x float>>:96 [#uses=1]
- mul <4 x float> %96, %4 ; <<4 x float>>:97 [#uses=1]
- mul <4 x float> %97, %5 ; <<4 x float>>:98 [#uses=1]
- mul <4 x float> %98, %6 ; <<4 x float>>:99 [#uses=1]
- mul <4 x float> %99, %7 ; <<4 x float>>:100 [#uses=1]
- mul <4 x float> %100, %8 ; <<4 x float>>:101 [#uses=1]
- mul <4 x float> %101, %9 ; <<4 x float>>:102 [#uses=1]
- mul <4 x float> %102, %10 ; <<4 x float>>:103 [#uses=1]
- mul <4 x float> %103, %11 ; <<4 x float>>:104 [#uses=1]
- mul <4 x float> %104, %12 ; <<4 x float>>:105 [#uses=1]
- mul <4 x float> %105, %13 ; <<4 x float>>:106 [#uses=1]
- mul <4 x float> %106, %14 ; <<4 x float>>:107 [#uses=1]
- mul <4 x float> %107, %15 ; <<4 x float>>:108 [#uses=1]
- mul <4 x float> %108, %16 ; <<4 x float>>:109 [#uses=1]
- mul <4 x float> %109, %17 ; <<4 x float>>:110 [#uses=1]
- mul <4 x float> %110, %18 ; <<4 x float>>:111 [#uses=1]
- mul <4 x float> %111, %19 ; <<4 x float>>:112 [#uses=1]
- mul <4 x float> %112, %20 ; <<4 x float>>:113 [#uses=1]
- mul <4 x float> %113, %21 ; <<4 x float>>:114 [#uses=1]
- mul <4 x float> %114, %22 ; <<4 x float>>:115 [#uses=1]
- mul <4 x float> %115, %23 ; <<4 x float>>:116 [#uses=1]
- mul <4 x float> %116, %24 ; <<4 x float>>:117 [#uses=1]
- mul <4 x float> %117, %25 ; <<4 x float>>:118 [#uses=1]
- mul <4 x float> %118, %26 ; <<4 x float>>:119 [#uses=1]
- mul <4 x float> %119, %27 ; <<4 x float>>:120 [#uses=1]
- mul <4 x float> %120, %28 ; <<4 x float>>:121 [#uses=1]
- mul <4 x float> %121, %29 ; <<4 x float>>:122 [#uses=1]
- mul <4 x float> %122, %30 ; <<4 x float>>:123 [#uses=1]
- mul <4 x float> %123, %31 ; <<4 x float>>:124 [#uses=1]
- mul <4 x float> %124, %32 ; <<4 x float>>:125 [#uses=1]
- mul <4 x float> %4, %4 ; <<4 x float>>:126 [#uses=1]
- mul <4 x float> %126, %5 ; <<4 x float>>:127 [#uses=1]
- mul <4 x float> %127, %6 ; <<4 x float>>:128 [#uses=1]
- mul <4 x float> %128, %7 ; <<4 x float>>:129 [#uses=1]
- mul <4 x float> %129, %8 ; <<4 x float>>:130 [#uses=1]
- mul <4 x float> %130, %9 ; <<4 x float>>:131 [#uses=1]
- mul <4 x float> %131, %10 ; <<4 x float>>:132 [#uses=1]
- mul <4 x float> %132, %11 ; <<4 x float>>:133 [#uses=1]
- mul <4 x float> %133, %12 ; <<4 x float>>:134 [#uses=1]
- mul <4 x float> %134, %13 ; <<4 x float>>:135 [#uses=1]
- mul <4 x float> %135, %14 ; <<4 x float>>:136 [#uses=1]
- mul <4 x float> %136, %15 ; <<4 x float>>:137 [#uses=1]
- mul <4 x float> %137, %16 ; <<4 x float>>:138 [#uses=1]
- mul <4 x float> %138, %17 ; <<4 x float>>:139 [#uses=1]
- mul <4 x float> %139, %18 ; <<4 x float>>:140 [#uses=1]
- mul <4 x float> %140, %19 ; <<4 x float>>:141 [#uses=1]
- mul <4 x float> %141, %20 ; <<4 x float>>:142 [#uses=1]
- mul <4 x float> %142, %21 ; <<4 x float>>:143 [#uses=1]
- mul <4 x float> %143, %22 ; <<4 x float>>:144 [#uses=1]
- mul <4 x float> %144, %23 ; <<4 x float>>:145 [#uses=1]
- mul <4 x float> %145, %24 ; <<4 x float>>:146 [#uses=1]
- mul <4 x float> %146, %25 ; <<4 x float>>:147 [#uses=1]
- mul <4 x float> %147, %26 ; <<4 x float>>:148 [#uses=1]
- mul <4 x float> %148, %27 ; <<4 x float>>:149 [#uses=1]
- mul <4 x float> %149, %28 ; <<4 x float>>:150 [#uses=1]
- mul <4 x float> %150, %29 ; <<4 x float>>:151 [#uses=1]
- mul <4 x float> %151, %30 ; <<4 x float>>:152 [#uses=1]
- mul <4 x float> %152, %31 ; <<4 x float>>:153 [#uses=1]
- mul <4 x float> %153, %32 ; <<4 x float>>:154 [#uses=1]
- mul <4 x float> %5, %5 ; <<4 x float>>:155 [#uses=1]
- mul <4 x float> %155, %6 ; <<4 x float>>:156 [#uses=1]
- mul <4 x float> %156, %7 ; <<4 x float>>:157 [#uses=1]
- mul <4 x float> %157, %8 ; <<4 x float>>:158 [#uses=1]
- mul <4 x float> %158, %9 ; <<4 x float>>:159 [#uses=1]
- mul <4 x float> %159, %10 ; <<4 x float>>:160 [#uses=1]
- mul <4 x float> %160, %11 ; <<4 x float>>:161 [#uses=1]
- mul <4 x float> %161, %12 ; <<4 x float>>:162 [#uses=1]
- mul <4 x float> %162, %13 ; <<4 x float>>:163 [#uses=1]
- mul <4 x float> %163, %14 ; <<4 x float>>:164 [#uses=1]
- mul <4 x float> %164, %15 ; <<4 x float>>:165 [#uses=1]
- mul <4 x float> %165, %16 ; <<4 x float>>:166 [#uses=1]
- mul <4 x float> %166, %17 ; <<4 x float>>:167 [#uses=1]
- mul <4 x float> %167, %18 ; <<4 x float>>:168 [#uses=1]
- mul <4 x float> %168, %19 ; <<4 x float>>:169 [#uses=1]
- mul <4 x float> %169, %20 ; <<4 x float>>:170 [#uses=1]
- mul <4 x float> %170, %21 ; <<4 x float>>:171 [#uses=1]
- mul <4 x float> %171, %22 ; <<4 x float>>:172 [#uses=1]
- mul <4 x float> %172, %23 ; <<4 x float>>:173 [#uses=1]
- mul <4 x float> %173, %24 ; <<4 x float>>:174 [#uses=1]
- mul <4 x float> %174, %25 ; <<4 x float>>:175 [#uses=1]
- mul <4 x float> %175, %26 ; <<4 x float>>:176 [#uses=1]
- mul <4 x float> %176, %27 ; <<4 x float>>:177 [#uses=1]
- mul <4 x float> %177, %28 ; <<4 x float>>:178 [#uses=1]
- mul <4 x float> %178, %29 ; <<4 x float>>:179 [#uses=1]
- mul <4 x float> %179, %30 ; <<4 x float>>:180 [#uses=1]
- mul <4 x float> %180, %31 ; <<4 x float>>:181 [#uses=1]
- mul <4 x float> %181, %32 ; <<4 x float>>:182 [#uses=1]
- mul <4 x float> %6, %6 ; <<4 x float>>:183 [#uses=1]
- mul <4 x float> %183, %7 ; <<4 x float>>:184 [#uses=1]
- mul <4 x float> %184, %8 ; <<4 x float>>:185 [#uses=1]
- mul <4 x float> %185, %9 ; <<4 x float>>:186 [#uses=1]
- mul <4 x float> %186, %10 ; <<4 x float>>:187 [#uses=1]
- mul <4 x float> %187, %11 ; <<4 x float>>:188 [#uses=1]
- mul <4 x float> %188, %12 ; <<4 x float>>:189 [#uses=1]
- mul <4 x float> %189, %13 ; <<4 x float>>:190 [#uses=1]
- mul <4 x float> %190, %14 ; <<4 x float>>:191 [#uses=1]
- mul <4 x float> %191, %15 ; <<4 x float>>:192 [#uses=1]
- mul <4 x float> %192, %16 ; <<4 x float>>:193 [#uses=1]
- mul <4 x float> %193, %17 ; <<4 x float>>:194 [#uses=1]
- mul <4 x float> %194, %18 ; <<4 x float>>:195 [#uses=1]
- mul <4 x float> %195, %19 ; <<4 x float>>:196 [#uses=1]
- mul <4 x float> %196, %20 ; <<4 x float>>:197 [#uses=1]
- mul <4 x float> %197, %21 ; <<4 x float>>:198 [#uses=1]
- mul <4 x float> %198, %22 ; <<4 x float>>:199 [#uses=1]
- mul <4 x float> %199, %23 ; <<4 x float>>:200 [#uses=1]
- mul <4 x float> %200, %24 ; <<4 x float>>:201 [#uses=1]
- mul <4 x float> %201, %25 ; <<4 x float>>:202 [#uses=1]
- mul <4 x float> %202, %26 ; <<4 x float>>:203 [#uses=1]
- mul <4 x float> %203, %27 ; <<4 x float>>:204 [#uses=1]
- mul <4 x float> %204, %28 ; <<4 x float>>:205 [#uses=1]
- mul <4 x float> %205, %29 ; <<4 x float>>:206 [#uses=1]
- mul <4 x float> %206, %30 ; <<4 x float>>:207 [#uses=1]
- mul <4 x float> %207, %31 ; <<4 x float>>:208 [#uses=1]
- mul <4 x float> %208, %32 ; <<4 x float>>:209 [#uses=1]
- mul <4 x float> %7, %7 ; <<4 x float>>:210 [#uses=1]
- mul <4 x float> %210, %8 ; <<4 x float>>:211 [#uses=1]
- mul <4 x float> %211, %9 ; <<4 x float>>:212 [#uses=1]
- mul <4 x float> %212, %10 ; <<4 x float>>:213 [#uses=1]
- mul <4 x float> %213, %11 ; <<4 x float>>:214 [#uses=1]
- mul <4 x float> %214, %12 ; <<4 x float>>:215 [#uses=1]
- mul <4 x float> %215, %13 ; <<4 x float>>:216 [#uses=1]
- mul <4 x float> %216, %14 ; <<4 x float>>:217 [#uses=1]
- mul <4 x float> %217, %15 ; <<4 x float>>:218 [#uses=1]
- mul <4 x float> %218, %16 ; <<4 x float>>:219 [#uses=1]
- mul <4 x float> %219, %17 ; <<4 x float>>:220 [#uses=1]
- mul <4 x float> %220, %18 ; <<4 x float>>:221 [#uses=1]
- mul <4 x float> %221, %19 ; <<4 x float>>:222 [#uses=1]
- mul <4 x float> %222, %20 ; <<4 x float>>:223 [#uses=1]
- mul <4 x float> %223, %21 ; <<4 x float>>:224 [#uses=1]
- mul <4 x float> %224, %22 ; <<4 x float>>:225 [#uses=1]
- mul <4 x float> %225, %23 ; <<4 x float>>:226 [#uses=1]
- mul <4 x float> %226, %24 ; <<4 x float>>:227 [#uses=1]
- mul <4 x float> %227, %25 ; <<4 x float>>:228 [#uses=1]
- mul <4 x float> %228, %26 ; <<4 x float>>:229 [#uses=1]
- mul <4 x float> %229, %27 ; <<4 x float>>:230 [#uses=1]
- mul <4 x float> %230, %28 ; <<4 x float>>:231 [#uses=1]
- mul <4 x float> %231, %29 ; <<4 x float>>:232 [#uses=1]
- mul <4 x float> %232, %30 ; <<4 x float>>:233 [#uses=1]
- mul <4 x float> %233, %31 ; <<4 x float>>:234 [#uses=1]
- mul <4 x float> %234, %32 ; <<4 x float>>:235 [#uses=1]
- mul <4 x float> %8, %8 ; <<4 x float>>:236 [#uses=1]
- mul <4 x float> %236, %9 ; <<4 x float>>:237 [#uses=1]
- mul <4 x float> %237, %10 ; <<4 x float>>:238 [#uses=1]
- mul <4 x float> %238, %11 ; <<4 x float>>:239 [#uses=1]
- mul <4 x float> %239, %12 ; <<4 x float>>:240 [#uses=1]
- mul <4 x float> %240, %13 ; <<4 x float>>:241 [#uses=1]
- mul <4 x float> %241, %14 ; <<4 x float>>:242 [#uses=1]
- mul <4 x float> %242, %15 ; <<4 x float>>:243 [#uses=1]
- mul <4 x float> %243, %16 ; <<4 x float>>:244 [#uses=1]
- mul <4 x float> %244, %17 ; <<4 x float>>:245 [#uses=1]
- mul <4 x float> %245, %18 ; <<4 x float>>:246 [#uses=1]
- mul <4 x float> %246, %19 ; <<4 x float>>:247 [#uses=1]
- mul <4 x float> %247, %20 ; <<4 x float>>:248 [#uses=1]
- mul <4 x float> %248, %21 ; <<4 x float>>:249 [#uses=1]
- mul <4 x float> %249, %22 ; <<4 x float>>:250 [#uses=1]
- mul <4 x float> %250, %23 ; <<4 x float>>:251 [#uses=1]
- mul <4 x float> %251, %24 ; <<4 x float>>:252 [#uses=1]
- mul <4 x float> %252, %25 ; <<4 x float>>:253 [#uses=1]
- mul <4 x float> %253, %26 ; <<4 x float>>:254 [#uses=1]
- mul <4 x float> %254, %27 ; <<4 x float>>:255 [#uses=1]
- mul <4 x float> %255, %28 ; <<4 x float>>:256 [#uses=1]
- mul <4 x float> %256, %29 ; <<4 x float>>:257 [#uses=1]
- mul <4 x float> %257, %30 ; <<4 x float>>:258 [#uses=1]
- mul <4 x float> %258, %31 ; <<4 x float>>:259 [#uses=1]
- mul <4 x float> %259, %32 ; <<4 x float>>:260 [#uses=1]
- mul <4 x float> %9, %9 ; <<4 x float>>:261 [#uses=1]
- mul <4 x float> %261, %10 ; <<4 x float>>:262 [#uses=1]
- mul <4 x float> %262, %11 ; <<4 x float>>:263 [#uses=1]
- mul <4 x float> %263, %12 ; <<4 x float>>:264 [#uses=1]
- mul <4 x float> %264, %13 ; <<4 x float>>:265 [#uses=1]
- mul <4 x float> %265, %14 ; <<4 x float>>:266 [#uses=1]
- mul <4 x float> %266, %15 ; <<4 x float>>:267 [#uses=1]
- mul <4 x float> %267, %16 ; <<4 x float>>:268 [#uses=1]
- mul <4 x float> %268, %17 ; <<4 x float>>:269 [#uses=1]
- mul <4 x float> %269, %18 ; <<4 x float>>:270 [#uses=1]
- mul <4 x float> %270, %19 ; <<4 x float>>:271 [#uses=1]
- mul <4 x float> %271, %20 ; <<4 x float>>:272 [#uses=1]
- mul <4 x float> %272, %21 ; <<4 x float>>:273 [#uses=1]
- mul <4 x float> %273, %22 ; <<4 x float>>:274 [#uses=1]
- mul <4 x float> %274, %23 ; <<4 x float>>:275 [#uses=1]
- mul <4 x float> %275, %24 ; <<4 x float>>:276 [#uses=1]
- mul <4 x float> %276, %25 ; <<4 x float>>:277 [#uses=1]
- mul <4 x float> %277, %26 ; <<4 x float>>:278 [#uses=1]
- mul <4 x float> %278, %27 ; <<4 x float>>:279 [#uses=1]
- mul <4 x float> %279, %28 ; <<4 x float>>:280 [#uses=1]
- mul <4 x float> %280, %29 ; <<4 x float>>:281 [#uses=1]
- mul <4 x float> %281, %30 ; <<4 x float>>:282 [#uses=1]
- mul <4 x float> %282, %31 ; <<4 x float>>:283 [#uses=1]
- mul <4 x float> %283, %32 ; <<4 x float>>:284 [#uses=1]
- mul <4 x float> %10, %10 ; <<4 x float>>:285 [#uses=1]
- mul <4 x float> %285, %11 ; <<4 x float>>:286 [#uses=1]
- mul <4 x float> %286, %12 ; <<4 x float>>:287 [#uses=1]
- mul <4 x float> %287, %13 ; <<4 x float>>:288 [#uses=1]
- mul <4 x float> %288, %14 ; <<4 x float>>:289 [#uses=1]
- mul <4 x float> %289, %15 ; <<4 x float>>:290 [#uses=1]
- mul <4 x float> %290, %16 ; <<4 x float>>:291 [#uses=1]
- mul <4 x float> %291, %17 ; <<4 x float>>:292 [#uses=1]
- mul <4 x float> %292, %18 ; <<4 x float>>:293 [#uses=1]
- mul <4 x float> %293, %19 ; <<4 x float>>:294 [#uses=1]
- mul <4 x float> %294, %20 ; <<4 x float>>:295 [#uses=1]
- mul <4 x float> %295, %21 ; <<4 x float>>:296 [#uses=1]
- mul <4 x float> %296, %22 ; <<4 x float>>:297 [#uses=1]
- mul <4 x float> %297, %23 ; <<4 x float>>:298 [#uses=1]
- mul <4 x float> %298, %24 ; <<4 x float>>:299 [#uses=1]
- mul <4 x float> %299, %25 ; <<4 x float>>:300 [#uses=1]
- mul <4 x float> %300, %26 ; <<4 x float>>:301 [#uses=1]
- mul <4 x float> %301, %27 ; <<4 x float>>:302 [#uses=1]
- mul <4 x float> %302, %28 ; <<4 x float>>:303 [#uses=1]
- mul <4 x float> %303, %29 ; <<4 x float>>:304 [#uses=1]
- mul <4 x float> %304, %30 ; <<4 x float>>:305 [#uses=1]
- mul <4 x float> %305, %31 ; <<4 x float>>:306 [#uses=1]
- mul <4 x float> %306, %32 ; <<4 x float>>:307 [#uses=1]
- mul <4 x float> %11, %11 ; <<4 x float>>:308 [#uses=1]
- mul <4 x float> %308, %12 ; <<4 x float>>:309 [#uses=1]
- mul <4 x float> %309, %13 ; <<4 x float>>:310 [#uses=1]
- mul <4 x float> %310, %14 ; <<4 x float>>:311 [#uses=1]
- mul <4 x float> %311, %15 ; <<4 x float>>:312 [#uses=1]
- mul <4 x float> %312, %16 ; <<4 x float>>:313 [#uses=1]
- mul <4 x float> %313, %17 ; <<4 x float>>:314 [#uses=1]
- mul <4 x float> %314, %18 ; <<4 x float>>:315 [#uses=1]
- mul <4 x float> %315, %19 ; <<4 x float>>:316 [#uses=1]
- mul <4 x float> %316, %20 ; <<4 x float>>:317 [#uses=1]
- mul <4 x float> %317, %21 ; <<4 x float>>:318 [#uses=1]
- mul <4 x float> %318, %22 ; <<4 x float>>:319 [#uses=1]
- mul <4 x float> %319, %23 ; <<4 x float>>:320 [#uses=1]
- mul <4 x float> %320, %24 ; <<4 x float>>:321 [#uses=1]
- mul <4 x float> %321, %25 ; <<4 x float>>:322 [#uses=1]
- mul <4 x float> %322, %26 ; <<4 x float>>:323 [#uses=1]
- mul <4 x float> %323, %27 ; <<4 x float>>:324 [#uses=1]
- mul <4 x float> %324, %28 ; <<4 x float>>:325 [#uses=1]
- mul <4 x float> %325, %29 ; <<4 x float>>:326 [#uses=1]
- mul <4 x float> %326, %30 ; <<4 x float>>:327 [#uses=1]
- mul <4 x float> %327, %31 ; <<4 x float>>:328 [#uses=1]
- mul <4 x float> %328, %32 ; <<4 x float>>:329 [#uses=1]
- mul <4 x float> %12, %12 ; <<4 x float>>:330 [#uses=1]
- mul <4 x float> %330, %13 ; <<4 x float>>:331 [#uses=1]
- mul <4 x float> %331, %14 ; <<4 x float>>:332 [#uses=1]
- mul <4 x float> %332, %15 ; <<4 x float>>:333 [#uses=1]
- mul <4 x float> %333, %16 ; <<4 x float>>:334 [#uses=1]
- mul <4 x float> %334, %17 ; <<4 x float>>:335 [#uses=1]
- mul <4 x float> %335, %18 ; <<4 x float>>:336 [#uses=1]
- mul <4 x float> %336, %19 ; <<4 x float>>:337 [#uses=1]
- mul <4 x float> %337, %20 ; <<4 x float>>:338 [#uses=1]
- mul <4 x float> %338, %21 ; <<4 x float>>:339 [#uses=1]
- mul <4 x float> %339, %22 ; <<4 x float>>:340 [#uses=1]
- mul <4 x float> %340, %23 ; <<4 x float>>:341 [#uses=1]
- mul <4 x float> %341, %24 ; <<4 x float>>:342 [#uses=1]
- mul <4 x float> %342, %25 ; <<4 x float>>:343 [#uses=1]
- mul <4 x float> %343, %26 ; <<4 x float>>:344 [#uses=1]
- mul <4 x float> %344, %27 ; <<4 x float>>:345 [#uses=1]
- mul <4 x float> %345, %28 ; <<4 x float>>:346 [#uses=1]
- mul <4 x float> %346, %29 ; <<4 x float>>:347 [#uses=1]
- mul <4 x float> %347, %30 ; <<4 x float>>:348 [#uses=1]
- mul <4 x float> %348, %31 ; <<4 x float>>:349 [#uses=1]
- mul <4 x float> %349, %32 ; <<4 x float>>:350 [#uses=1]
- mul <4 x float> %13, %13 ; <<4 x float>>:351 [#uses=1]
- mul <4 x float> %351, %14 ; <<4 x float>>:352 [#uses=1]
- mul <4 x float> %352, %15 ; <<4 x float>>:353 [#uses=1]
- mul <4 x float> %353, %16 ; <<4 x float>>:354 [#uses=1]
- mul <4 x float> %354, %17 ; <<4 x float>>:355 [#uses=1]
- mul <4 x float> %355, %18 ; <<4 x float>>:356 [#uses=1]
- mul <4 x float> %356, %19 ; <<4 x float>>:357 [#uses=1]
- mul <4 x float> %357, %20 ; <<4 x float>>:358 [#uses=1]
- mul <4 x float> %358, %21 ; <<4 x float>>:359 [#uses=1]
- mul <4 x float> %359, %22 ; <<4 x float>>:360 [#uses=1]
- mul <4 x float> %360, %23 ; <<4 x float>>:361 [#uses=1]
- mul <4 x float> %361, %24 ; <<4 x float>>:362 [#uses=1]
- mul <4 x float> %362, %25 ; <<4 x float>>:363 [#uses=1]
- mul <4 x float> %363, %26 ; <<4 x float>>:364 [#uses=1]
- mul <4 x float> %364, %27 ; <<4 x float>>:365 [#uses=1]
- mul <4 x float> %365, %28 ; <<4 x float>>:366 [#uses=1]
- mul <4 x float> %366, %29 ; <<4 x float>>:367 [#uses=1]
- mul <4 x float> %367, %30 ; <<4 x float>>:368 [#uses=1]
- mul <4 x float> %368, %31 ; <<4 x float>>:369 [#uses=1]
- mul <4 x float> %369, %32 ; <<4 x float>>:370 [#uses=1]
- mul <4 x float> %14, %14 ; <<4 x float>>:371 [#uses=1]
- mul <4 x float> %371, %15 ; <<4 x float>>:372 [#uses=1]
- mul <4 x float> %372, %16 ; <<4 x float>>:373 [#uses=1]
- mul <4 x float> %373, %17 ; <<4 x float>>:374 [#uses=1]
- mul <4 x float> %374, %18 ; <<4 x float>>:375 [#uses=1]
- mul <4 x float> %375, %19 ; <<4 x float>>:376 [#uses=1]
- mul <4 x float> %376, %20 ; <<4 x float>>:377 [#uses=1]
- mul <4 x float> %377, %21 ; <<4 x float>>:378 [#uses=1]
- mul <4 x float> %378, %22 ; <<4 x float>>:379 [#uses=1]
- mul <4 x float> %379, %23 ; <<4 x float>>:380 [#uses=1]
- mul <4 x float> %380, %24 ; <<4 x float>>:381 [#uses=1]
- mul <4 x float> %381, %25 ; <<4 x float>>:382 [#uses=1]
- mul <4 x float> %382, %26 ; <<4 x float>>:383 [#uses=1]
- mul <4 x float> %383, %27 ; <<4 x float>>:384 [#uses=1]
- mul <4 x float> %384, %28 ; <<4 x float>>:385 [#uses=1]
- mul <4 x float> %385, %29 ; <<4 x float>>:386 [#uses=1]
- mul <4 x float> %386, %30 ; <<4 x float>>:387 [#uses=1]
- mul <4 x float> %387, %31 ; <<4 x float>>:388 [#uses=1]
- mul <4 x float> %388, %32 ; <<4 x float>>:389 [#uses=1]
- mul <4 x float> %15, %15 ; <<4 x float>>:390 [#uses=1]
- mul <4 x float> %390, %16 ; <<4 x float>>:391 [#uses=1]
- mul <4 x float> %391, %17 ; <<4 x float>>:392 [#uses=1]
- mul <4 x float> %392, %18 ; <<4 x float>>:393 [#uses=1]
- mul <4 x float> %393, %19 ; <<4 x float>>:394 [#uses=1]
- mul <4 x float> %394, %20 ; <<4 x float>>:395 [#uses=1]
- mul <4 x float> %395, %21 ; <<4 x float>>:396 [#uses=1]
- mul <4 x float> %396, %22 ; <<4 x float>>:397 [#uses=1]
- mul <4 x float> %397, %23 ; <<4 x float>>:398 [#uses=1]
- mul <4 x float> %398, %24 ; <<4 x float>>:399 [#uses=1]
- mul <4 x float> %399, %25 ; <<4 x float>>:400 [#uses=1]
- mul <4 x float> %400, %26 ; <<4 x float>>:401 [#uses=1]
- mul <4 x float> %401, %27 ; <<4 x float>>:402 [#uses=1]
- mul <4 x float> %402, %28 ; <<4 x float>>:403 [#uses=1]
- mul <4 x float> %403, %29 ; <<4 x float>>:404 [#uses=1]
- mul <4 x float> %404, %30 ; <<4 x float>>:405 [#uses=1]
- mul <4 x float> %405, %31 ; <<4 x float>>:406 [#uses=1]
- mul <4 x float> %406, %32 ; <<4 x float>>:407 [#uses=1]
- mul <4 x float> %16, %16 ; <<4 x float>>:408 [#uses=1]
- mul <4 x float> %408, %17 ; <<4 x float>>:409 [#uses=1]
- mul <4 x float> %409, %18 ; <<4 x float>>:410 [#uses=1]
- mul <4 x float> %410, %19 ; <<4 x float>>:411 [#uses=1]
- mul <4 x float> %411, %20 ; <<4 x float>>:412 [#uses=1]
- mul <4 x float> %412, %21 ; <<4 x float>>:413 [#uses=1]
- mul <4 x float> %413, %22 ; <<4 x float>>:414 [#uses=1]
- mul <4 x float> %414, %23 ; <<4 x float>>:415 [#uses=1]
- mul <4 x float> %415, %24 ; <<4 x float>>:416 [#uses=1]
- mul <4 x float> %416, %25 ; <<4 x float>>:417 [#uses=1]
- mul <4 x float> %417, %26 ; <<4 x float>>:418 [#uses=1]
- mul <4 x float> %418, %27 ; <<4 x float>>:419 [#uses=1]
- mul <4 x float> %419, %28 ; <<4 x float>>:420 [#uses=1]
- mul <4 x float> %420, %29 ; <<4 x float>>:421 [#uses=1]
- mul <4 x float> %421, %30 ; <<4 x float>>:422 [#uses=1]
- mul <4 x float> %422, %31 ; <<4 x float>>:423 [#uses=1]
- mul <4 x float> %423, %32 ; <<4 x float>>:424 [#uses=1]
- mul <4 x float> %17, %17 ; <<4 x float>>:425 [#uses=1]
- mul <4 x float> %425, %18 ; <<4 x float>>:426 [#uses=1]
- mul <4 x float> %426, %19 ; <<4 x float>>:427 [#uses=1]
- mul <4 x float> %427, %20 ; <<4 x float>>:428 [#uses=1]
- mul <4 x float> %428, %21 ; <<4 x float>>:429 [#uses=1]
- mul <4 x float> %429, %22 ; <<4 x float>>:430 [#uses=1]
- mul <4 x float> %430, %23 ; <<4 x float>>:431 [#uses=1]
- mul <4 x float> %431, %24 ; <<4 x float>>:432 [#uses=1]
- mul <4 x float> %432, %25 ; <<4 x float>>:433 [#uses=1]
- mul <4 x float> %433, %26 ; <<4 x float>>:434 [#uses=1]
- mul <4 x float> %434, %27 ; <<4 x float>>:435 [#uses=1]
- mul <4 x float> %435, %28 ; <<4 x float>>:436 [#uses=1]
- mul <4 x float> %436, %29 ; <<4 x float>>:437 [#uses=1]
- mul <4 x float> %437, %30 ; <<4 x float>>:438 [#uses=1]
- mul <4 x float> %438, %31 ; <<4 x float>>:439 [#uses=1]
- mul <4 x float> %439, %32 ; <<4 x float>>:440 [#uses=1]
- mul <4 x float> %18, %18 ; <<4 x float>>:441 [#uses=1]
- mul <4 x float> %441, %19 ; <<4 x float>>:442 [#uses=1]
- mul <4 x float> %442, %20 ; <<4 x float>>:443 [#uses=1]
- mul <4 x float> %443, %21 ; <<4 x float>>:444 [#uses=1]
- mul <4 x float> %444, %22 ; <<4 x float>>:445 [#uses=1]
- mul <4 x float> %445, %23 ; <<4 x float>>:446 [#uses=1]
- mul <4 x float> %446, %24 ; <<4 x float>>:447 [#uses=1]
- mul <4 x float> %447, %25 ; <<4 x float>>:448 [#uses=1]
- mul <4 x float> %448, %26 ; <<4 x float>>:449 [#uses=1]
- mul <4 x float> %449, %27 ; <<4 x float>>:450 [#uses=1]
- mul <4 x float> %450, %28 ; <<4 x float>>:451 [#uses=1]
- mul <4 x float> %451, %29 ; <<4 x float>>:452 [#uses=1]
- mul <4 x float> %452, %30 ; <<4 x float>>:453 [#uses=1]
- mul <4 x float> %453, %31 ; <<4 x float>>:454 [#uses=1]
- mul <4 x float> %454, %32 ; <<4 x float>>:455 [#uses=1]
- mul <4 x float> %19, %19 ; <<4 x float>>:456 [#uses=1]
- mul <4 x float> %456, %20 ; <<4 x float>>:457 [#uses=1]
- mul <4 x float> %457, %21 ; <<4 x float>>:458 [#uses=1]
- mul <4 x float> %458, %22 ; <<4 x float>>:459 [#uses=1]
- mul <4 x float> %459, %23 ; <<4 x float>>:460 [#uses=1]
- mul <4 x float> %460, %24 ; <<4 x float>>:461 [#uses=1]
- mul <4 x float> %461, %25 ; <<4 x float>>:462 [#uses=1]
- mul <4 x float> %462, %26 ; <<4 x float>>:463 [#uses=1]
- mul <4 x float> %463, %27 ; <<4 x float>>:464 [#uses=1]
- mul <4 x float> %464, %28 ; <<4 x float>>:465 [#uses=1]
- mul <4 x float> %465, %29 ; <<4 x float>>:466 [#uses=1]
- mul <4 x float> %466, %30 ; <<4 x float>>:467 [#uses=1]
- mul <4 x float> %467, %31 ; <<4 x float>>:468 [#uses=1]
- mul <4 x float> %468, %32 ; <<4 x float>>:469 [#uses=1]
- mul <4 x float> %20, %20 ; <<4 x float>>:470 [#uses=1]
- mul <4 x float> %470, %21 ; <<4 x float>>:471 [#uses=1]
- mul <4 x float> %471, %22 ; <<4 x float>>:472 [#uses=1]
- mul <4 x float> %472, %23 ; <<4 x float>>:473 [#uses=1]
- mul <4 x float> %473, %24 ; <<4 x float>>:474 [#uses=1]
- mul <4 x float> %474, %25 ; <<4 x float>>:475 [#uses=1]
- mul <4 x float> %475, %26 ; <<4 x float>>:476 [#uses=1]
- mul <4 x float> %476, %27 ; <<4 x float>>:477 [#uses=1]
- mul <4 x float> %477, %28 ; <<4 x float>>:478 [#uses=1]
- mul <4 x float> %478, %29 ; <<4 x float>>:479 [#uses=1]
- mul <4 x float> %479, %30 ; <<4 x float>>:480 [#uses=1]
- mul <4 x float> %480, %31 ; <<4 x float>>:481 [#uses=1]
- mul <4 x float> %481, %32 ; <<4 x float>>:482 [#uses=1]
- mul <4 x float> %21, %21 ; <<4 x float>>:483 [#uses=1]
- mul <4 x float> %483, %22 ; <<4 x float>>:484 [#uses=1]
- mul <4 x float> %484, %23 ; <<4 x float>>:485 [#uses=1]
- mul <4 x float> %485, %24 ; <<4 x float>>:486 [#uses=1]
- mul <4 x float> %486, %25 ; <<4 x float>>:487 [#uses=1]
- mul <4 x float> %487, %26 ; <<4 x float>>:488 [#uses=1]
- mul <4 x float> %488, %27 ; <<4 x float>>:489 [#uses=1]
- mul <4 x float> %489, %28 ; <<4 x float>>:490 [#uses=1]
- mul <4 x float> %490, %29 ; <<4 x float>>:491 [#uses=1]
- mul <4 x float> %491, %30 ; <<4 x float>>:492 [#uses=1]
- mul <4 x float> %492, %31 ; <<4 x float>>:493 [#uses=1]
- mul <4 x float> %493, %32 ; <<4 x float>>:494 [#uses=1]
- mul <4 x float> %22, %22 ; <<4 x float>>:495 [#uses=1]
- mul <4 x float> %495, %23 ; <<4 x float>>:496 [#uses=1]
- mul <4 x float> %496, %24 ; <<4 x float>>:497 [#uses=1]
- mul <4 x float> %497, %25 ; <<4 x float>>:498 [#uses=1]
- mul <4 x float> %498, %26 ; <<4 x float>>:499 [#uses=1]
- mul <4 x float> %499, %27 ; <<4 x float>>:500 [#uses=1]
- mul <4 x float> %500, %28 ; <<4 x float>>:501 [#uses=1]
- mul <4 x float> %501, %29 ; <<4 x float>>:502 [#uses=1]
- mul <4 x float> %502, %30 ; <<4 x float>>:503 [#uses=1]
- mul <4 x float> %503, %31 ; <<4 x float>>:504 [#uses=1]
- mul <4 x float> %504, %32 ; <<4 x float>>:505 [#uses=1]
- mul <4 x float> %23, %23 ; <<4 x float>>:506 [#uses=1]
- mul <4 x float> %506, %24 ; <<4 x float>>:507 [#uses=1]
- mul <4 x float> %507, %25 ; <<4 x float>>:508 [#uses=1]
- mul <4 x float> %508, %26 ; <<4 x float>>:509 [#uses=1]
- mul <4 x float> %509, %27 ; <<4 x float>>:510 [#uses=1]
- mul <4 x float> %510, %28 ; <<4 x float>>:511 [#uses=1]
- mul <4 x float> %511, %29 ; <<4 x float>>:512 [#uses=1]
- mul <4 x float> %512, %30 ; <<4 x float>>:513 [#uses=1]
- mul <4 x float> %513, %31 ; <<4 x float>>:514 [#uses=1]
- mul <4 x float> %514, %32 ; <<4 x float>>:515 [#uses=1]
- mul <4 x float> %24, %24 ; <<4 x float>>:516 [#uses=1]
- mul <4 x float> %516, %25 ; <<4 x float>>:517 [#uses=1]
- mul <4 x float> %517, %26 ; <<4 x float>>:518 [#uses=1]
- mul <4 x float> %518, %27 ; <<4 x float>>:519 [#uses=1]
- mul <4 x float> %519, %28 ; <<4 x float>>:520 [#uses=1]
- mul <4 x float> %520, %29 ; <<4 x float>>:521 [#uses=1]
- mul <4 x float> %521, %30 ; <<4 x float>>:522 [#uses=1]
- mul <4 x float> %522, %31 ; <<4 x float>>:523 [#uses=1]
- mul <4 x float> %523, %32 ; <<4 x float>>:524 [#uses=1]
- mul <4 x float> %25, %25 ; <<4 x float>>:525 [#uses=1]
- mul <4 x float> %525, %26 ; <<4 x float>>:526 [#uses=1]
- mul <4 x float> %526, %27 ; <<4 x float>>:527 [#uses=1]
- mul <4 x float> %527, %28 ; <<4 x float>>:528 [#uses=1]
- mul <4 x float> %528, %29 ; <<4 x float>>:529 [#uses=1]
- mul <4 x float> %529, %30 ; <<4 x float>>:530 [#uses=1]
- mul <4 x float> %530, %31 ; <<4 x float>>:531 [#uses=1]
- mul <4 x float> %531, %32 ; <<4 x float>>:532 [#uses=1]
- mul <4 x float> %26, %26 ; <<4 x float>>:533 [#uses=1]
- mul <4 x float> %533, %27 ; <<4 x float>>:534 [#uses=1]
- mul <4 x float> %534, %28 ; <<4 x float>>:535 [#uses=1]
- mul <4 x float> %535, %29 ; <<4 x float>>:536 [#uses=1]
- mul <4 x float> %536, %30 ; <<4 x float>>:537 [#uses=1]
- mul <4 x float> %537, %31 ; <<4 x float>>:538 [#uses=1]
- mul <4 x float> %538, %32 ; <<4 x float>>:539 [#uses=1]
- mul <4 x float> %27, %27 ; <<4 x float>>:540 [#uses=1]
- mul <4 x float> %540, %28 ; <<4 x float>>:541 [#uses=1]
- mul <4 x float> %541, %29 ; <<4 x float>>:542 [#uses=1]
- mul <4 x float> %542, %30 ; <<4 x float>>:543 [#uses=1]
- mul <4 x float> %543, %31 ; <<4 x float>>:544 [#uses=1]
- mul <4 x float> %544, %32 ; <<4 x float>>:545 [#uses=1]
- mul <4 x float> %28, %28 ; <<4 x float>>:546 [#uses=1]
- mul <4 x float> %546, %29 ; <<4 x float>>:547 [#uses=1]
- mul <4 x float> %547, %30 ; <<4 x float>>:548 [#uses=1]
- mul <4 x float> %548, %31 ; <<4 x float>>:549 [#uses=1]
- mul <4 x float> %549, %32 ; <<4 x float>>:550 [#uses=1]
- mul <4 x float> %29, %29 ; <<4 x float>>:551 [#uses=1]
- mul <4 x float> %551, %30 ; <<4 x float>>:552 [#uses=1]
- mul <4 x float> %552, %31 ; <<4 x float>>:553 [#uses=1]
- mul <4 x float> %553, %32 ; <<4 x float>>:554 [#uses=1]
- mul <4 x float> %30, %30 ; <<4 x float>>:555 [#uses=1]
- mul <4 x float> %555, %31 ; <<4 x float>>:556 [#uses=1]
- mul <4 x float> %556, %32 ; <<4 x float>>:557 [#uses=1]
- mul <4 x float> %31, %31 ; <<4 x float>>:558 [#uses=1]
- mul <4 x float> %558, %32 ; <<4 x float>>:559 [#uses=1]
- mul <4 x float> %32, %32 ; <<4 x float>>:560 [#uses=1]
- add <4 x float> %64, %64 ; <<4 x float>>:561 [#uses=1]
- add <4 x float> %561, %64 ; <<4 x float>>:562 [#uses=1]
- add <4 x float> %562, %95 ; <<4 x float>>:563 [#uses=1]
- add <4 x float> %563, %125 ; <<4 x float>>:564 [#uses=1]
- add <4 x float> %564, %154 ; <<4 x float>>:565 [#uses=1]
- add <4 x float> %565, %182 ; <<4 x float>>:566 [#uses=1]
- add <4 x float> %566, %209 ; <<4 x float>>:567 [#uses=1]
- add <4 x float> %567, %235 ; <<4 x float>>:568 [#uses=1]
- add <4 x float> %568, %260 ; <<4 x float>>:569 [#uses=1]
- add <4 x float> %569, %284 ; <<4 x float>>:570 [#uses=1]
- add <4 x float> %570, %307 ; <<4 x float>>:571 [#uses=1]
- add <4 x float> %571, %329 ; <<4 x float>>:572 [#uses=1]
- add <4 x float> %572, %350 ; <<4 x float>>:573 [#uses=1]
- add <4 x float> %573, %370 ; <<4 x float>>:574 [#uses=1]
- add <4 x float> %574, %389 ; <<4 x float>>:575 [#uses=1]
- add <4 x float> %575, %407 ; <<4 x float>>:576 [#uses=1]
- add <4 x float> %576, %424 ; <<4 x float>>:577 [#uses=1]
- add <4 x float> %577, %440 ; <<4 x float>>:578 [#uses=1]
- add <4 x float> %578, %455 ; <<4 x float>>:579 [#uses=1]
- add <4 x float> %579, %469 ; <<4 x float>>:580 [#uses=1]
- add <4 x float> %580, %482 ; <<4 x float>>:581 [#uses=1]
- add <4 x float> %581, %494 ; <<4 x float>>:582 [#uses=1]
- add <4 x float> %582, %505 ; <<4 x float>>:583 [#uses=1]
- add <4 x float> %583, %515 ; <<4 x float>>:584 [#uses=1]
- add <4 x float> %584, %524 ; <<4 x float>>:585 [#uses=1]
- add <4 x float> %585, %532 ; <<4 x float>>:586 [#uses=1]
- add <4 x float> %586, %539 ; <<4 x float>>:587 [#uses=1]
- add <4 x float> %587, %545 ; <<4 x float>>:588 [#uses=1]
- add <4 x float> %588, %550 ; <<4 x float>>:589 [#uses=1]
- add <4 x float> %589, %554 ; <<4 x float>>:590 [#uses=1]
- add <4 x float> %590, %557 ; <<4 x float>>:591 [#uses=1]
- add <4 x float> %591, %559 ; <<4 x float>>:592 [#uses=1]
- add <4 x float> %592, %560 ; <<4 x float>>:593 [#uses=1]
+ fmul <4 x float> %1, %1 ; <<4 x float>>:33 [#uses=1]
+ fmul <4 x float> %33, %2 ; <<4 x float>>:34 [#uses=1]
+ fmul <4 x float> %34, %3 ; <<4 x float>>:35 [#uses=1]
+ fmul <4 x float> %35, %4 ; <<4 x float>>:36 [#uses=1]
+ fmul <4 x float> %36, %5 ; <<4 x float>>:37 [#uses=1]
+ fmul <4 x float> %37, %6 ; <<4 x float>>:38 [#uses=1]
+ fmul <4 x float> %38, %7 ; <<4 x float>>:39 [#uses=1]
+ fmul <4 x float> %39, %8 ; <<4 x float>>:40 [#uses=1]
+ fmul <4 x float> %40, %9 ; <<4 x float>>:41 [#uses=1]
+ fmul <4 x float> %41, %10 ; <<4 x float>>:42 [#uses=1]
+ fmul <4 x float> %42, %11 ; <<4 x float>>:43 [#uses=1]
+ fmul <4 x float> %43, %12 ; <<4 x float>>:44 [#uses=1]
+ fmul <4 x float> %44, %13 ; <<4 x float>>:45 [#uses=1]
+ fmul <4 x float> %45, %14 ; <<4 x float>>:46 [#uses=1]
+ fmul <4 x float> %46, %15 ; <<4 x float>>:47 [#uses=1]
+ fmul <4 x float> %47, %16 ; <<4 x float>>:48 [#uses=1]
+ fmul <4 x float> %48, %17 ; <<4 x float>>:49 [#uses=1]
+ fmul <4 x float> %49, %18 ; <<4 x float>>:50 [#uses=1]
+ fmul <4 x float> %50, %19 ; <<4 x float>>:51 [#uses=1]
+ fmul <4 x float> %51, %20 ; <<4 x float>>:52 [#uses=1]
+ fmul <4 x float> %52, %21 ; <<4 x float>>:53 [#uses=1]
+ fmul <4 x float> %53, %22 ; <<4 x float>>:54 [#uses=1]
+ fmul <4 x float> %54, %23 ; <<4 x float>>:55 [#uses=1]
+ fmul <4 x float> %55, %24 ; <<4 x float>>:56 [#uses=1]
+ fmul <4 x float> %56, %25 ; <<4 x float>>:57 [#uses=1]
+ fmul <4 x float> %57, %26 ; <<4 x float>>:58 [#uses=1]
+ fmul <4 x float> %58, %27 ; <<4 x float>>:59 [#uses=1]
+ fmul <4 x float> %59, %28 ; <<4 x float>>:60 [#uses=1]
+ fmul <4 x float> %60, %29 ; <<4 x float>>:61 [#uses=1]
+ fmul <4 x float> %61, %30 ; <<4 x float>>:62 [#uses=1]
+ fmul <4 x float> %62, %31 ; <<4 x float>>:63 [#uses=1]
+ fmul <4 x float> %63, %32 ; <<4 x float>>:64 [#uses=3]
+ fmul <4 x float> %2, %2 ; <<4 x float>>:65 [#uses=1]
+ fmul <4 x float> %65, %3 ; <<4 x float>>:66 [#uses=1]
+ fmul <4 x float> %66, %4 ; <<4 x float>>:67 [#uses=1]
+ fmul <4 x float> %67, %5 ; <<4 x float>>:68 [#uses=1]
+ fmul <4 x float> %68, %6 ; <<4 x float>>:69 [#uses=1]
+ fmul <4 x float> %69, %7 ; <<4 x float>>:70 [#uses=1]
+ fmul <4 x float> %70, %8 ; <<4 x float>>:71 [#uses=1]
+ fmul <4 x float> %71, %9 ; <<4 x float>>:72 [#uses=1]
+ fmul <4 x float> %72, %10 ; <<4 x float>>:73 [#uses=1]
+ fmul <4 x float> %73, %11 ; <<4 x float>>:74 [#uses=1]
+ fmul <4 x float> %74, %12 ; <<4 x float>>:75 [#uses=1]
+ fmul <4 x float> %75, %13 ; <<4 x float>>:76 [#uses=1]
+ fmul <4 x float> %76, %14 ; <<4 x float>>:77 [#uses=1]
+ fmul <4 x float> %77, %15 ; <<4 x float>>:78 [#uses=1]
+ fmul <4 x float> %78, %16 ; <<4 x float>>:79 [#uses=1]
+ fmul <4 x float> %79, %17 ; <<4 x float>>:80 [#uses=1]
+ fmul <4 x float> %80, %18 ; <<4 x float>>:81 [#uses=1]
+ fmul <4 x float> %81, %19 ; <<4 x float>>:82 [#uses=1]
+ fmul <4 x float> %82, %20 ; <<4 x float>>:83 [#uses=1]
+ fmul <4 x float> %83, %21 ; <<4 x float>>:84 [#uses=1]
+ fmul <4 x float> %84, %22 ; <<4 x float>>:85 [#uses=1]
+ fmul <4 x float> %85, %23 ; <<4 x float>>:86 [#uses=1]
+ fmul <4 x float> %86, %24 ; <<4 x float>>:87 [#uses=1]
+ fmul <4 x float> %87, %25 ; <<4 x float>>:88 [#uses=1]
+ fmul <4 x float> %88, %26 ; <<4 x float>>:89 [#uses=1]
+ fmul <4 x float> %89, %27 ; <<4 x float>>:90 [#uses=1]
+ fmul <4 x float> %90, %28 ; <<4 x float>>:91 [#uses=1]
+ fmul <4 x float> %91, %29 ; <<4 x float>>:92 [#uses=1]
+ fmul <4 x float> %92, %30 ; <<4 x float>>:93 [#uses=1]
+ fmul <4 x float> %93, %31 ; <<4 x float>>:94 [#uses=1]
+ fmul <4 x float> %94, %32 ; <<4 x float>>:95 [#uses=1]
+ fmul <4 x float> %3, %3 ; <<4 x float>>:96 [#uses=1]
+ fmul <4 x float> %96, %4 ; <<4 x float>>:97 [#uses=1]
+ fmul <4 x float> %97, %5 ; <<4 x float>>:98 [#uses=1]
+ fmul <4 x float> %98, %6 ; <<4 x float>>:99 [#uses=1]
+ fmul <4 x float> %99, %7 ; <<4 x float>>:100 [#uses=1]
+ fmul <4 x float> %100, %8 ; <<4 x float>>:101 [#uses=1]
+ fmul <4 x float> %101, %9 ; <<4 x float>>:102 [#uses=1]
+ fmul <4 x float> %102, %10 ; <<4 x float>>:103 [#uses=1]
+ fmul <4 x float> %103, %11 ; <<4 x float>>:104 [#uses=1]
+ fmul <4 x float> %104, %12 ; <<4 x float>>:105 [#uses=1]
+ fmul <4 x float> %105, %13 ; <<4 x float>>:106 [#uses=1]
+ fmul <4 x float> %106, %14 ; <<4 x float>>:107 [#uses=1]
+ fmul <4 x float> %107, %15 ; <<4 x float>>:108 [#uses=1]
+ fmul <4 x float> %108, %16 ; <<4 x float>>:109 [#uses=1]
+ fmul <4 x float> %109, %17 ; <<4 x float>>:110 [#uses=1]
+ fmul <4 x float> %110, %18 ; <<4 x float>>:111 [#uses=1]
+ fmul <4 x float> %111, %19 ; <<4 x float>>:112 [#uses=1]
+ fmul <4 x float> %112, %20 ; <<4 x float>>:113 [#uses=1]
+ fmul <4 x float> %113, %21 ; <<4 x float>>:114 [#uses=1]
+ fmul <4 x float> %114, %22 ; <<4 x float>>:115 [#uses=1]
+ fmul <4 x float> %115, %23 ; <<4 x float>>:116 [#uses=1]
+ fmul <4 x float> %116, %24 ; <<4 x float>>:117 [#uses=1]
+ fmul <4 x float> %117, %25 ; <<4 x float>>:118 [#uses=1]
+ fmul <4 x float> %118, %26 ; <<4 x float>>:119 [#uses=1]
+ fmul <4 x float> %119, %27 ; <<4 x float>>:120 [#uses=1]
+ fmul <4 x float> %120, %28 ; <<4 x float>>:121 [#uses=1]
+ fmul <4 x float> %121, %29 ; <<4 x float>>:122 [#uses=1]
+ fmul <4 x float> %122, %30 ; <<4 x float>>:123 [#uses=1]
+ fmul <4 x float> %123, %31 ; <<4 x float>>:124 [#uses=1]
+ fmul <4 x float> %124, %32 ; <<4 x float>>:125 [#uses=1]
+ fmul <4 x float> %4, %4 ; <<4 x float>>:126 [#uses=1]
+ fmul <4 x float> %126, %5 ; <<4 x float>>:127 [#uses=1]
+ fmul <4 x float> %127, %6 ; <<4 x float>>:128 [#uses=1]
+ fmul <4 x float> %128, %7 ; <<4 x float>>:129 [#uses=1]
+ fmul <4 x float> %129, %8 ; <<4 x float>>:130 [#uses=1]
+ fmul <4 x float> %130, %9 ; <<4 x float>>:131 [#uses=1]
+ fmul <4 x float> %131, %10 ; <<4 x float>>:132 [#uses=1]
+ fmul <4 x float> %132, %11 ; <<4 x float>>:133 [#uses=1]
+ fmul <4 x float> %133, %12 ; <<4 x float>>:134 [#uses=1]
+ fmul <4 x float> %134, %13 ; <<4 x float>>:135 [#uses=1]
+ fmul <4 x float> %135, %14 ; <<4 x float>>:136 [#uses=1]
+ fmul <4 x float> %136, %15 ; <<4 x float>>:137 [#uses=1]
+ fmul <4 x float> %137, %16 ; <<4 x float>>:138 [#uses=1]
+ fmul <4 x float> %138, %17 ; <<4 x float>>:139 [#uses=1]
+ fmul <4 x float> %139, %18 ; <<4 x float>>:140 [#uses=1]
+ fmul <4 x float> %140, %19 ; <<4 x float>>:141 [#uses=1]
+ fmul <4 x float> %141, %20 ; <<4 x float>>:142 [#uses=1]
+ fmul <4 x float> %142, %21 ; <<4 x float>>:143 [#uses=1]
+ fmul <4 x float> %143, %22 ; <<4 x float>>:144 [#uses=1]
+ fmul <4 x float> %144, %23 ; <<4 x float>>:145 [#uses=1]
+ fmul <4 x float> %145, %24 ; <<4 x float>>:146 [#uses=1]
+ fmul <4 x float> %146, %25 ; <<4 x float>>:147 [#uses=1]
+ fmul <4 x float> %147, %26 ; <<4 x float>>:148 [#uses=1]
+ fmul <4 x float> %148, %27 ; <<4 x float>>:149 [#uses=1]
+ fmul <4 x float> %149, %28 ; <<4 x float>>:150 [#uses=1]
+ fmul <4 x float> %150, %29 ; <<4 x float>>:151 [#uses=1]
+ fmul <4 x float> %151, %30 ; <<4 x float>>:152 [#uses=1]
+ fmul <4 x float> %152, %31 ; <<4 x float>>:153 [#uses=1]
+ fmul <4 x float> %153, %32 ; <<4 x float>>:154 [#uses=1]
+ fmul <4 x float> %5, %5 ; <<4 x float>>:155 [#uses=1]
+ fmul <4 x float> %155, %6 ; <<4 x float>>:156 [#uses=1]
+ fmul <4 x float> %156, %7 ; <<4 x float>>:157 [#uses=1]
+ fmul <4 x float> %157, %8 ; <<4 x float>>:158 [#uses=1]
+ fmul <4 x float> %158, %9 ; <<4 x float>>:159 [#uses=1]
+ fmul <4 x float> %159, %10 ; <<4 x float>>:160 [#uses=1]
+ fmul <4 x float> %160, %11 ; <<4 x float>>:161 [#uses=1]
+ fmul <4 x float> %161, %12 ; <<4 x float>>:162 [#uses=1]
+ fmul <4 x float> %162, %13 ; <<4 x float>>:163 [#uses=1]
+ fmul <4 x float> %163, %14 ; <<4 x float>>:164 [#uses=1]
+ fmul <4 x float> %164, %15 ; <<4 x float>>:165 [#uses=1]
+ fmul <4 x float> %165, %16 ; <<4 x float>>:166 [#uses=1]
+ fmul <4 x float> %166, %17 ; <<4 x float>>:167 [#uses=1]
+ fmul <4 x float> %167, %18 ; <<4 x float>>:168 [#uses=1]
+ fmul <4 x float> %168, %19 ; <<4 x float>>:169 [#uses=1]
+ fmul <4 x float> %169, %20 ; <<4 x float>>:170 [#uses=1]
+ fmul <4 x float> %170, %21 ; <<4 x float>>:171 [#uses=1]
+ fmul <4 x float> %171, %22 ; <<4 x float>>:172 [#uses=1]
+ fmul <4 x float> %172, %23 ; <<4 x float>>:173 [#uses=1]
+ fmul <4 x float> %173, %24 ; <<4 x float>>:174 [#uses=1]
+ fmul <4 x float> %174, %25 ; <<4 x float>>:175 [#uses=1]
+ fmul <4 x float> %175, %26 ; <<4 x float>>:176 [#uses=1]
+ fmul <4 x float> %176, %27 ; <<4 x float>>:177 [#uses=1]
+ fmul <4 x float> %177, %28 ; <<4 x float>>:178 [#uses=1]
+ fmul <4 x float> %178, %29 ; <<4 x float>>:179 [#uses=1]
+ fmul <4 x float> %179, %30 ; <<4 x float>>:180 [#uses=1]
+ fmul <4 x float> %180, %31 ; <<4 x float>>:181 [#uses=1]
+ fmul <4 x float> %181, %32 ; <<4 x float>>:182 [#uses=1]
+ fmul <4 x float> %6, %6 ; <<4 x float>>:183 [#uses=1]
+ fmul <4 x float> %183, %7 ; <<4 x float>>:184 [#uses=1]
+ fmul <4 x float> %184, %8 ; <<4 x float>>:185 [#uses=1]
+ fmul <4 x float> %185, %9 ; <<4 x float>>:186 [#uses=1]
+ fmul <4 x float> %186, %10 ; <<4 x float>>:187 [#uses=1]
+ fmul <4 x float> %187, %11 ; <<4 x float>>:188 [#uses=1]
+ fmul <4 x float> %188, %12 ; <<4 x float>>:189 [#uses=1]
+ fmul <4 x float> %189, %13 ; <<4 x float>>:190 [#uses=1]
+ fmul <4 x float> %190, %14 ; <<4 x float>>:191 [#uses=1]
+ fmul <4 x float> %191, %15 ; <<4 x float>>:192 [#uses=1]
+ fmul <4 x float> %192, %16 ; <<4 x float>>:193 [#uses=1]
+ fmul <4 x float> %193, %17 ; <<4 x float>>:194 [#uses=1]
+ fmul <4 x float> %194, %18 ; <<4 x float>>:195 [#uses=1]
+ fmul <4 x float> %195, %19 ; <<4 x float>>:196 [#uses=1]
+ fmul <4 x float> %196, %20 ; <<4 x float>>:197 [#uses=1]
+ fmul <4 x float> %197, %21 ; <<4 x float>>:198 [#uses=1]
+ fmul <4 x float> %198, %22 ; <<4 x float>>:199 [#uses=1]
+ fmul <4 x float> %199, %23 ; <<4 x float>>:200 [#uses=1]
+ fmul <4 x float> %200, %24 ; <<4 x float>>:201 [#uses=1]
+ fmul <4 x float> %201, %25 ; <<4 x float>>:202 [#uses=1]
+ fmul <4 x float> %202, %26 ; <<4 x float>>:203 [#uses=1]
+ fmul <4 x float> %203, %27 ; <<4 x float>>:204 [#uses=1]
+ fmul <4 x float> %204, %28 ; <<4 x float>>:205 [#uses=1]
+ fmul <4 x float> %205, %29 ; <<4 x float>>:206 [#uses=1]
+ fmul <4 x float> %206, %30 ; <<4 x float>>:207 [#uses=1]
+ fmul <4 x float> %207, %31 ; <<4 x float>>:208 [#uses=1]
+ fmul <4 x float> %208, %32 ; <<4 x float>>:209 [#uses=1]
+ fmul <4 x float> %7, %7 ; <<4 x float>>:210 [#uses=1]
+ fmul <4 x float> %210, %8 ; <<4 x float>>:211 [#uses=1]
+ fmul <4 x float> %211, %9 ; <<4 x float>>:212 [#uses=1]
+ fmul <4 x float> %212, %10 ; <<4 x float>>:213 [#uses=1]
+ fmul <4 x float> %213, %11 ; <<4 x float>>:214 [#uses=1]
+ fmul <4 x float> %214, %12 ; <<4 x float>>:215 [#uses=1]
+ fmul <4 x float> %215, %13 ; <<4 x float>>:216 [#uses=1]
+ fmul <4 x float> %216, %14 ; <<4 x float>>:217 [#uses=1]
+ fmul <4 x float> %217, %15 ; <<4 x float>>:218 [#uses=1]
+ fmul <4 x float> %218, %16 ; <<4 x float>>:219 [#uses=1]
+ fmul <4 x float> %219, %17 ; <<4 x float>>:220 [#uses=1]
+ fmul <4 x float> %220, %18 ; <<4 x float>>:221 [#uses=1]
+ fmul <4 x float> %221, %19 ; <<4 x float>>:222 [#uses=1]
+ fmul <4 x float> %222, %20 ; <<4 x float>>:223 [#uses=1]
+ fmul <4 x float> %223, %21 ; <<4 x float>>:224 [#uses=1]
+ fmul <4 x float> %224, %22 ; <<4 x float>>:225 [#uses=1]
+ fmul <4 x float> %225, %23 ; <<4 x float>>:226 [#uses=1]
+ fmul <4 x float> %226, %24 ; <<4 x float>>:227 [#uses=1]
+ fmul <4 x float> %227, %25 ; <<4 x float>>:228 [#uses=1]
+ fmul <4 x float> %228, %26 ; <<4 x float>>:229 [#uses=1]
+ fmul <4 x float> %229, %27 ; <<4 x float>>:230 [#uses=1]
+ fmul <4 x float> %230, %28 ; <<4 x float>>:231 [#uses=1]
+ fmul <4 x float> %231, %29 ; <<4 x float>>:232 [#uses=1]
+ fmul <4 x float> %232, %30 ; <<4 x float>>:233 [#uses=1]
+ fmul <4 x float> %233, %31 ; <<4 x float>>:234 [#uses=1]
+ fmul <4 x float> %234, %32 ; <<4 x float>>:235 [#uses=1]
+ fmul <4 x float> %8, %8 ; <<4 x float>>:236 [#uses=1]
+ fmul <4 x float> %236, %9 ; <<4 x float>>:237 [#uses=1]
+ fmul <4 x float> %237, %10 ; <<4 x float>>:238 [#uses=1]
+ fmul <4 x float> %238, %11 ; <<4 x float>>:239 [#uses=1]
+ fmul <4 x float> %239, %12 ; <<4 x float>>:240 [#uses=1]
+ fmul <4 x float> %240, %13 ; <<4 x float>>:241 [#uses=1]
+ fmul <4 x float> %241, %14 ; <<4 x float>>:242 [#uses=1]
+ fmul <4 x float> %242, %15 ; <<4 x float>>:243 [#uses=1]
+ fmul <4 x float> %243, %16 ; <<4 x float>>:244 [#uses=1]
+ fmul <4 x float> %244, %17 ; <<4 x float>>:245 [#uses=1]
+ fmul <4 x float> %245, %18 ; <<4 x float>>:246 [#uses=1]
+ fmul <4 x float> %246, %19 ; <<4 x float>>:247 [#uses=1]
+ fmul <4 x float> %247, %20 ; <<4 x float>>:248 [#uses=1]
+ fmul <4 x float> %248, %21 ; <<4 x float>>:249 [#uses=1]
+ fmul <4 x float> %249, %22 ; <<4 x float>>:250 [#uses=1]
+ fmul <4 x float> %250, %23 ; <<4 x float>>:251 [#uses=1]
+ fmul <4 x float> %251, %24 ; <<4 x float>>:252 [#uses=1]
+ fmul <4 x float> %252, %25 ; <<4 x float>>:253 [#uses=1]
+ fmul <4 x float> %253, %26 ; <<4 x float>>:254 [#uses=1]
+ fmul <4 x float> %254, %27 ; <<4 x float>>:255 [#uses=1]
+ fmul <4 x float> %255, %28 ; <<4 x float>>:256 [#uses=1]
+ fmul <4 x float> %256, %29 ; <<4 x float>>:257 [#uses=1]
+ fmul <4 x float> %257, %30 ; <<4 x float>>:258 [#uses=1]
+ fmul <4 x float> %258, %31 ; <<4 x float>>:259 [#uses=1]
+ fmul <4 x float> %259, %32 ; <<4 x float>>:260 [#uses=1]
+ fmul <4 x float> %9, %9 ; <<4 x float>>:261 [#uses=1]
+ fmul <4 x float> %261, %10 ; <<4 x float>>:262 [#uses=1]
+ fmul <4 x float> %262, %11 ; <<4 x float>>:263 [#uses=1]
+ fmul <4 x float> %263, %12 ; <<4 x float>>:264 [#uses=1]
+ fmul <4 x float> %264, %13 ; <<4 x float>>:265 [#uses=1]
+ fmul <4 x float> %265, %14 ; <<4 x float>>:266 [#uses=1]
+ fmul <4 x float> %266, %15 ; <<4 x float>>:267 [#uses=1]
+ fmul <4 x float> %267, %16 ; <<4 x float>>:268 [#uses=1]
+ fmul <4 x float> %268, %17 ; <<4 x float>>:269 [#uses=1]
+ fmul <4 x float> %269, %18 ; <<4 x float>>:270 [#uses=1]
+ fmul <4 x float> %270, %19 ; <<4 x float>>:271 [#uses=1]
+ fmul <4 x float> %271, %20 ; <<4 x float>>:272 [#uses=1]
+ fmul <4 x float> %272, %21 ; <<4 x float>>:273 [#uses=1]
+ fmul <4 x float> %273, %22 ; <<4 x float>>:274 [#uses=1]
+ fmul <4 x float> %274, %23 ; <<4 x float>>:275 [#uses=1]
+ fmul <4 x float> %275, %24 ; <<4 x float>>:276 [#uses=1]
+ fmul <4 x float> %276, %25 ; <<4 x float>>:277 [#uses=1]
+ fmul <4 x float> %277, %26 ; <<4 x float>>:278 [#uses=1]
+ fmul <4 x float> %278, %27 ; <<4 x float>>:279 [#uses=1]
+ fmul <4 x float> %279, %28 ; <<4 x float>>:280 [#uses=1]
+ fmul <4 x float> %280, %29 ; <<4 x float>>:281 [#uses=1]
+ fmul <4 x float> %281, %30 ; <<4 x float>>:282 [#uses=1]
+ fmul <4 x float> %282, %31 ; <<4 x float>>:283 [#uses=1]
+ fmul <4 x float> %283, %32 ; <<4 x float>>:284 [#uses=1]
+ fmul <4 x float> %10, %10 ; <<4 x float>>:285 [#uses=1]
+ fmul <4 x float> %285, %11 ; <<4 x float>>:286 [#uses=1]
+ fmul <4 x float> %286, %12 ; <<4 x float>>:287 [#uses=1]
+ fmul <4 x float> %287, %13 ; <<4 x float>>:288 [#uses=1]
+ fmul <4 x float> %288, %14 ; <<4 x float>>:289 [#uses=1]
+ fmul <4 x float> %289, %15 ; <<4 x float>>:290 [#uses=1]
+ fmul <4 x float> %290, %16 ; <<4 x float>>:291 [#uses=1]
+ fmul <4 x float> %291, %17 ; <<4 x float>>:292 [#uses=1]
+ fmul <4 x float> %292, %18 ; <<4 x float>>:293 [#uses=1]
+ fmul <4 x float> %293, %19 ; <<4 x float>>:294 [#uses=1]
+ fmul <4 x float> %294, %20 ; <<4 x float>>:295 [#uses=1]
+ fmul <4 x float> %295, %21 ; <<4 x float>>:296 [#uses=1]
+ fmul <4 x float> %296, %22 ; <<4 x float>>:297 [#uses=1]
+ fmul <4 x float> %297, %23 ; <<4 x float>>:298 [#uses=1]
+ fmul <4 x float> %298, %24 ; <<4 x float>>:299 [#uses=1]
+ fmul <4 x float> %299, %25 ; <<4 x float>>:300 [#uses=1]
+ fmul <4 x float> %300, %26 ; <<4 x float>>:301 [#uses=1]
+ fmul <4 x float> %301, %27 ; <<4 x float>>:302 [#uses=1]
+ fmul <4 x float> %302, %28 ; <<4 x float>>:303 [#uses=1]
+ fmul <4 x float> %303, %29 ; <<4 x float>>:304 [#uses=1]
+ fmul <4 x float> %304, %30 ; <<4 x float>>:305 [#uses=1]
+ fmul <4 x float> %305, %31 ; <<4 x float>>:306 [#uses=1]
+ fmul <4 x float> %306, %32 ; <<4 x float>>:307 [#uses=1]
+ fmul <4 x float> %11, %11 ; <<4 x float>>:308 [#uses=1]
+ fmul <4 x float> %308, %12 ; <<4 x float>>:309 [#uses=1]
+ fmul <4 x float> %309, %13 ; <<4 x float>>:310 [#uses=1]
+ fmul <4 x float> %310, %14 ; <<4 x float>>:311 [#uses=1]
+ fmul <4 x float> %311, %15 ; <<4 x float>>:312 [#uses=1]
+ fmul <4 x float> %312, %16 ; <<4 x float>>:313 [#uses=1]
+ fmul <4 x float> %313, %17 ; <<4 x float>>:314 [#uses=1]
+ fmul <4 x float> %314, %18 ; <<4 x float>>:315 [#uses=1]
+ fmul <4 x float> %315, %19 ; <<4 x float>>:316 [#uses=1]
+ fmul <4 x float> %316, %20 ; <<4 x float>>:317 [#uses=1]
+ fmul <4 x float> %317, %21 ; <<4 x float>>:318 [#uses=1]
+ fmul <4 x float> %318, %22 ; <<4 x float>>:319 [#uses=1]
+ fmul <4 x float> %319, %23 ; <<4 x float>>:320 [#uses=1]
+ fmul <4 x float> %320, %24 ; <<4 x float>>:321 [#uses=1]
+ fmul <4 x float> %321, %25 ; <<4 x float>>:322 [#uses=1]
+ fmul <4 x float> %322, %26 ; <<4 x float>>:323 [#uses=1]
+ fmul <4 x float> %323, %27 ; <<4 x float>>:324 [#uses=1]
+ fmul <4 x float> %324, %28 ; <<4 x float>>:325 [#uses=1]
+ fmul <4 x float> %325, %29 ; <<4 x float>>:326 [#uses=1]
+ fmul <4 x float> %326, %30 ; <<4 x float>>:327 [#uses=1]
+ fmul <4 x float> %327, %31 ; <<4 x float>>:328 [#uses=1]
+ fmul <4 x float> %328, %32 ; <<4 x float>>:329 [#uses=1]
+ fmul <4 x float> %12, %12 ; <<4 x float>>:330 [#uses=1]
+ fmul <4 x float> %330, %13 ; <<4 x float>>:331 [#uses=1]
+ fmul <4 x float> %331, %14 ; <<4 x float>>:332 [#uses=1]
+ fmul <4 x float> %332, %15 ; <<4 x float>>:333 [#uses=1]
+ fmul <4 x float> %333, %16 ; <<4 x float>>:334 [#uses=1]
+ fmul <4 x float> %334, %17 ; <<4 x float>>:335 [#uses=1]
+ fmul <4 x float> %335, %18 ; <<4 x float>>:336 [#uses=1]
+ fmul <4 x float> %336, %19 ; <<4 x float>>:337 [#uses=1]
+ fmul <4 x float> %337, %20 ; <<4 x float>>:338 [#uses=1]
+ fmul <4 x float> %338, %21 ; <<4 x float>>:339 [#uses=1]
+ fmul <4 x float> %339, %22 ; <<4 x float>>:340 [#uses=1]
+ fmul <4 x float> %340, %23 ; <<4 x float>>:341 [#uses=1]
+ fmul <4 x float> %341, %24 ; <<4 x float>>:342 [#uses=1]
+ fmul <4 x float> %342, %25 ; <<4 x float>>:343 [#uses=1]
+ fmul <4 x float> %343, %26 ; <<4 x float>>:344 [#uses=1]
+ fmul <4 x float> %344, %27 ; <<4 x float>>:345 [#uses=1]
+ fmul <4 x float> %345, %28 ; <<4 x float>>:346 [#uses=1]
+ fmul <4 x float> %346, %29 ; <<4 x float>>:347 [#uses=1]
+ fmul <4 x float> %347, %30 ; <<4 x float>>:348 [#uses=1]
+ fmul <4 x float> %348, %31 ; <<4 x float>>:349 [#uses=1]
+ fmul <4 x float> %349, %32 ; <<4 x float>>:350 [#uses=1]
+ fmul <4 x float> %13, %13 ; <<4 x float>>:351 [#uses=1]
+ fmul <4 x float> %351, %14 ; <<4 x float>>:352 [#uses=1]
+ fmul <4 x float> %352, %15 ; <<4 x float>>:353 [#uses=1]
+ fmul <4 x float> %353, %16 ; <<4 x float>>:354 [#uses=1]
+ fmul <4 x float> %354, %17 ; <<4 x float>>:355 [#uses=1]
+ fmul <4 x float> %355, %18 ; <<4 x float>>:356 [#uses=1]
+ fmul <4 x float> %356, %19 ; <<4 x float>>:357 [#uses=1]
+ fmul <4 x float> %357, %20 ; <<4 x float>>:358 [#uses=1]
+ fmul <4 x float> %358, %21 ; <<4 x float>>:359 [#uses=1]
+ fmul <4 x float> %359, %22 ; <<4 x float>>:360 [#uses=1]
+ fmul <4 x float> %360, %23 ; <<4 x float>>:361 [#uses=1]
+ fmul <4 x float> %361, %24 ; <<4 x float>>:362 [#uses=1]
+ fmul <4 x float> %362, %25 ; <<4 x float>>:363 [#uses=1]
+ fmul <4 x float> %363, %26 ; <<4 x float>>:364 [#uses=1]
+ fmul <4 x float> %364, %27 ; <<4 x float>>:365 [#uses=1]
+ fmul <4 x float> %365, %28 ; <<4 x float>>:366 [#uses=1]
+ fmul <4 x float> %366, %29 ; <<4 x float>>:367 [#uses=1]
+ fmul <4 x float> %367, %30 ; <<4 x float>>:368 [#uses=1]
+ fmul <4 x float> %368, %31 ; <<4 x float>>:369 [#uses=1]
+ fmul <4 x float> %369, %32 ; <<4 x float>>:370 [#uses=1]
+ fmul <4 x float> %14, %14 ; <<4 x float>>:371 [#uses=1]
+ fmul <4 x float> %371, %15 ; <<4 x float>>:372 [#uses=1]
+ fmul <4 x float> %372, %16 ; <<4 x float>>:373 [#uses=1]
+ fmul <4 x float> %373, %17 ; <<4 x float>>:374 [#uses=1]
+ fmul <4 x float> %374, %18 ; <<4 x float>>:375 [#uses=1]
+ fmul <4 x float> %375, %19 ; <<4 x float>>:376 [#uses=1]
+ fmul <4 x float> %376, %20 ; <<4 x float>>:377 [#uses=1]
+ fmul <4 x float> %377, %21 ; <<4 x float>>:378 [#uses=1]
+ fmul <4 x float> %378, %22 ; <<4 x float>>:379 [#uses=1]
+ fmul <4 x float> %379, %23 ; <<4 x float>>:380 [#uses=1]
+ fmul <4 x float> %380, %24 ; <<4 x float>>:381 [#uses=1]
+ fmul <4 x float> %381, %25 ; <<4 x float>>:382 [#uses=1]
+ fmul <4 x float> %382, %26 ; <<4 x float>>:383 [#uses=1]
+ fmul <4 x float> %383, %27 ; <<4 x float>>:384 [#uses=1]
+ fmul <4 x float> %384, %28 ; <<4 x float>>:385 [#uses=1]
+ fmul <4 x float> %385, %29 ; <<4 x float>>:386 [#uses=1]
+ fmul <4 x float> %386, %30 ; <<4 x float>>:387 [#uses=1]
+ fmul <4 x float> %387, %31 ; <<4 x float>>:388 [#uses=1]
+ fmul <4 x float> %388, %32 ; <<4 x float>>:389 [#uses=1]
+ fmul <4 x float> %15, %15 ; <<4 x float>>:390 [#uses=1]
+ fmul <4 x float> %390, %16 ; <<4 x float>>:391 [#uses=1]
+ fmul <4 x float> %391, %17 ; <<4 x float>>:392 [#uses=1]
+ fmul <4 x float> %392, %18 ; <<4 x float>>:393 [#uses=1]
+ fmul <4 x float> %393, %19 ; <<4 x float>>:394 [#uses=1]
+ fmul <4 x float> %394, %20 ; <<4 x float>>:395 [#uses=1]
+ fmul <4 x float> %395, %21 ; <<4 x float>>:396 [#uses=1]
+ fmul <4 x float> %396, %22 ; <<4 x float>>:397 [#uses=1]
+ fmul <4 x float> %397, %23 ; <<4 x float>>:398 [#uses=1]
+ fmul <4 x float> %398, %24 ; <<4 x float>>:399 [#uses=1]
+ fmul <4 x float> %399, %25 ; <<4 x float>>:400 [#uses=1]
+ fmul <4 x float> %400, %26 ; <<4 x float>>:401 [#uses=1]
+ fmul <4 x float> %401, %27 ; <<4 x float>>:402 [#uses=1]
+ fmul <4 x float> %402, %28 ; <<4 x float>>:403 [#uses=1]
+ fmul <4 x float> %403, %29 ; <<4 x float>>:404 [#uses=1]
+ fmul <4 x float> %404, %30 ; <<4 x float>>:405 [#uses=1]
+ fmul <4 x float> %405, %31 ; <<4 x float>>:406 [#uses=1]
+ fmul <4 x float> %406, %32 ; <<4 x float>>:407 [#uses=1]
+ fmul <4 x float> %16, %16 ; <<4 x float>>:408 [#uses=1]
+ fmul <4 x float> %408, %17 ; <<4 x float>>:409 [#uses=1]
+ fmul <4 x float> %409, %18 ; <<4 x float>>:410 [#uses=1]
+ fmul <4 x float> %410, %19 ; <<4 x float>>:411 [#uses=1]
+ fmul <4 x float> %411, %20 ; <<4 x float>>:412 [#uses=1]
+ fmul <4 x float> %412, %21 ; <<4 x float>>:413 [#uses=1]
+ fmul <4 x float> %413, %22 ; <<4 x float>>:414 [#uses=1]
+ fmul <4 x float> %414, %23 ; <<4 x float>>:415 [#uses=1]
+ fmul <4 x float> %415, %24 ; <<4 x float>>:416 [#uses=1]
+ fmul <4 x float> %416, %25 ; <<4 x float>>:417 [#uses=1]
+ fmul <4 x float> %417, %26 ; <<4 x float>>:418 [#uses=1]
+ fmul <4 x float> %418, %27 ; <<4 x float>>:419 [#uses=1]
+ fmul <4 x float> %419, %28 ; <<4 x float>>:420 [#uses=1]
+ fmul <4 x float> %420, %29 ; <<4 x float>>:421 [#uses=1]
+ fmul <4 x float> %421, %30 ; <<4 x float>>:422 [#uses=1]
+ fmul <4 x float> %422, %31 ; <<4 x float>>:423 [#uses=1]
+ fmul <4 x float> %423, %32 ; <<4 x float>>:424 [#uses=1]
+ fmul <4 x float> %17, %17 ; <<4 x float>>:425 [#uses=1]
+ fmul <4 x float> %425, %18 ; <<4 x float>>:426 [#uses=1]
+ fmul <4 x float> %426, %19 ; <<4 x float>>:427 [#uses=1]
+ fmul <4 x float> %427, %20 ; <<4 x float>>:428 [#uses=1]
+ fmul <4 x float> %428, %21 ; <<4 x float>>:429 [#uses=1]
+ fmul <4 x float> %429, %22 ; <<4 x float>>:430 [#uses=1]
+ fmul <4 x float> %430, %23 ; <<4 x float>>:431 [#uses=1]
+ fmul <4 x float> %431, %24 ; <<4 x float>>:432 [#uses=1]
+ fmul <4 x float> %432, %25 ; <<4 x float>>:433 [#uses=1]
+ fmul <4 x float> %433, %26 ; <<4 x float>>:434 [#uses=1]
+ fmul <4 x float> %434, %27 ; <<4 x float>>:435 [#uses=1]
+ fmul <4 x float> %435, %28 ; <<4 x float>>:436 [#uses=1]
+ fmul <4 x float> %436, %29 ; <<4 x float>>:437 [#uses=1]
+ fmul <4 x float> %437, %30 ; <<4 x float>>:438 [#uses=1]
+ fmul <4 x float> %438, %31 ; <<4 x float>>:439 [#uses=1]
+ fmul <4 x float> %439, %32 ; <<4 x float>>:440 [#uses=1]
+ fmul <4 x float> %18, %18 ; <<4 x float>>:441 [#uses=1]
+ fmul <4 x float> %441, %19 ; <<4 x float>>:442 [#uses=1]
+ fmul <4 x float> %442, %20 ; <<4 x float>>:443 [#uses=1]
+ fmul <4 x float> %443, %21 ; <<4 x float>>:444 [#uses=1]
+ fmul <4 x float> %444, %22 ; <<4 x float>>:445 [#uses=1]
+ fmul <4 x float> %445, %23 ; <<4 x float>>:446 [#uses=1]
+ fmul <4 x float> %446, %24 ; <<4 x float>>:447 [#uses=1]
+ fmul <4 x float> %447, %25 ; <<4 x float>>:448 [#uses=1]
+ fmul <4 x float> %448, %26 ; <<4 x float>>:449 [#uses=1]
+ fmul <4 x float> %449, %27 ; <<4 x float>>:450 [#uses=1]
+ fmul <4 x float> %450, %28 ; <<4 x float>>:451 [#uses=1]
+ fmul <4 x float> %451, %29 ; <<4 x float>>:452 [#uses=1]
+ fmul <4 x float> %452, %30 ; <<4 x float>>:453 [#uses=1]
+ fmul <4 x float> %453, %31 ; <<4 x float>>:454 [#uses=1]
+ fmul <4 x float> %454, %32 ; <<4 x float>>:455 [#uses=1]
+ fmul <4 x float> %19, %19 ; <<4 x float>>:456 [#uses=1]
+ fmul <4 x float> %456, %20 ; <<4 x float>>:457 [#uses=1]
+ fmul <4 x float> %457, %21 ; <<4 x float>>:458 [#uses=1]
+ fmul <4 x float> %458, %22 ; <<4 x float>>:459 [#uses=1]
+ fmul <4 x float> %459, %23 ; <<4 x float>>:460 [#uses=1]
+ fmul <4 x float> %460, %24 ; <<4 x float>>:461 [#uses=1]
+ fmul <4 x float> %461, %25 ; <<4 x float>>:462 [#uses=1]
+ fmul <4 x float> %462, %26 ; <<4 x float>>:463 [#uses=1]
+ fmul <4 x float> %463, %27 ; <<4 x float>>:464 [#uses=1]
+ fmul <4 x float> %464, %28 ; <<4 x float>>:465 [#uses=1]
+ fmul <4 x float> %465, %29 ; <<4 x float>>:466 [#uses=1]
+ fmul <4 x float> %466, %30 ; <<4 x float>>:467 [#uses=1]
+ fmul <4 x float> %467, %31 ; <<4 x float>>:468 [#uses=1]
+ fmul <4 x float> %468, %32 ; <<4 x float>>:469 [#uses=1]
+ fmul <4 x float> %20, %20 ; <<4 x float>>:470 [#uses=1]
+ fmul <4 x float> %470, %21 ; <<4 x float>>:471 [#uses=1]
+ fmul <4 x float> %471, %22 ; <<4 x float>>:472 [#uses=1]
+ fmul <4 x float> %472, %23 ; <<4 x float>>:473 [#uses=1]
+ fmul <4 x float> %473, %24 ; <<4 x float>>:474 [#uses=1]
+ fmul <4 x float> %474, %25 ; <<4 x float>>:475 [#uses=1]
+ fmul <4 x float> %475, %26 ; <<4 x float>>:476 [#uses=1]
+ fmul <4 x float> %476, %27 ; <<4 x float>>:477 [#uses=1]
+ fmul <4 x float> %477, %28 ; <<4 x float>>:478 [#uses=1]
+ fmul <4 x float> %478, %29 ; <<4 x float>>:479 [#uses=1]
+ fmul <4 x float> %479, %30 ; <<4 x float>>:480 [#uses=1]
+ fmul <4 x float> %480, %31 ; <<4 x float>>:481 [#uses=1]
+ fmul <4 x float> %481, %32 ; <<4 x float>>:482 [#uses=1]
+ fmul <4 x float> %21, %21 ; <<4 x float>>:483 [#uses=1]
+ fmul <4 x float> %483, %22 ; <<4 x float>>:484 [#uses=1]
+ fmul <4 x float> %484, %23 ; <<4 x float>>:485 [#uses=1]
+ fmul <4 x float> %485, %24 ; <<4 x float>>:486 [#uses=1]
+ fmul <4 x float> %486, %25 ; <<4 x float>>:487 [#uses=1]
+ fmul <4 x float> %487, %26 ; <<4 x float>>:488 [#uses=1]
+ fmul <4 x float> %488, %27 ; <<4 x float>>:489 [#uses=1]
+ fmul <4 x float> %489, %28 ; <<4 x float>>:490 [#uses=1]
+ fmul <4 x float> %490, %29 ; <<4 x float>>:491 [#uses=1]
+ fmul <4 x float> %491, %30 ; <<4 x float>>:492 [#uses=1]
+ fmul <4 x float> %492, %31 ; <<4 x float>>:493 [#uses=1]
+ fmul <4 x float> %493, %32 ; <<4 x float>>:494 [#uses=1]
+ fmul <4 x float> %22, %22 ; <<4 x float>>:495 [#uses=1]
+ fmul <4 x float> %495, %23 ; <<4 x float>>:496 [#uses=1]
+ fmul <4 x float> %496, %24 ; <<4 x float>>:497 [#uses=1]
+ fmul <4 x float> %497, %25 ; <<4 x float>>:498 [#uses=1]
+ fmul <4 x float> %498, %26 ; <<4 x float>>:499 [#uses=1]
+ fmul <4 x float> %499, %27 ; <<4 x float>>:500 [#uses=1]
+ fmul <4 x float> %500, %28 ; <<4 x float>>:501 [#uses=1]
+ fmul <4 x float> %501, %29 ; <<4 x float>>:502 [#uses=1]
+ fmul <4 x float> %502, %30 ; <<4 x float>>:503 [#uses=1]
+ fmul <4 x float> %503, %31 ; <<4 x float>>:504 [#uses=1]
+ fmul <4 x float> %504, %32 ; <<4 x float>>:505 [#uses=1]
+ fmul <4 x float> %23, %23 ; <<4 x float>>:506 [#uses=1]
+ fmul <4 x float> %506, %24 ; <<4 x float>>:507 [#uses=1]
+ fmul <4 x float> %507, %25 ; <<4 x float>>:508 [#uses=1]
+ fmul <4 x float> %508, %26 ; <<4 x float>>:509 [#uses=1]
+ fmul <4 x float> %509, %27 ; <<4 x float>>:510 [#uses=1]
+ fmul <4 x float> %510, %28 ; <<4 x float>>:511 [#uses=1]
+ fmul <4 x float> %511, %29 ; <<4 x float>>:512 [#uses=1]
+ fmul <4 x float> %512, %30 ; <<4 x float>>:513 [#uses=1]
+ fmul <4 x float> %513, %31 ; <<4 x float>>:514 [#uses=1]
+ fmul <4 x float> %514, %32 ; <<4 x float>>:515 [#uses=1]
+ fmul <4 x float> %24, %24 ; <<4 x float>>:516 [#uses=1]
+ fmul <4 x float> %516, %25 ; <<4 x float>>:517 [#uses=1]
+ fmul <4 x float> %517, %26 ; <<4 x float>>:518 [#uses=1]
+ fmul <4 x float> %518, %27 ; <<4 x float>>:519 [#uses=1]
+ fmul <4 x float> %519, %28 ; <<4 x float>>:520 [#uses=1]
+ fmul <4 x float> %520, %29 ; <<4 x float>>:521 [#uses=1]
+ fmul <4 x float> %521, %30 ; <<4 x float>>:522 [#uses=1]
+ fmul <4 x float> %522, %31 ; <<4 x float>>:523 [#uses=1]
+ fmul <4 x float> %523, %32 ; <<4 x float>>:524 [#uses=1]
+ fmul <4 x float> %25, %25 ; <<4 x float>>:525 [#uses=1]
+ fmul <4 x float> %525, %26 ; <<4 x float>>:526 [#uses=1]
+ fmul <4 x float> %526, %27 ; <<4 x float>>:527 [#uses=1]
+ fmul <4 x float> %527, %28 ; <<4 x float>>:528 [#uses=1]
+ fmul <4 x float> %528, %29 ; <<4 x float>>:529 [#uses=1]
+ fmul <4 x float> %529, %30 ; <<4 x float>>:530 [#uses=1]
+ fmul <4 x float> %530, %31 ; <<4 x float>>:531 [#uses=1]
+ fmul <4 x float> %531, %32 ; <<4 x float>>:532 [#uses=1]
+ fmul <4 x float> %26, %26 ; <<4 x float>>:533 [#uses=1]
+ fmul <4 x float> %533, %27 ; <<4 x float>>:534 [#uses=1]
+ fmul <4 x float> %534, %28 ; <<4 x float>>:535 [#uses=1]
+ fmul <4 x float> %535, %29 ; <<4 x float>>:536 [#uses=1]
+ fmul <4 x float> %536, %30 ; <<4 x float>>:537 [#uses=1]
+ fmul <4 x float> %537, %31 ; <<4 x float>>:538 [#uses=1]
+ fmul <4 x float> %538, %32 ; <<4 x float>>:539 [#uses=1]
+ fmul <4 x float> %27, %27 ; <<4 x float>>:540 [#uses=1]
+ fmul <4 x float> %540, %28 ; <<4 x float>>:541 [#uses=1]
+ fmul <4 x float> %541, %29 ; <<4 x float>>:542 [#uses=1]
+ fmul <4 x float> %542, %30 ; <<4 x float>>:543 [#uses=1]
+ fmul <4 x float> %543, %31 ; <<4 x float>>:544 [#uses=1]
+ fmul <4 x float> %544, %32 ; <<4 x float>>:545 [#uses=1]
+ fmul <4 x float> %28, %28 ; <<4 x float>>:546 [#uses=1]
+ fmul <4 x float> %546, %29 ; <<4 x float>>:547 [#uses=1]
+ fmul <4 x float> %547, %30 ; <<4 x float>>:548 [#uses=1]
+ fmul <4 x float> %548, %31 ; <<4 x float>>:549 [#uses=1]
+ fmul <4 x float> %549, %32 ; <<4 x float>>:550 [#uses=1]
+ fmul <4 x float> %29, %29 ; <<4 x float>>:551 [#uses=1]
+ fmul <4 x float> %551, %30 ; <<4 x float>>:552 [#uses=1]
+ fmul <4 x float> %552, %31 ; <<4 x float>>:553 [#uses=1]
+ fmul <4 x float> %553, %32 ; <<4 x float>>:554 [#uses=1]
+ fmul <4 x float> %30, %30 ; <<4 x float>>:555 [#uses=1]
+ fmul <4 x float> %555, %31 ; <<4 x float>>:556 [#uses=1]
+ fmul <4 x float> %556, %32 ; <<4 x float>>:557 [#uses=1]
+ fmul <4 x float> %31, %31 ; <<4 x float>>:558 [#uses=1]
+ fmul <4 x float> %558, %32 ; <<4 x float>>:559 [#uses=1]
+ fmul <4 x float> %32, %32 ; <<4 x float>>:560 [#uses=1]
+ fadd <4 x float> %64, %64 ; <<4 x float>>:561 [#uses=1]
+ fadd <4 x float> %561, %64 ; <<4 x float>>:562 [#uses=1]
+ fadd <4 x float> %562, %95 ; <<4 x float>>:563 [#uses=1]
+ fadd <4 x float> %563, %125 ; <<4 x float>>:564 [#uses=1]
+ fadd <4 x float> %564, %154 ; <<4 x float>>:565 [#uses=1]
+ fadd <4 x float> %565, %182 ; <<4 x float>>:566 [#uses=1]
+ fadd <4 x float> %566, %209 ; <<4 x float>>:567 [#uses=1]
+ fadd <4 x float> %567, %235 ; <<4 x float>>:568 [#uses=1]
+ fadd <4 x float> %568, %260 ; <<4 x float>>:569 [#uses=1]
+ fadd <4 x float> %569, %284 ; <<4 x float>>:570 [#uses=1]
+ fadd <4 x float> %570, %307 ; <<4 x float>>:571 [#uses=1]
+ fadd <4 x float> %571, %329 ; <<4 x float>>:572 [#uses=1]
+ fadd <4 x float> %572, %350 ; <<4 x float>>:573 [#uses=1]
+ fadd <4 x float> %573, %370 ; <<4 x float>>:574 [#uses=1]
+ fadd <4 x float> %574, %389 ; <<4 x float>>:575 [#uses=1]
+ fadd <4 x float> %575, %407 ; <<4 x float>>:576 [#uses=1]
+ fadd <4 x float> %576, %424 ; <<4 x float>>:577 [#uses=1]
+ fadd <4 x float> %577, %440 ; <<4 x float>>:578 [#uses=1]
+ fadd <4 x float> %578, %455 ; <<4 x float>>:579 [#uses=1]
+ fadd <4 x float> %579, %469 ; <<4 x float>>:580 [#uses=1]
+ fadd <4 x float> %580, %482 ; <<4 x float>>:581 [#uses=1]
+ fadd <4 x float> %581, %494 ; <<4 x float>>:582 [#uses=1]
+ fadd <4 x float> %582, %505 ; <<4 x float>>:583 [#uses=1]
+ fadd <4 x float> %583, %515 ; <<4 x float>>:584 [#uses=1]
+ fadd <4 x float> %584, %524 ; <<4 x float>>:585 [#uses=1]
+ fadd <4 x float> %585, %532 ; <<4 x float>>:586 [#uses=1]
+ fadd <4 x float> %586, %539 ; <<4 x float>>:587 [#uses=1]
+ fadd <4 x float> %587, %545 ; <<4 x float>>:588 [#uses=1]
+ fadd <4 x float> %588, %550 ; <<4 x float>>:589 [#uses=1]
+ fadd <4 x float> %589, %554 ; <<4 x float>>:590 [#uses=1]
+ fadd <4 x float> %590, %557 ; <<4 x float>>:591 [#uses=1]
+ fadd <4 x float> %591, %559 ; <<4 x float>>:592 [#uses=1]
+ fadd <4 x float> %592, %560 ; <<4 x float>>:593 [#uses=1]
store <4 x float> %593, <4 x float>* @0, align 1
ret void
}
diff --git a/test/CodeGen/X86/2008-07-23-VSetCC.ll b/test/CodeGen/X86/2008-07-23-VSetCC.ll
index 735c610..da6c089 100644
--- a/test/CodeGen/X86/2008-07-23-VSetCC.ll
+++ b/test/CodeGen/X86/2008-07-23-VSetCC.ll
@@ -13,12 +13,12 @@ bb.nph: ; preds = %bb.nph, %0
insertelement <4 x i32> zeroinitializer, i32 %5, i32 3 ; <<4 x i32>>:6 [#uses=1]
and <4 x i32> zeroinitializer, %6 ; <<4 x i32>>:7 [#uses=1]
bitcast <4 x i32> %7 to <4 x float> ; <<4 x float>>:8 [#uses=1]
- mul <4 x float> zeroinitializer, %8 ; <<4 x float>>:9 [#uses=1]
+ fmul <4 x float> zeroinitializer, %8 ; <<4 x float>>:9 [#uses=1]
bitcast <4 x float> %9 to <4 x i32> ; <<4 x i32>>:10 [#uses=1]
or <4 x i32> %10, zeroinitializer ; <<4 x i32>>:11 [#uses=1]
bitcast <4 x i32> %11 to <4 x float> ; <<4 x float>>:12 [#uses=1]
- mul <4 x float> %12, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 > ; <<4 x float>>:13 [#uses=1]
- sub <4 x float> %13, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 > ; <<4 x float>>:14 [#uses=1]
+ fmul <4 x float> %12, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 > ; <<4 x float>>:13 [#uses=1]
+ fsub <4 x float> %13, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 > ; <<4 x float>>:14 [#uses=1]
extractelement <4 x float> %14, i32 3 ; <float>:15 [#uses=1]
call float @fmaxf( float 0.000000e+00, float %15 ) ; <float>:16 [#uses=0]
br label %bb.nph
diff --git a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
index b50f2b0..4e35332 100644
--- a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
+++ b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
@@ -13,7 +13,7 @@ bb151: ; preds = %entry
bb163: ; preds = %bb151, %entry
%tmp366 = load double* null, align 8 ; <double> [#uses=1]
- %tmp368 = mul double %tmp366, 0.000000e+00 ; <double> [#uses=1]
+ %tmp368 = fmul double %tmp366, 0.000000e+00 ; <double> [#uses=1]
%tmp368226 = bitcast double %tmp368 to i64 ; <i64> [#uses=1]
br label %bb5.i
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index 2c8e12f..ad13b85 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -26,7 +26,7 @@ bb22.preheader: ; preds = %bb24.preheader, %bb22.preheader
br label %bb22.preheader
bb25: ; preds = %bb24.preheader
- %7 = mul double 0.000000e+00, %6 ; <double> [#uses=0]
+ %7 = fmul double 0.000000e+00, %6 ; <double> [#uses=0]
%8 = add i32 %i3.122100, 0 ; <i32> [#uses=1]
%9 = icmp sgt i32 %8, 0 ; <i1> [#uses=1]
br i1 %9, label %bb3, label %bb24.preheader
@@ -37,7 +37,7 @@ bb24.preheader: ; preds = %bb25, %bb18
br i1 %10, label %bb25, label %bb22.preheader
bb30.loopexit: ; preds = %bb
- %11 = mul double 0.000000e+00, 0x401921FB54442D1C ; <double> [#uses=1]
+ %11 = fmul double 0.000000e+00, 0x401921FB54442D1C ; <double> [#uses=1]
br label %bb3
}
diff --git a/test/CodeGen/X86/2008-11-03-F80VAARG.ll b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
index bb9fbdb..36a054a 100644
--- a/test/CodeGen/X86/2008-11-03-F80VAARG.ll
+++ b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
@@ -12,6 +12,6 @@ define x86_fp80 @test(...) nounwind {
call void @llvm.va_start(i8* %v1)
%t1 = va_arg i8** %ap, x86_fp80 ; <x86_fp80> [#uses=1]
%t2 = va_arg i8** %ap, x86_fp80 ; <x86_fp80> [#uses=1]
- %t = add x86_fp80 %t1, %t2 ; <x86_fp80> [#uses=1]
+ %t = fadd x86_fp80 %t1, %t2 ; <x86_fp80> [#uses=1]
ret x86_fp80 %t
}
diff --git a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
index dbb7acf..b6b5cbd 100644
--- a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
@@ -145,7 +145,7 @@ bb4426.i.i.i: ; preds = %bb7551.i.i.i
%20 = add <4 x i32> %19, zeroinitializer ; <<4 x i32>> [#uses=3]
%21 = load i32* null, align 4 ; <i32> [#uses=0]
%22 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> zeroinitializer) nounwind readnone ; <<4 x float>> [#uses=1]
- %23 = mul <4 x float> %22, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
+ %23 = fmul <4 x float> %22, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
%tmp2114.i119.i.i = extractelement <4 x i32> %20, i32 1 ; <i32> [#uses=1]
%24 = shl i32 %tmp2114.i119.i.i, 2 ; <i32> [#uses=1]
%25 = getelementptr i8* %11, i32 %24 ; <i8*> [#uses=1]
@@ -160,7 +160,7 @@ bb4426.i.i.i: ; preds = %bb7551.i.i.i
%33 = bitcast <8 x i16> %32 to <4 x i32> ; <<4 x i32>> [#uses=1]
%34 = shufflevector <4 x i32> %33, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 > ; <<4 x i32>> [#uses=1]
%35 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %34) nounwind readnone ; <<4 x float>> [#uses=1]
- %36 = mul <4 x float> %35, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
+ %36 = fmul <4 x float> %35, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
%tmp2113.i124.i.i = extractelement <4 x i32> %20, i32 2 ; <i32> [#uses=1]
%37 = shl i32 %tmp2113.i124.i.i, 2 ; <i32> [#uses=1]
%38 = getelementptr i8* %14, i32 %37 ; <i8*> [#uses=1]
@@ -175,7 +175,7 @@ bb4426.i.i.i: ; preds = %bb7551.i.i.i
%46 = bitcast <8 x i16> %45 to <4 x i32> ; <<4 x i32>> [#uses=1]
%47 = shufflevector <4 x i32> %46, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 > ; <<4 x i32>> [#uses=1]
%48 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %47) nounwind readnone ; <<4 x float>> [#uses=1]
- %49 = mul <4 x float> %48, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
+ %49 = fmul <4 x float> %48, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
%tmp2112.i129.i.i = extractelement <4 x i32> %20, i32 3 ; <i32> [#uses=1]
%50 = shl i32 %tmp2112.i129.i.i, 2 ; <i32> [#uses=1]
%51 = getelementptr i8* %17, i32 %50 ; <i8*> [#uses=1]
@@ -190,15 +190,15 @@ bb4426.i.i.i: ; preds = %bb7551.i.i.i
%59 = bitcast <8 x i16> %58 to <4 x i32> ; <<4 x i32>> [#uses=1]
%60 = shufflevector <4 x i32> %59, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 > ; <<4 x i32>> [#uses=1]
%61 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %60) nounwind readnone ; <<4 x float>> [#uses=1]
- %62 = mul <4 x float> %61, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
- %63 = mul <4 x float> %23, zeroinitializer ; <<4 x float>> [#uses=1]
- %64 = add <4 x float> zeroinitializer, %63 ; <<4 x float>> [#uses=1]
- %65 = mul <4 x float> %36, zeroinitializer ; <<4 x float>> [#uses=1]
- %66 = add <4 x float> zeroinitializer, %65 ; <<4 x float>> [#uses=1]
- %67 = mul <4 x float> %49, zeroinitializer ; <<4 x float>> [#uses=1]
- %68 = add <4 x float> zeroinitializer, %67 ; <<4 x float>> [#uses=1]
- %69 = mul <4 x float> %62, zeroinitializer ; <<4 x float>> [#uses=1]
- %70 = add <4 x float> zeroinitializer, %69 ; <<4 x float>> [#uses=1]
+ %62 = fmul <4 x float> %61, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
+ %63 = fmul <4 x float> %23, zeroinitializer ; <<4 x float>> [#uses=1]
+ %64 = fadd <4 x float> zeroinitializer, %63 ; <<4 x float>> [#uses=1]
+ %65 = fmul <4 x float> %36, zeroinitializer ; <<4 x float>> [#uses=1]
+ %66 = fadd <4 x float> zeroinitializer, %65 ; <<4 x float>> [#uses=1]
+ %67 = fmul <4 x float> %49, zeroinitializer ; <<4 x float>> [#uses=1]
+ %68 = fadd <4 x float> zeroinitializer, %67 ; <<4 x float>> [#uses=1]
+ %69 = fmul <4 x float> %62, zeroinitializer ; <<4 x float>> [#uses=1]
+ %70 = fadd <4 x float> zeroinitializer, %69 ; <<4 x float>> [#uses=1]
%tmp7452.i.i.i = bitcast <4 x float> %64 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp7454.i.i.i = and <4 x i32> %tmp7452.i.i.i, zeroinitializer ; <<4 x i32>> [#uses=1]
%tmp7459.i.i.i = or <4 x i32> %tmp7454.i.i.i, zeroinitializer ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/2009-01-16-UIntToFP.ll b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
index 6de11c9..340608a 100644
--- a/test/CodeGen/X86/2009-01-16-UIntToFP.ll
+++ b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
@@ -22,10 +22,10 @@ bb2: ; preds = %bb1, %bb, %entry
%5 = lshr i64 %u_addr.0, 32 ; <i64> [#uses=1]
%6 = trunc i64 %5 to i32 ; <i32> [#uses=1]
%7 = uitofp i32 %6 to double ; <double> [#uses=1]
- %8 = mul double %7, 0x41F0000000000000 ; <double> [#uses=1]
+ %8 = fmul double %7, 0x41F0000000000000 ; <double> [#uses=1]
%9 = trunc i64 %u_addr.0 to i32 ; <i32> [#uses=1]
%10 = uitofp i32 %9 to double ; <double> [#uses=1]
- %11 = add double %10, %8 ; <double> [#uses=1]
+ %11 = fadd double %10, %8 ; <double> [#uses=1]
%12 = fptrunc double %11 to float ; <float> [#uses=1]
ret float %12
}
diff --git a/test/CodeGen/X86/2009-02-12-SpillerBug.ll b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
index 747dc8a..1d10319 100644
--- a/test/CodeGen/X86/2009-02-12-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
@@ -3,9 +3,9 @@
define hidden void @__mulxc3({ x86_fp80, x86_fp80 }* noalias nocapture sret %agg.result, x86_fp80 %a, x86_fp80 %b, x86_fp80 %c, x86_fp80 %d) nounwind {
entry:
- %0 = mul x86_fp80 %b, %d ; <x86_fp80> [#uses=1]
- %1 = sub x86_fp80 0xK00000000000000000000, %0 ; <x86_fp80> [#uses=1]
- %2 = add x86_fp80 0xK00000000000000000000, 0xK00000000000000000000 ; <x86_fp80> [#uses=1]
+ %0 = fmul x86_fp80 %b, %d ; <x86_fp80> [#uses=1]
+ %1 = fsub x86_fp80 0xK00000000000000000000, %0 ; <x86_fp80> [#uses=1]
+ %2 = fadd x86_fp80 0xK00000000000000000000, 0xK00000000000000000000 ; <x86_fp80> [#uses=1]
%3 = fcmp uno x86_fp80 %1, 0xK00000000000000000000 ; <i1> [#uses=1]
%4 = fcmp uno x86_fp80 %2, 0xK00000000000000000000 ; <i1> [#uses=1]
%or.cond = and i1 %3, %4 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
index b772bf8..3dbfa80 100644
--- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -7,7 +7,7 @@ entry:
%tmp2.i = or <2 x i64> %tmp.i2, <i64 4607632778762754458, i64 4607632778762754458> ; <<2 x i64>> [#uses=1]
%tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double> ; <<2 x double>> [#uses=1]
%0 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %A, <2 x double> %tmp3.i) nounwind readnone ; <<2 x double>> [#uses=1]
- %tmp.i = add <2 x double> %0, %C ; <<2 x double>> [#uses=1]
+ %tmp.i = fadd <2 x double> %0, %C ; <<2 x double>> [#uses=1]
ret <2 x double> %tmp.i
}
diff --git a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
index 1eefaa9..6f16ced 100644
--- a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
+++ b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
@@ -6,7 +6,7 @@ define i32 @x(i32 %y) nounwind readnone {
entry:
%tmp14 = zext i32 %y to i80 ; <i80> [#uses=1]
%tmp15 = bitcast i80 %tmp14 to x86_fp80 ; <x86_fp80> [#uses=1]
- %add = add x86_fp80 %tmp15, 0xK3FFF8000000000000000 ; <x86_fp80> [#uses=1]
+ %add = fadd x86_fp80 %tmp15, 0xK3FFF8000000000000000 ; <x86_fp80> [#uses=1]
%tmp11 = bitcast x86_fp80 %add to i80 ; <i80> [#uses=1]
%tmp10 = trunc i80 %tmp11 to i32 ; <i32> [#uses=1]
ret i32 %tmp10
diff --git a/test/CodeGen/X86/2009-03-09-SpillerBug.ll b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
index 14bdcc3..2ccd771 100644
--- a/test/CodeGen/X86/2009-03-09-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
@@ -5,7 +5,7 @@ define void @__mulxc3(x86_fp80 %b) nounwind {
entry:
%call = call x86_fp80 @y(x86_fp80* null, x86_fp80* null) ; <x86_fp80> [#uses=0]
%cmp = fcmp ord x86_fp80 %b, 0xK00000000000000000000 ; <i1> [#uses=1]
- %sub = sub x86_fp80 %b, %b ; <x86_fp80> [#uses=1]
+ %sub = fsub x86_fp80 %b, %b ; <x86_fp80> [#uses=1]
%cmp7 = fcmp uno x86_fp80 %sub, 0xK00000000000000000000 ; <i1> [#uses=1]
%and12 = and i1 %cmp7, %cmp ; <i1> [#uses=1]
%and = zext i1 %and12 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
index 75af992..ec060e4 100644
--- a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
+++ b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
@@ -19,18 +19,18 @@ bb1: ; preds = %newFuncRoot
%0 = tail call double @llvm.sqrt.f64(double %.reload8) ; <double> [#uses=1]
%1 = fptrunc x86_fp80 %.reload6 to double ; <double> [#uses=1]
%2 = tail call double @fabs(double %1) nounwind readnone ; <double> [#uses=1]
- %3 = add double %0, %2 ; <double> [#uses=1]
+ %3 = fadd double %0, %2 ; <double> [#uses=1]
%4 = tail call double @llvm.pow.f64(double %3, double 0x3FD5555555555555) ; <double> [#uses=1]
%5 = fpext double %4 to x86_fp80 ; <x86_fp80> [#uses=2]
%6 = fdiv x86_fp80 %.reload5, %5 ; <x86_fp80> [#uses=1]
- %7 = add x86_fp80 %5, %6 ; <x86_fp80> [#uses=1]
+ %7 = fadd x86_fp80 %5, %6 ; <x86_fp80> [#uses=1]
%8 = fptrunc x86_fp80 %7 to double ; <double> [#uses=1]
%9 = fcmp olt x86_fp80 %.reload6, 0xK00000000000000000000 ; <i1> [#uses=1]
%iftmp.6.0 = select i1 %9, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=1]
- %10 = mul double %8, %iftmp.6.0 ; <double> [#uses=1]
+ %10 = fmul double %8, %iftmp.6.0 ; <double> [#uses=1]
%11 = fpext double %10 to x86_fp80 ; <x86_fp80> [#uses=1]
%12 = fdiv x86_fp80 %.reload, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1]
- %13 = add x86_fp80 %11, %12 ; <x86_fp80> [#uses=1]
+ %13 = fadd x86_fp80 %11, %12 ; <x86_fp80> [#uses=1]
%14 = fptrunc x86_fp80 %13 to double ; <double> [#uses=1]
store double %14, double* %x, align 1
br label %bb1.ret.exitStub
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index a963145..b30d41e 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=static -stats -info-output-file - > %t
+; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
; RUN: not grep spill %t
; RUN: not grep {%rsp} %t
; RUN: not grep {%rbp} %t
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
new file mode 100644
index 0000000..c628b8a
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc | grep "subq.*\\\$8, \\\%rsp"
+target triple = "x86_64-mingw64"
+
+define x86_fp80 @a(i64 %x) nounwind readnone {
+entry:
+ %conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1]
+ ret x86_fp80 %conv
+}
+
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
new file mode 100644
index 0000000..33d7972
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -o %t1 -f
+; RUN: grep "subq.*\\\$40, \\\%rsp" %t1
+; RUN: grep "movaps \\\%xmm8, \\\(\\\%rsp\\\)" %t1
+; RUN: grep "movaps \\\%xmm7, 16\\\(\\\%rsp\\\)" %t1
+target triple = "x86_64-mingw64"
+
+define i32 @a() nounwind {
+entry:
+ tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
+ ret i32 undef
+}
+
diff --git a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
new file mode 100644
index 0000000..fa90fa9
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -0,0 +1,48 @@
+; RUN: llvm-as < %s | llc -march=x86
+
+ type { %struct.GAP } ; type %0
+ type { i16, i8, i8 } ; type %1
+ type { [2 x i32], [2 x i32] } ; type %2
+ type { %struct.rec* } ; type %3
+ %struct.FILE_POS = type { i8, i8, i16, i32 }
+ %struct.FIRST_UNION = type { %struct.FILE_POS }
+ %struct.FOURTH_UNION = type { %struct.STYLE }
+ %struct.GAP = type { i8, i8, i16 }
+ %struct.LIST = type { %struct.rec*, %struct.rec* }
+ %struct.SECOND_UNION = type { %1 }
+ %struct.STYLE = type { %0, %0, i16, i16, i32 }
+ %struct.THIRD_UNION = type { %2 }
+ %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+ %struct.rec = type { %struct.head_type }
+
+define fastcc void @MinSize(%struct.rec* %x) nounwind {
+entry:
+ %tmp13 = load i8* undef, align 4 ; <i8> [#uses=3]
+ %tmp14 = zext i8 %tmp13 to i32 ; <i32> [#uses=2]
+ switch i32 %tmp14, label %bb1109 [
+ i32 42, label %bb246
+ ]
+
+bb246: ; preds = %entry, %entry
+ switch i8 %tmp13, label %bb249 [
+ i8 42, label %bb269
+ i8 44, label %bb269
+ ]
+
+bb249: ; preds = %bb246
+ %tmp3240 = icmp eq i8 %tmp13, 0 ; <i1> [#uses=1]
+ br i1 %tmp3240, label %bb974, label %bb269
+
+bb269:
+ %tmp3424 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 0, i32 0, i32 1 ; <%struct.rec**> [#uses=0]
+ unreachable
+
+bb974:
+ unreachable
+
+bb1109: ; preds = %entry
+ call fastcc void @Image(i32 %tmp14) nounwind ; <i8*> [#uses=0]
+ unreachable
+}
+
+declare fastcc void @Image(i32) nounwind
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
new file mode 100644
index 0000000..94df530
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | not grep movl
+
+define <8 x i8> @a(i8 zeroext %x) nounwind {
+ %r = insertelement <8 x i8> undef, i8 %x, i32 0
+ ret <8 x i8> %r
+}
+
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
new file mode 100644
index 0000000..220423a
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -0,0 +1,37 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2 > %t1
+; RUN: grep movzwl %t1 | count 2
+; RUN: grep movzbl %t1 | count 2
+; RUN: grep movd %t1 | count 4
+
+define <4 x i16> @a(i32* %x1) nounwind {
+ %x2 = load i32* %x1
+ %x3 = lshr i32 %x2, 1
+ %x = trunc i32 %x3 to i16
+ %r = insertelement <4 x i16> zeroinitializer, i16 %x, i32 0
+ ret <4 x i16> %r
+}
+
+define <8 x i16> @b(i32* %x1) nounwind {
+ %x2 = load i32* %x1
+ %x3 = lshr i32 %x2, 1
+ %x = trunc i32 %x3 to i16
+ %r = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0
+ ret <8 x i16> %r
+}
+
+define <8 x i8> @c(i32* %x1) nounwind {
+ %x2 = load i32* %x1
+ %x3 = lshr i32 %x2, 1
+ %x = trunc i32 %x3 to i8
+ %r = insertelement <8 x i8> zeroinitializer, i8 %x, i32 0
+ ret <8 x i8> %r
+}
+
+define <16 x i8> @d(i32* %x1) nounwind {
+ %x2 = load i32* %x1
+ %x3 = lshr i32 %x2, 1
+ %x = trunc i32 %x3 to i8
+ %r = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0
+ ret <16 x i8> %r
+}
+
diff --git a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
new file mode 100644
index 0000000..2e3f195
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc
+
+define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind readnone {
+entry:
+ %conv = bitcast <2 x i64> %a to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %conv2 = trunc i32 %b to i16 ; <i16> [#uses=1]
+ %and = and i32 %imm, 7 ; <i32> [#uses=1]
+ %vecins = insertelement <8 x i16> %conv, i16 %conv2, i32 %and ; <<8 x i16>> [#uses=1]
+ %conv6 = bitcast <8 x i16> %vecins to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %conv6
+}
diff --git a/test/CodeGen/X86/2009-06-05-sitofpCrash.ll b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
new file mode 100644
index 0000000..589a880
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse
+; PR2598
+
+define <2 x float> @a(<2 x i32> %i) nounwind {
+ %r = sitofp <2 x i32> %i to <2 x float>
+ ret <2 x float> %r
+}
+
+define <2 x i32> @b(<2 x float> %i) nounwind {
+ %r = fptosi <2 x float> %i to <2 x i32>
+ ret <2 x i32> %r
+}
+
diff --git a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
new file mode 100644
index 0000000..a46fd1a
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc
+
+define <2 x i64> @_mm_movpi64_pi64(<1 x i64> %a, <1 x i64> %b) nounwind readnone {
+entry:
+ %0 = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %0
+}
+
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index f1fec3f..513599c 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -141,26 +141,6 @@
; RUN: not grep @PLTOFF %t
; RUN: grep {call \\\*} %t | count 10
; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 91
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 70
-; RUN: grep movq %t | count 56
-; RUN: grep addq %t | count 20
-; RUN: grep subq %t | count 14
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: grep {%rip} %t | count 139
; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small > %t
; RUN: not grep leal %t
; RUN: grep movl %t | count 95
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index b432c39..b9ce10f 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -8,18 +8,18 @@
define void @goo(double* %r, double* %p, double* %q) nounwind {
entry:
%0 = load double* %p, align 8
- %1 = add double %0, 1.100000e+00
- %2 = mul double %1, 1.200000e+00
- %3 = add double %2, 1.300000e+00
- %4 = mul double %3, 1.400000e+00
- %5 = add double %4, 1.500000e+00
+ %1 = fadd double %0, 1.100000e+00
+ %2 = fmul double %1, 1.200000e+00
+ %3 = fadd double %2, 1.300000e+00
+ %4 = fmul double %3, 1.400000e+00
+ %5 = fadd double %4, 1.500000e+00
%6 = fptosi double %5 to i32
%7 = load double* %r, align 8
- %8 = add double %7, 7.100000e+00
- %9 = mul double %8, 7.200000e+00
- %10 = add double %9, 7.300000e+00
- %11 = mul double %10, 7.400000e+00
- %12 = add double %11, 7.500000e+00
+ %8 = fadd double %7, 7.100000e+00
+ %9 = fmul double %8, 7.200000e+00
+ %10 = fadd double %9, 7.300000e+00
+ %11 = fmul double %10, 7.400000e+00
+ %12 = fadd double %11, 7.500000e+00
%13 = fptosi double %12 to i32
%14 = icmp slt i32 %6, %13
br i1 %14, label %bb, label %return
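
The churn in the hunk above repeats through the rest of this import: LLVM split the overloaded add/sub/mul opcodes into dedicated floating-point instructions fadd/fsub/fmul, so integer and FP arithmetic no longer share opcode names, and every .ll test doing FP math had to be respelled. A minimal before/after sketch (@axpy is an illustrative name, not part of the patch):

; Old spelling (integer and FP shared these opcode names):
;   %prod = mul double %a, %x
;   %sum  = add double %prod, %y
; New spelling with distinct FP opcodes:
define double @axpy(double %a, double %x, double %y) nounwind {
  %prod = fmul double %a, %x
  %sum  = fadd double %prod, %y
  ret double %sum
}
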
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index 0fae2a6..9939424 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -15,7 +15,7 @@ bb: ; preds = %bb, %entry
%tmp2 = getelementptr i32* %source, i32 %neuron.0 ; <i32*> [#uses=1]
%tmp3 = load i32* %tmp2, align 4 ; <i32> [#uses=1]
%tmp34 = sitofp i32 %tmp3 to float ; <float> [#uses=1]
- %tmp6 = add float %tmp34, %thesum.0 ; <float> [#uses=2]
+ %tmp6 = fadd float %tmp34, %thesum.0 ; <float> [#uses=2]
%indvar.next = add i32 %neuron.0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %tmp10 ; <i1> [#uses=1]
br i1 %exitcond, label %bb13, label %bb
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index ce4abf1..c67e0f5 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -28,7 +28,7 @@ define <4 x float> @test3(<4 x float> %V) {
entry:
%tmp8 = shufflevector <4 x float> %V, <4 x float> undef,
<4 x i32> < i32 3, i32 2, i32 1, i32 0 >
- %add = add <4 x float> %tmp8, %V
+ %add = fadd <4 x float> %tmp8, %V
ret <4 x float> %add
}
diff --git a/test/CodeGen/X86/coalescer-commute4.ll b/test/CodeGen/X86/coalescer-commute4.ll
index 7299aca..9628f93 100644
--- a/test/CodeGen/X86/coalescer-commute4.ll
+++ b/test/CodeGen/X86/coalescer-commute4.ll
@@ -18,8 +18,8 @@ bb: ; preds = %bb, %bb.preheader
%tmp45 = sitofp i32 %tmp4 to float ; <float> [#uses=1]
%tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0 ; <float*> [#uses=1]
%tmp9 = load float* %tmp8, align 4 ; <float> [#uses=1]
- %tmp11 = mul float %tmp9, %tmp45 ; <float> [#uses=1]
- %tmp14 = add float %tmp11, %res.0.reg2mem.0 ; <float> [#uses=2]
+ %tmp11 = fmul float %tmp9, %tmp45 ; <float> [#uses=1]
+ %tmp14 = fadd float %tmp11, %res.0.reg2mem.0 ; <float> [#uses=2]
%indvar.next = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %umax ; <i1> [#uses=1]
br i1 %exitcond, label %bb23, label %bb
diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll
index 29eb6ee..05adb50 100644
--- a/test/CodeGen/X86/complex-fca.ll
+++ b/test/CodeGen/X86/complex-fca.ll
@@ -4,7 +4,7 @@ define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80,
entry:
%z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0
%z9 = extractvalue { x86_fp80, x86_fp80 } %z, 1
- %0 = sub x86_fp80 0xK80000000000000000000, %z9
+ %0 = fsub x86_fp80 0xK80000000000000000000, %z9
%insert = insertvalue { x86_fp80, x86_fp80 } undef, x86_fp80 %0, 0
%insert7 = insertvalue { x86_fp80, x86_fp80 } %insert, x86_fp80 %z8, 1
call void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %insert7) nounwind
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
index 40caaa6..80be854 100644
--- a/test/CodeGen/X86/constant-pool-remat-0.ll
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -6,8 +6,8 @@
declare float @qux(float %y)
define float @array(float %a) nounwind {
- %n = mul float %a, 9.0
+ %n = fmul float %a, 9.0
%m = call float @qux(float %n)
- %o = mul float %m, 9.0
+ %o = fmul float %m, 9.0
ret float %o
}
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index c89a296..b96fdfc 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,13 +1,25 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f
; RUN: grep unpcklpd %t | count 1
; RUN: grep movapd %t | count 1
+; RUN: grep movaps %t | count 1
; Shows a dag combine bug that will generate an illegal build vector
; with v2i64 build_vector i32, i32.
-define void @test(<2 x double>* %dst, <4 x double> %src) {
+define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
entry:
%tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
store <2 x double> %tmp7.i, <2 x double>* %dst
ret void
}
+
+define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
+entry:
+ %tmp1 = load <4 x i16>* %src
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
+ store <4 x i32> %0, <4 x i32>* %dest
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/extract-combine.ll b/test/CodeGen/X86/extract-combine.ll
index 9172dce..842ec24 100644
--- a/test/CodeGen/X86/extract-combine.ll
+++ b/test/CodeGen/X86/extract-combine.ll
@@ -7,9 +7,9 @@ entry:
%tmp518 = shufflevector <16 x float> %tmp74.i25762, <16 x float> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15> ; <<4 x float>> [#uses=1]
%movss.i25611 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp518, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%conv3.i25615 = shufflevector <4 x float> %movss.i25611, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
- %sub.i25620 = sub <4 x float> %conv3.i25615, zeroinitializer ; <<4 x float>> [#uses=1]
- %mul.i25621 = mul <4 x float> zeroinitializer, %sub.i25620 ; <<4 x float>> [#uses=1]
- %add.i25622 = add <4 x float> zeroinitializer, %mul.i25621 ; <<4 x float>> [#uses=1]
+ %sub.i25620 = fsub <4 x float> %conv3.i25615, zeroinitializer ; <<4 x float>> [#uses=1]
+ %mul.i25621 = fmul <4 x float> zeroinitializer, %sub.i25620 ; <<4 x float>> [#uses=1]
+ %add.i25622 = fadd <4 x float> zeroinitializer, %mul.i25621 ; <<4 x float>> [#uses=1]
store <4 x float> %add.i25622, <4 x float>* null
unreachable
}
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index 0646a79..7ac8e04 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -16,7 +16,7 @@ define float @test1(float %X) {
define double @test2(double %X) {
%Y = fcmp oge double %X, -0.0
- %Z = sub double -0.0, %X
+ %Z = fsub double -0.0, %X
%Q = select i1 %Y, double %X, double %Z
ret double %Q
}
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 2ee2c83..a9a016b 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -32,10 +32,10 @@ entry:
br label %fast
fast:
- %t0 = add double %r, %s
- %t1 = mul double %t0, %s
- %t2 = sub double %t1, %s
- %t3 = add double %t2, 707.0
+ %t0 = fadd double %r, %s
+ %t1 = fmul double %t0, %s
+ %t2 = fsub double %t1, %s
+ %t3 = fadd double %t2, 707.0
br label %exit
exit:
diff --git a/test/CodeGen/X86/fmul-zero.ll b/test/CodeGen/X86/fmul-zero.ll
new file mode 100644
index 0000000..8f705a4
--- /dev/null
+++ b/test/CodeGen/X86/fmul-zero.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=x86-64 -enable-unsafe-fp-math | not grep mulps
+; RUN: llvm-as < %s | llc -march=x86-64 | grep mulps
+
+define void @test14(<4 x float>*) nounwind {
+ load <4 x float>* %0, align 1
+ mul <4 x float> %2, zeroinitializer
+ store <4 x float> %3, <4 x float>* %0, align 1
+ ret void
+}
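
The new test above encodes why x * 0.0 is not foldable to zero under IEEE 754: NaN and infinity inputs produce NaN, and negative inputs produce -0.0, so llc may delete the mulps only when -enable-unsafe-fp-math licenses it. A scalar sketch of the same point (@mul_by_zero is an illustrative name, not part of the patch):

define float @mul_by_zero(float %x) nounwind {
  ; Folding %p to 0.0 would be wrong for %x = NaN (result NaN),
  ; +/-inf (result NaN), or any negative %x (result -0.0).
  %p = fmul float %x, 0.000000e+00
  ret float %p
}
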
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index 066d38e..f558aca 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -26,23 +26,23 @@ forcond: ; preds = %entry
forbody: ; preds = %forcond
%bitcast204.i313 = bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>> [#uses=1]
- %mul233 = mul <4 x float> %bitcast204.i313, zeroinitializer ; <<4 x float>> [#uses=1]
- %mul257 = mul <4 x float> %mul233, zeroinitializer ; <<4 x float>> [#uses=1]
- %mul275 = mul <4 x float> %mul257, zeroinitializer ; <<4 x float>> [#uses=1]
+ %mul233 = fmul <4 x float> %bitcast204.i313, zeroinitializer ; <<4 x float>> [#uses=1]
+ %mul257 = fmul <4 x float> %mul233, zeroinitializer ; <<4 x float>> [#uses=1]
+ %mul275 = fmul <4 x float> %mul257, zeroinitializer ; <<4 x float>> [#uses=1]
%tmp51 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %mul275, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
%bitcast198.i182 = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=0]
%bitcast204.i185 = bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>> [#uses=1]
%tmp69 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> zeroinitializer) nounwind ; <<4 x i32>> [#uses=1]
%tmp70 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp69) nounwind ; <<4 x float>> [#uses=1]
- %sub140.i78 = sub <4 x float> zeroinitializer, %tmp70 ; <<4 x float>> [#uses=2]
- %mul166.i86 = mul <4 x float> zeroinitializer, %sub140.i78 ; <<4 x float>> [#uses=1]
- %add167.i87 = add <4 x float> %mul166.i86, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 > ; <<4 x float>> [#uses=1]
- %mul171.i88 = mul <4 x float> %add167.i87, %sub140.i78 ; <<4 x float>> [#uses=1]
- %add172.i89 = add <4 x float> %mul171.i88, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1]
+ %sub140.i78 = fsub <4 x float> zeroinitializer, %tmp70 ; <<4 x float>> [#uses=2]
+ %mul166.i86 = fmul <4 x float> zeroinitializer, %sub140.i78 ; <<4 x float>> [#uses=1]
+ %add167.i87 = fadd <4 x float> %mul166.i86, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 > ; <<4 x float>> [#uses=1]
+ %mul171.i88 = fmul <4 x float> %add167.i87, %sub140.i78 ; <<4 x float>> [#uses=1]
+ %add172.i89 = fadd <4 x float> %mul171.i88, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1]
%bitcast176.i90 = bitcast <4 x float> %add172.i89 to <4 x i32> ; <<4 x i32>> [#uses=1]
%andnps178.i92 = and <4 x i32> %bitcast176.i90, zeroinitializer ; <<4 x i32>> [#uses=1]
%bitcast179.i93 = bitcast <4 x i32> %andnps178.i92 to <4 x float> ; <<4 x float>> [#uses=1]
- %mul186.i96 = mul <4 x float> %bitcast179.i93, zeroinitializer ; <<4 x float>> [#uses=1]
+ %mul186.i96 = fmul <4 x float> %bitcast179.i93, zeroinitializer ; <<4 x float>> [#uses=1]
%bitcast190.i98 = bitcast <4 x float> %mul186.i96 to <4 x i32> ; <<4 x i32>> [#uses=1]
%andnps192.i100 = and <4 x i32> %bitcast190.i98, zeroinitializer ; <<4 x i32>> [#uses=1]
%xorps.i102 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
@@ -50,15 +50,15 @@ forbody: ; preds = %forcond
%bitcast204.i104 = bitcast <4 x i32> %orps203.i103 to <4 x float> ; <<4 x float>> [#uses=1]
%cmple.i = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> %tmp51, i8 2) nounwind ; <<4 x float>> [#uses=1]
%tmp80 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
- %sub140.i = sub <4 x float> zeroinitializer, %tmp80 ; <<4 x float>> [#uses=1]
+ %sub140.i = fsub <4 x float> zeroinitializer, %tmp80 ; <<4 x float>> [#uses=1]
%bitcast148.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
%andnps150.i = and <4 x i32> %bitcast148.i, < i32 -2139095041, i32 -2139095041, i32 -2139095041, i32 -2139095041 > ; <<4 x i32>> [#uses=0]
- %mul171.i = mul <4 x float> zeroinitializer, %sub140.i ; <<4 x float>> [#uses=1]
- %add172.i = add <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1]
+ %mul171.i = fmul <4 x float> zeroinitializer, %sub140.i ; <<4 x float>> [#uses=1]
+ %add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1]
%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32> ; <<4 x i32>> [#uses=1]
%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1]
%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float> ; <<4 x float>> [#uses=1]
- %mul186.i = mul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1]
+ %mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1]
%bitcast189.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=0]
%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32> ; <<4 x i32>> [#uses=1]
%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1]
@@ -66,9 +66,9 @@ forbody: ; preds = %forcond
%xorps.i = xor <4 x i32> %bitcast198.i, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
%orps203.i = or <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1]
%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float> ; <<4 x float>> [#uses=1]
- %mul307 = mul <4 x float> %bitcast204.i185, zeroinitializer ; <<4 x float>> [#uses=1]
- %mul310 = mul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2]
- %mul313 = mul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1]
+ %mul307 = fmul <4 x float> %bitcast204.i185, zeroinitializer ; <<4 x float>> [#uses=1]
+ %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2]
+ %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1]
%tmp82 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul307, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
%bitcast11.i15 = bitcast <4 x float> %tmp82 to <4 x i32> ; <<4 x i32>> [#uses=1]
%andnps.i17 = and <4 x i32> %bitcast11.i15, zeroinitializer ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index de6ba6c..2b75781 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -28,22 +28,22 @@ forbody: ; preds = %forcond
%tmp78 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> < float 1.280000e+02, float 1.280000e+02, float 1.280000e+02, float 1.280000e+02 >, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=2]
%tmp79 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp78) nounwind ; <<4 x i32>> [#uses=1]
%tmp80 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp79) nounwind ; <<4 x float>> [#uses=1]
- %sub140.i = sub <4 x float> %tmp78, %tmp80 ; <<4 x float>> [#uses=2]
- %mul166.i = mul <4 x float> zeroinitializer, %sub140.i ; <<4 x float>> [#uses=1]
- %add167.i = add <4 x float> %mul166.i, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 > ; <<4 x float>> [#uses=1]
- %mul171.i = mul <4 x float> %add167.i, %sub140.i ; <<4 x float>> [#uses=1]
- %add172.i = add <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1]
+ %sub140.i = fsub <4 x float> %tmp78, %tmp80 ; <<4 x float>> [#uses=2]
+ %mul166.i = fmul <4 x float> zeroinitializer, %sub140.i ; <<4 x float>> [#uses=1]
+ %add167.i = fadd <4 x float> %mul166.i, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 > ; <<4 x float>> [#uses=1]
+ %mul171.i = fmul <4 x float> %add167.i, %sub140.i ; <<4 x float>> [#uses=1]
+ %add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1]
%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32> ; <<4 x i32>> [#uses=1]
%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1]
%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float> ; <<4 x float>> [#uses=1]
- %mul186.i = mul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1]
+ %mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1]
%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32> ; <<4 x i32>> [#uses=1]
%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1]
%xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
%orps203.i = or <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1]
%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float> ; <<4 x float>> [#uses=1]
- %mul310 = mul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2]
- %mul313 = mul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1]
+ %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2]
+ %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1]
%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1]
%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2]
%andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13 ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
index 1e3ea89..15606c3 100644
--- a/test/CodeGen/X86/fp-in-intregs.ll
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -5,7 +5,7 @@
define i32 @test1(float %x) nounwind {
entry:
- %tmp2 = sub float -0.000000e+00, %x ; <float> [#uses=1]
+ %tmp2 = fsub float -0.000000e+00, %x ; <float> [#uses=1]
%tmp210 = bitcast float %tmp2 to i32 ; <i32> [#uses=1]
ret i32 %tmp210
}
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index 383549a..4e61d0f 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -5,7 +5,7 @@
define float @foo(float* %col.2.0) {
%tmp = load float* %col.2.0 ; <float> [#uses=3]
%tmp16 = fcmp olt float %tmp, 0.000000e+00 ; <i1> [#uses=1]
- %tmp20 = sub float -0.000000e+00, %tmp ; <float> [#uses=1]
+ %tmp20 = fsub float -0.000000e+00, %tmp ; <float> [#uses=1]
%iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp ; <float> [#uses=1]
ret float %iftmp.2.0
}
diff --git a/test/CodeGen/X86/fp_constant_op.ll b/test/CodeGen/X86/fp_constant_op.ll
index ed02c6a..f2017b9 100644
--- a/test/CodeGen/X86/fp_constant_op.ll
+++ b/test/CodeGen/X86/fp_constant_op.ll
@@ -5,22 +5,22 @@
define double @foo_add(double %P) {
- %tmp.1 = add double %P, 1.230000e+02 ; <double> [#uses=1]
+ %tmp.1 = fadd double %P, 1.230000e+02 ; <double> [#uses=1]
ret double %tmp.1
}
define double @foo_mul(double %P) {
- %tmp.1 = mul double %P, 1.230000e+02 ; <double> [#uses=1]
+ %tmp.1 = fmul double %P, 1.230000e+02 ; <double> [#uses=1]
ret double %tmp.1
}
define double @foo_sub(double %P) {
- %tmp.1 = sub double %P, 1.230000e+02 ; <double> [#uses=1]
+ %tmp.1 = fsub double %P, 1.230000e+02 ; <double> [#uses=1]
ret double %tmp.1
}
define double @foo_subr(double %P) {
- %tmp.1 = sub double 1.230000e+02, %P ; <double> [#uses=1]
+ %tmp.1 = fsub double 1.230000e+02, %P ; <double> [#uses=1]
ret double %tmp.1
}
diff --git a/test/CodeGen/X86/fp_load_fold.ll b/test/CodeGen/X86/fp_load_fold.ll
index 7c33cb3..655ad3d 100644
--- a/test/CodeGen/X86/fp_load_fold.ll
+++ b/test/CodeGen/X86/fp_load_fold.ll
@@ -5,25 +5,25 @@
define double @test_add(double %X, double* %P) {
%Y = load double* %P ; <double> [#uses=1]
- %R = add double %X, %Y ; <double> [#uses=1]
+ %R = fadd double %X, %Y ; <double> [#uses=1]
ret double %R
}
define double @test_mul(double %X, double* %P) {
%Y = load double* %P ; <double> [#uses=1]
- %R = mul double %X, %Y ; <double> [#uses=1]
+ %R = fmul double %X, %Y ; <double> [#uses=1]
ret double %R
}
define double @test_sub(double %X, double* %P) {
%Y = load double* %P ; <double> [#uses=1]
- %R = sub double %X, %Y ; <double> [#uses=1]
+ %R = fsub double %X, %Y ; <double> [#uses=1]
ret double %R
}
define double @test_subr(double %X, double* %P) {
%Y = load double* %P ; <double> [#uses=1]
- %R = sub double %Y, %X ; <double> [#uses=1]
+ %R = fsub double %Y, %X ; <double> [#uses=1]
ret double %R
}
diff --git a/test/CodeGen/X86/fsxor-alignment.ll b/test/CodeGen/X86/fsxor-alignment.ll
index 71007dc..4d25fca 100644
--- a/test/CodeGen/X86/fsxor-alignment.ll
+++ b/test/CodeGen/X86/fsxor-alignment.ll
@@ -6,8 +6,8 @@
; and aren't vector-aligned.
define void @foo(float* %p, float* %q, float %s, float %y) {
- %ss = sub float -0.0, %s
- %yy = sub float -0.0, %y
+ %ss = fsub float -0.0, %s
+ %yy = fsub float -0.0, %y
store float %ss, float* %p
store float %yy, float* %q
ret void
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index ee9eaf9..4a85779 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -13,7 +13,7 @@ bb: ; preds = %bb, %entry
%2 = load float* %1, align 4 ; <float> [#uses=1]
%3 = getelementptr float* %B, i32 %i.03 ; <float*> [#uses=1]
%4 = load float* %3, align 4 ; <float> [#uses=1]
- %5 = add float %2, %4 ; <float> [#uses=1]
+ %5 = fadd float %2, %4 ; <float> [#uses=1]
%6 = getelementptr float* %C, i32 %i.03 ; <float*> [#uses=1]
store float %5, float* %6, align 4
%7 = add i32 %i.03, 10 ; <i32> [#uses=3]
@@ -21,7 +21,7 @@ bb: ; preds = %bb, %entry
%9 = load float* %8, align 4 ; <float> [#uses=1]
%10 = getelementptr float* %B, i32 %7 ; <float*> [#uses=1]
%11 = load float* %10, align 4 ; <float> [#uses=1]
- %12 = add float %9, %11 ; <float> [#uses=1]
+ %12 = fadd float %9, %11 ; <float> [#uses=1]
%13 = getelementptr float* %C, i32 %7 ; <float*> [#uses=1]
store float %12, float* %13, align 4
%indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/ga-offset.ll b/test/CodeGen/X86/ga-offset.ll
index cc93b4c..aaa2f84 100644
--- a/test/CodeGen/X86/ga-offset.ll
+++ b/test/CodeGen/X86/ga-offset.ll
@@ -2,7 +2,7 @@
; RUN: not grep lea %t
; RUN: not grep add %t
; RUN: grep mov %t | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=static > %t
+; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static > %t
; RUN: not grep lea %t
; RUN: not grep add %t
; RUN: grep mov %t | count 1
diff --git a/test/CodeGen/X86/illegal-vector-args-return.ll b/test/CodeGen/X86/illegal-vector-args-return.ll
index 8fb6db35..5ed6ddb 100644
--- a/test/CodeGen/X86/illegal-vector-args-return.ll
+++ b/test/CodeGen/X86/illegal-vector-args-return.ll
@@ -4,11 +4,11 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {addps %xmm2, %xmm0}
define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
- %y = mul <4 x double> %x, %z
+ %y = fmul <4 x double> %x, %z
ret <4 x double> %y
}
define <8 x float> @bar(<8 x float> %x, <8 x float> %z) {
- %y = add <8 x float> %x, %z
+ %y = fadd <8 x float> %x, %z
ret <8 x float> %y
}
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index 91f2f2f..31d94d8 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -21,7 +21,7 @@ define void @test4(double %X) {
}
define void @test5(double %X) {
- %Y = add double %X, 123.0
+ %Y = fadd double %X, 123.0
call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( double %Y)
ret void
}
diff --git a/test/CodeGen/X86/inline-asm-mrv.ll b/test/CodeGen/X86/inline-asm-mrv.ll
index f679c7f..ca39c12 100644
--- a/test/CodeGen/X86/inline-asm-mrv.ll
+++ b/test/CodeGen/X86/inline-asm-mrv.ll
@@ -21,7 +21,7 @@ define <4 x float> @test2() nounwind {
%mrv = call {<4 x float>, <4 x float>} asm "set $0, $1", "=x,=x"()
%a = getresult {<4 x float>, <4 x float>} %mrv, 0
%b = getresult {<4 x float>, <4 x float>} %mrv, 1
- %c = add <4 x float> %a, %b
+ %c = fadd <4 x float> %a, %b
ret <4 x float> %c
}
diff --git a/test/CodeGen/X86/inline-asm-x-scalar.ll b/test/CodeGen/X86/inline-asm-x-scalar.ll
index d1bac0c..aafbbd1 100644
--- a/test/CodeGen/X86/inline-asm-x-scalar.ll
+++ b/test/CodeGen/X86/inline-asm-x-scalar.ll
@@ -17,7 +17,7 @@ define void @test3() {
define void @test4() {
%tmp1 = tail call float asm "", "=x,0,~{dirflag},~{fpsr},~{flags}"( float 0x47EFFFFFE0000000 ); <float> [#uses=1]
- %tmp4 = sub float %tmp1, 0x3810000000000000 ; <float> [#uses=1]
+ %tmp4 = fsub float %tmp1, 0x3810000000000000 ; <float> [#uses=1]
tail call void asm sideeffect "", "x,~{dirflag},~{fpsr},~{flags}"( float %tmp4 )
ret void
}
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index 275feba..2208b2d 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -17,52 +17,52 @@ target triple = "x86_64-unknown-linux-gnu"
define void @foo(float* %A, i32 %IA, float* %B, i32 %IB, float* nocapture %C, i32 %N) nounwind {
entry:
- %0 = xor i32 %IA, 1 ; <i32> [#uses=1]
- %1 = xor i32 %IB, 1 ; <i32> [#uses=1]
- %2 = or i32 %1, %0 ; <i32> [#uses=1]
- %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1]
- br i1 %3, label %bb2, label %bb13
+ %0 = xor i32 %IA, 1 ; <i32> [#uses=1]
+ %1 = xor i32 %IB, 1 ; <i32> [#uses=1]
+ %2 = or i32 %1, %0 ; <i32> [#uses=1]
+ %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1]
+ br i1 %3, label %bb2, label %bb13
bb: ; preds = %bb3
- %4 = load float* %A_addr.0, align 4 ; <float> [#uses=1]
- %5 = load float* %B_addr.0, align 4 ; <float> [#uses=1]
- %6 = mul float %4, %5 ; <float> [#uses=1]
- %7 = add float %6, %Sum0.0 ; <float> [#uses=1]
- %indvar.next154 = add i64 %B_addr.0.rec, 1 ; <i64> [#uses=1]
- br label %bb2
+ %4 = load float* %A_addr.0, align 4 ; <float> [#uses=1]
+ %5 = load float* %B_addr.0, align 4 ; <float> [#uses=1]
+ %6 = fmul float %4, %5 ; <float> [#uses=1]
+ %7 = fadd float %6, %Sum0.0 ; <float> [#uses=1]
+ %indvar.next154 = add i64 %B_addr.0.rec, 1 ; <i64> [#uses=1]
+ br label %bb2
bb2: ; preds = %entry, %bb
- %B_addr.0.rec = phi i64 [ %indvar.next154, %bb ], [ 0, %entry ] ; <i64> [#uses=14]
- %Sum0.0 = phi float [ %7, %bb ], [ 0.000000e+00, %entry ] ; <float> [#uses=5]
- %indvar146 = trunc i64 %B_addr.0.rec to i32 ; <i32> [#uses=1]
- %N_addr.0 = sub i32 %N, %indvar146 ; <i32> [#uses=6]
- %A_addr.0 = getelementptr float* %A, i64 %B_addr.0.rec ; <float*> [#uses=4]
- %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; <float*> [#uses=4]
- %8 = icmp sgt i32 %N_addr.0, 0 ; <i1> [#uses=1]
- br i1 %8, label %bb3, label %bb4
+ %B_addr.0.rec = phi i64 [ %indvar.next154, %bb ], [ 0, %entry ] ; <i64> [#uses=14]
+ %Sum0.0 = phi float [ %7, %bb ], [ 0.000000e+00, %entry ] ; <float> [#uses=5]
+ %indvar146 = trunc i64 %B_addr.0.rec to i32 ; <i32> [#uses=1]
+ %N_addr.0 = sub i32 %N, %indvar146 ; <i32> [#uses=6]
+ %A_addr.0 = getelementptr float* %A, i64 %B_addr.0.rec ; <float*> [#uses=4]
+ %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; <float*> [#uses=4]
+ %8 = icmp sgt i32 %N_addr.0, 0 ; <i1> [#uses=1]
+ br i1 %8, label %bb3, label %bb4
bb3: ; preds = %bb2
- %9 = ptrtoint float* %A_addr.0 to i64 ; <i64> [#uses=1]
- %10 = and i64 %9, 15 ; <i64> [#uses=1]
- %11 = icmp eq i64 %10, 0 ; <i1> [#uses=1]
- br i1 %11, label %bb4, label %bb
+ %9 = ptrtoint float* %A_addr.0 to i64 ; <i64> [#uses=1]
+ %10 = and i64 %9, 15 ; <i64> [#uses=1]
+ %11 = icmp eq i64 %10, 0 ; <i1> [#uses=1]
+ br i1 %11, label %bb4, label %bb
bb4: ; preds = %bb3, %bb2
- %12 = ptrtoint float* %B_addr.0 to i64 ; <i64> [#uses=1]
- %13 = and i64 %12, 15 ; <i64> [#uses=1]
- %14 = icmp eq i64 %13, 0 ; <i1> [#uses=1]
- %15 = icmp sgt i32 %N_addr.0, 15 ; <i1> [#uses=2]
- br i1 %14, label %bb6.preheader, label %bb10.preheader
+ %12 = ptrtoint float* %B_addr.0 to i64 ; <i64> [#uses=1]
+ %13 = and i64 %12, 15 ; <i64> [#uses=1]
+ %14 = icmp eq i64 %13, 0 ; <i1> [#uses=1]
+ %15 = icmp sgt i32 %N_addr.0, 15 ; <i1> [#uses=2]
+ br i1 %14, label %bb6.preheader, label %bb10.preheader
bb10.preheader: ; preds = %bb4
- br i1 %15, label %bb9, label %bb12.loopexit
+ br i1 %15, label %bb9, label %bb12.loopexit
bb6.preheader: ; preds = %bb4
- br i1 %15, label %bb5, label %bb8.loopexit
+ br i1 %15, label %bb5, label %bb8.loopexit
bb5: ; preds = %bb5, %bb6.preheader
- %indvar143 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next144, %bb5 ] ; <i64> [#uses=3]
- %vSum0.072 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ] ; <<4 x float>> [#uses=1]
+ %indvar143 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next144, %bb5 ] ; <i64> [#uses=3]
+ %vSum0.072 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ] ; <<4 x float>> [#uses=1]
%vSum1.070 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ] ; <<4 x float>> [#uses=1]
%vSum2.069 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ] ; <<4 x float>> [#uses=1]
%vSum3.067 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ] ; <<4 x float>> [#uses=1]
@@ -78,8 +78,8 @@ bb5: ; preds = %bb5, %bb6.preheader
%17 = load <4 x float>* %16, align 16 ; <<4 x float>> [#uses=1]
%18 = bitcast float* %B_addr.271 to <4 x float>* ; <<4 x float>*> [#uses=1]
%19 = load <4 x float>* %18, align 16 ; <<4 x float>> [#uses=1]
- %20 = mul <4 x float> %17, %19 ; <<4 x float>> [#uses=1]
- %21 = add <4 x float> %20, %vSum0.072 ; <<4 x float>> [#uses=2]
+ %20 = fmul <4 x float> %17, %19 ; <<4 x float>> [#uses=1]
+ %21 = fadd <4 x float> %20, %vSum0.072 ; <<4 x float>> [#uses=2]
%A_addr.273.sum163 = or i64 %A_addr.273.rec, 4 ; <i64> [#uses=1]
%A_addr.0.sum175 = add i64 %B_addr.0.rec, %A_addr.273.sum163 ; <i64> [#uses=2]
%22 = getelementptr float* %A, i64 %A_addr.0.sum175 ; <float*> [#uses=1]
@@ -88,8 +88,8 @@ bb5: ; preds = %bb5, %bb6.preheader
%25 = getelementptr float* %B, i64 %A_addr.0.sum175 ; <float*> [#uses=1]
%26 = bitcast float* %25 to <4 x float>* ; <<4 x float>*> [#uses=1]
%27 = load <4 x float>* %26, align 16 ; <<4 x float>> [#uses=1]
- %28 = mul <4 x float> %24, %27 ; <<4 x float>> [#uses=1]
- %29 = add <4 x float> %28, %vSum1.070 ; <<4 x float>> [#uses=2]
+ %28 = fmul <4 x float> %24, %27 ; <<4 x float>> [#uses=1]
+ %29 = fadd <4 x float> %28, %vSum1.070 ; <<4 x float>> [#uses=2]
%A_addr.273.sum161 = or i64 %A_addr.273.rec, 8 ; <i64> [#uses=1]
%A_addr.0.sum174 = add i64 %B_addr.0.rec, %A_addr.273.sum161 ; <i64> [#uses=2]
%30 = getelementptr float* %A, i64 %A_addr.0.sum174 ; <float*> [#uses=1]
@@ -98,8 +98,8 @@ bb5: ; preds = %bb5, %bb6.preheader
%33 = getelementptr float* %B, i64 %A_addr.0.sum174 ; <float*> [#uses=1]
%34 = bitcast float* %33 to <4 x float>* ; <<4 x float>*> [#uses=1]
%35 = load <4 x float>* %34, align 16 ; <<4 x float>> [#uses=1]
- %36 = mul <4 x float> %32, %35 ; <<4 x float>> [#uses=1]
- %37 = add <4 x float> %36, %vSum2.069 ; <<4 x float>> [#uses=2]
+ %36 = fmul <4 x float> %32, %35 ; <<4 x float>> [#uses=1]
+ %37 = fadd <4 x float> %36, %vSum2.069 ; <<4 x float>> [#uses=2]
%A_addr.273.sum159 = or i64 %A_addr.273.rec, 12 ; <i64> [#uses=1]
%A_addr.0.sum173 = add i64 %B_addr.0.rec, %A_addr.273.sum159 ; <i64> [#uses=2]
%38 = getelementptr float* %A, i64 %A_addr.0.sum173 ; <float*> [#uses=1]
@@ -108,8 +108,8 @@ bb5: ; preds = %bb5, %bb6.preheader
%41 = getelementptr float* %B, i64 %A_addr.0.sum173 ; <float*> [#uses=1]
%42 = bitcast float* %41 to <4 x float>* ; <<4 x float>*> [#uses=1]
%43 = load <4 x float>* %42, align 16 ; <<4 x float>> [#uses=1]
- %44 = mul <4 x float> %40, %43 ; <<4 x float>> [#uses=1]
- %45 = add <4 x float> %44, %vSum3.067 ; <<4 x float>> [#uses=2]
+ %44 = fmul <4 x float> %40, %43 ; <<4 x float>> [#uses=1]
+ %45 = fadd <4 x float> %44, %vSum3.067 ; <<4 x float>> [#uses=2]
%.rec83 = add i64 %A_addr.273.rec, 16 ; <i64> [#uses=1]
%A_addr.0.sum172 = add i64 %B_addr.0.rec, %.rec83 ; <i64> [#uses=2]
%46 = getelementptr float* %A, i64 %A_addr.0.sum172 ; <float*> [#uses=1]
@@ -132,8 +132,8 @@ bb7: ; preds = %bb7, %bb8.loopexit
%51 = load <4 x float>* %50, align 16 ; <<4 x float>> [#uses=1]
%52 = bitcast float* %B_addr.359 to <4 x float>* ; <<4 x float>*> [#uses=1]
%53 = load <4 x float>* %52, align 16 ; <<4 x float>> [#uses=1]
- %54 = mul <4 x float> %51, %53 ; <<4 x float>> [#uses=1]
- %55 = add <4 x float> %54, %vSum0.260 ; <<4 x float>> [#uses=2]
+ %54 = fmul <4 x float> %51, %53 ; <<4 x float>> [#uses=1]
+ %55 = fadd <4 x float> %54, %vSum0.260 ; <<4 x float>> [#uses=2]
%.rec85 = add i64 %A_addr.361.rec, 4 ; <i64> [#uses=2]
%56 = getelementptr float* %A_addr.2.lcssa, i64 %.rec85 ; <float*> [#uses=1]
%57 = getelementptr float* %B_addr.2.lcssa, i64 %.rec85 ; <float*> [#uses=1]
@@ -185,23 +185,23 @@ bb9: ; preds = %bb9, %bb10.preheader
%71 = load <4 x float>* %70, align 1
%72 = bitcast float* %A_addr.440 to <4 x float>* ; <<4 x float>*> [#uses=1]
%73 = load <4 x float>* %72, align 16 ; <<4 x float>> [#uses=1]
- %74 = mul <4 x float> %73, %62 ; <<4 x float>> [#uses=1]
- %75 = add <4 x float> %74, %vSum0.339 ; <<4 x float>> [#uses=2]
+ %74 = fmul <4 x float> %73, %62 ; <<4 x float>> [#uses=1]
+ %75 = fadd <4 x float> %74, %vSum0.339 ; <<4 x float>> [#uses=2]
%76 = getelementptr float* %A, i64 %B_addr.0.sum187 ; <float*> [#uses=1]
%77 = bitcast float* %76 to <4 x float>* ; <<4 x float>*> [#uses=1]
%78 = load <4 x float>* %77, align 16 ; <<4 x float>> [#uses=1]
- %79 = mul <4 x float> %78, %65 ; <<4 x float>> [#uses=1]
- %80 = add <4 x float> %79, %vSum1.237 ; <<4 x float>> [#uses=2]
+ %79 = fmul <4 x float> %78, %65 ; <<4 x float>> [#uses=1]
+ %80 = fadd <4 x float> %79, %vSum1.237 ; <<4 x float>> [#uses=2]
%81 = getelementptr float* %A, i64 %B_addr.0.sum186 ; <float*> [#uses=1]
%82 = bitcast float* %81 to <4 x float>* ; <<4 x float>*> [#uses=1]
%83 = load <4 x float>* %82, align 16 ; <<4 x float>> [#uses=1]
- %84 = mul <4 x float> %83, %68 ; <<4 x float>> [#uses=1]
- %85 = add <4 x float> %84, %vSum2.236 ; <<4 x float>> [#uses=2]
+ %84 = fmul <4 x float> %83, %68 ; <<4 x float>> [#uses=1]
+ %85 = fadd <4 x float> %84, %vSum2.236 ; <<4 x float>> [#uses=2]
%86 = getelementptr float* %A, i64 %B_addr.0.sum185 ; <float*> [#uses=1]
%87 = bitcast float* %86 to <4 x float>* ; <<4 x float>*> [#uses=1]
%88 = load <4 x float>* %87, align 16 ; <<4 x float>> [#uses=1]
- %89 = mul <4 x float> %88, %71 ; <<4 x float>> [#uses=1]
- %90 = add <4 x float> %89, %vSum3.234 ; <<4 x float>> [#uses=2]
+ %89 = fmul <4 x float> %88, %71 ; <<4 x float>> [#uses=1]
+ %90 = fadd <4 x float> %89, %vSum3.234 ; <<4 x float>> [#uses=2]
%.rec89 = add i64 %A_addr.440.rec, 16 ; <i64> [#uses=1]
%A_addr.0.sum170 = add i64 %B_addr.0.rec, %.rec89 ; <i64> [#uses=2]
%91 = getelementptr float* %A, i64 %A_addr.0.sum170 ; <float*> [#uses=1]
@@ -224,8 +224,8 @@ bb11: ; preds = %bb11, %bb12.loopexit
%96 = load <4 x float>* %95, align 1
%97 = bitcast float* %A_addr.529 to <4 x float>* ; <<4 x float>*> [#uses=1]
%98 = load <4 x float>* %97, align 16 ; <<4 x float>> [#uses=1]
- %99 = mul <4 x float> %98, %96 ; <<4 x float>> [#uses=1]
- %100 = add <4 x float> %99, %vSum0.428 ; <<4 x float>> [#uses=2]
+ %99 = fmul <4 x float> %98, %96 ; <<4 x float>> [#uses=1]
+ %100 = fadd <4 x float> %99, %vSum0.428 ; <<4 x float>> [#uses=2]
%.rec91 = add i64 %A_addr.529.rec, 4 ; <i64> [#uses=2]
%101 = getelementptr float* %A_addr.4.lcssa, i64 %.rec91 ; <float*> [#uses=1]
%102 = getelementptr float* %B_addr.4.lcssa, i64 %.rec91 ; <float*> [#uses=1]
@@ -254,17 +254,17 @@ bb13: ; preds = %bb12.loopexit, %bb11, %bb8.loopexit, %bb7, %entry
%B_addr.1 = phi float* [ %B, %entry ], [ %B_addr.2.lcssa, %bb8.loopexit ], [ %57, %bb7 ], [ %B_addr.4.lcssa, %bb12.loopexit ], [ %102, %bb11 ] ; <float*> [#uses=1]
%vSum0.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ], [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ] ; <<4 x float>> [#uses=1]
%A_addr.1 = phi float* [ %A, %entry ], [ %A_addr.2.lcssa, %bb8.loopexit ], [ %56, %bb7 ], [ %A_addr.4.lcssa, %bb12.loopexit ], [ %101, %bb11 ] ; <float*> [#uses=1]
- %106 = add <4 x float> %vSum0.1, %vSum2.1 ; <<4 x float>> [#uses=1]
- %107 = add <4 x float> %vSum1.1, %vSum3.1 ; <<4 x float>> [#uses=1]
- %108 = add <4 x float> %106, %107 ; <<4 x float>> [#uses=4]
+ %106 = fadd <4 x float> %vSum0.1, %vSum2.1 ; <<4 x float>> [#uses=1]
+ %107 = fadd <4 x float> %vSum1.1, %vSum3.1 ; <<4 x float>> [#uses=1]
+ %108 = fadd <4 x float> %106, %107 ; <<4 x float>> [#uses=4]
%tmp23 = extractelement <4 x float> %108, i32 0 ; <float> [#uses=1]
%tmp21 = extractelement <4 x float> %108, i32 1 ; <float> [#uses=1]
- %109 = add float %tmp23, %tmp21 ; <float> [#uses=1]
+ %109 = fadd float %tmp23, %tmp21 ; <float> [#uses=1]
%tmp19 = extractelement <4 x float> %108, i32 2 ; <float> [#uses=1]
%tmp17 = extractelement <4 x float> %108, i32 3 ; <float> [#uses=1]
- %110 = add float %tmp19, %tmp17 ; <float> [#uses=1]
- %111 = add float %109, %110 ; <float> [#uses=1]
- %Sum0.254 = add float %111, %Sum0.1 ; <float> [#uses=2]
+ %110 = fadd float %tmp19, %tmp17 ; <float> [#uses=1]
+ %111 = fadd float %109, %110 ; <float> [#uses=1]
+ %Sum0.254 = fadd float %111, %Sum0.1 ; <float> [#uses=2]
%112 = icmp sgt i32 %N_addr.1, 0 ; <i1> [#uses=1]
br i1 %112, label %bb.nph56, label %bb16
@@ -283,8 +283,8 @@ bb14: ; preds = %bb14, %bb.nph56
%A_addr.653 = getelementptr float* %A_addr.1, i64 %A_addr.653.rec ; <float*> [#uses=1]
%113 = load float* %A_addr.653, align 4 ; <float> [#uses=1]
%114 = load float* %B_addr.652, align 4 ; <float> [#uses=1]
- %115 = mul float %113, %114 ; <float> [#uses=1]
- %Sum0.2 = add float %115, %Sum0.255 ; <float> [#uses=2]
+ %115 = fmul float %113, %114 ; <float> [#uses=1]
+ %Sum0.2 = fadd float %115, %Sum0.255 ; <float> [#uses=2]
%indvar.next118 = add i64 %indvar117, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %indvar.next118, %tmp. ; <i1> [#uses=1]
br i1 %exitcond, label %bb16, label %bb14
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index e102535..0bf347c 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -20,16 +20,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 10
@@ -48,16 +48,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 0
@@ -77,17 +77,17 @@ loop:
%indvar.i8 = ashr i64 %s0, 8
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 10
@@ -107,17 +107,17 @@ loop:
%indvar.i8 = ashr i64 %s0, 8
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 0
@@ -136,16 +136,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 0
@@ -164,16 +164,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 18446744073709551615
@@ -193,17 +193,17 @@ loop:
%indvar.i8 = ashr i64 %s0, 8
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 0
@@ -223,17 +223,17 @@ loop:
%indvar.i8 = ashr i64 %s0, 8
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 18446744073709551615
diff --git a/test/CodeGen/X86/masked-iv-unsafe.ll b/test/CodeGen/X86/masked-iv-unsafe.ll
index 7ccfe85..639a7a6 100644
--- a/test/CodeGen/X86/masked-iv-unsafe.ll
+++ b/test/CodeGen/X86/masked-iv-unsafe.ll
@@ -15,16 +15,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 0
@@ -43,16 +43,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 20
@@ -72,17 +72,17 @@ loop:
%indvar.i8 = ashr i64 %s0, 8
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 0
@@ -102,17 +102,17 @@ loop:
%indvar.i8 = ashr i64 %s0, 8
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 20
@@ -131,16 +131,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %n
@@ -159,16 +159,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 10
@@ -188,17 +188,17 @@ loop:
%indvar.i8 = ashr i64 %s0, 8
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %n
@@ -218,17 +218,17 @@ loop:
%indvar.i8 = ashr i64 %s0, 8
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 10
@@ -247,16 +247,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 18446744073709551615
@@ -275,16 +275,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 3
%exitcond = icmp eq i64 %indvar.next, 10
@@ -303,16 +303,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 3
%exitcond = icmp eq i64 %indvar.next, 0
@@ -332,17 +332,17 @@ loop:
%indvar.i8 = ashr i64 %s0, 8
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 3
%exitcond = icmp eq i64 %indvar.next, 10
@@ -362,17 +362,17 @@ loop:
%indvar.i8 = ashr i64 %s0, 8
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 3
%exitcond = icmp eq i64 %indvar.next, 0
diff --git a/test/CodeGen/X86/multiple-return-values.ll b/test/CodeGen/X86/multiple-return-values.ll
index 2e754a8..5f7a83f 100644
--- a/test/CodeGen/X86/multiple-return-values.ll
+++ b/test/CodeGen/X86/multiple-return-values.ll
@@ -2,7 +2,7 @@
define {i64, float} @bar(i64 %a, float %b) {
%y = add i64 %a, 7
- %z = add float %b, 7.0
+ %z = fadd float %b, 7.0
ret i64 %y, float %z
}
diff --git a/test/CodeGen/X86/neg_fp.ll b/test/CodeGen/X86/neg_fp.ll
index 55c7654..1a7ee08 100644
--- a/test/CodeGen/X86/neg_fp.ll
+++ b/test/CodeGen/X86/neg_fp.ll
@@ -6,7 +6,7 @@
define float @negfp(float %a, float %b) {
entry:
- %sub = sub float %a, %b ; <float> [#uses=1]
- %neg = sub float -0.000000e+00, %sub ; <float> [#uses=1]
+ %sub = fsub float %a, %b ; <float> [#uses=1]
+ %neg = fsub float -0.000000e+00, %sub ; <float> [#uses=1]
ret float %neg
}
\ No newline at end of file
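
As in fabs.ll, fp-stack-compare.ll, and fsxor-alignment.ll above, negation here is spelled as a subtraction from -0.0: the IR of this era has no fneg instruction, and subtracting from +0.0 would be wrong because 0.0 - 0.0 yields +0.0 rather than -0.0. A minimal sketch (@negate is an illustrative name, not part of the patch):

define double @negate(double %x) nounwind {
  ; Canonical FP negation: flips only the sign bit, which on x86
  ; with SSE lowers to an xor against a sign-bit mask constant.
  %neg = fsub double -0.000000e+00, %x
  ret double %neg
}
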
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index 59a2bd0..689639f 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -843,14 +843,14 @@ entry:
%8 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=0]
%9 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=0]
%10 = load double* null, align 8 ; <double> [#uses=2]
- %11 = sub double -0.000000e+00, %10 ; <double> [#uses=1]
+ %11 = fsub double -0.000000e+00, %10 ; <double> [#uses=1]
%12 = load double* null, align 8 ; <double> [#uses=2]
%13 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=1]
%14 = load double* %13, align 8 ; <double> [#uses=2]
- %15 = sub double -0.000000e+00, %14 ; <double> [#uses=1]
+ %15 = fsub double -0.000000e+00, %14 ; <double> [#uses=1]
%16 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
%17 = load double* %16, align 8 ; <double> [#uses=2]
- %18 = sub double -0.000000e+00, %17 ; <double> [#uses=1]
+ %18 = fsub double -0.000000e+00, %17 ; <double> [#uses=1]
%19 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0]
%20 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=0]
%21 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 6 ; <double*> [#uses=0]
@@ -866,28 +866,28 @@ entry:
%31 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0]
%32 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
%33 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %34 = mul double %17, %5 ; <double> [#uses=1]
- %35 = add double 0.000000e+00, %34 ; <double> [#uses=1]
- %36 = add double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %37 = mul double %14, %3 ; <double> [#uses=1]
- %38 = add double %36, %37 ; <double> [#uses=1]
- %39 = mul double %12, %4 ; <double> [#uses=1]
- %40 = add double %38, %39 ; <double> [#uses=1]
- %41 = mul double %5, %11 ; <double> [#uses=1]
- %42 = add double %40, %41 ; <double> [#uses=2]
+ %34 = fmul double %17, %5 ; <double> [#uses=1]
+ %35 = fadd double 0.000000e+00, %34 ; <double> [#uses=1]
+ %36 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %37 = fmul double %14, %3 ; <double> [#uses=1]
+ %38 = fadd double %36, %37 ; <double> [#uses=1]
+ %39 = fmul double %12, %4 ; <double> [#uses=1]
+ %40 = fadd double %38, %39 ; <double> [#uses=1]
+ %41 = fmul double %5, %11 ; <double> [#uses=1]
+ %42 = fadd double %40, %41 ; <double> [#uses=2]
store double %42, double* %32, align 8
- %43 = mul double %2, %15 ; <double> [#uses=1]
- %44 = add double %43, 0.000000e+00 ; <double> [#uses=1]
- %45 = mul double %3, %18 ; <double> [#uses=1]
- %46 = add double %44, %45 ; <double> [#uses=1]
- %47 = mul double %10, %4 ; <double> [#uses=1]
- %48 = add double %46, %47 ; <double> [#uses=1]
- %49 = mul double %12, %5 ; <double> [#uses=1]
- %50 = add double %48, %49 ; <double> [#uses=2]
+ %43 = fmul double %2, %15 ; <double> [#uses=1]
+ %44 = fadd double %43, 0.000000e+00 ; <double> [#uses=1]
+ %45 = fmul double %3, %18 ; <double> [#uses=1]
+ %46 = fadd double %44, %45 ; <double> [#uses=1]
+ %47 = fmul double %10, %4 ; <double> [#uses=1]
+ %48 = fadd double %46, %47 ; <double> [#uses=1]
+ %49 = fmul double %12, %5 ; <double> [#uses=1]
+ %50 = fadd double %48, %49 ; <double> [#uses=2]
store double %50, double* %33, align 8
- %51 = mul double %35, 2.000000e+00 ; <double> [#uses=1]
- %52 = mul double %42, 2.000000e+00 ; <double> [#uses=1]
- %53 = mul double %50, 2.000000e+00 ; <double> [#uses=1]
+ %51 = fmul double %35, 2.000000e+00 ; <double> [#uses=1]
+ %52 = fmul double %42, 2.000000e+00 ; <double> [#uses=1]
+ %53 = fmul double %50, 2.000000e+00 ; <double> [#uses=1]
%54 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
store double %51, double* %54, align 8
%55 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/negative-sin.ll b/test/CodeGen/X86/negative-sin.ll
index 39c6297..8cc1bec 100644
--- a/test/CodeGen/X86/negative-sin.ll
+++ b/test/CodeGen/X86/negative-sin.ll
@@ -5,8 +5,8 @@ declare double @sin(double %f)
define double @foo(double %e)
{
- %f = sub double 0.0, %e
+ %f = fsub double 0.0, %e
%g = call double @sin(double %f)
- %h = sub double 0.0, %g
+ %h = fsub double 0.0, %g
ret double %h
}
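
Note the two spellings of floating-point negation across these tests. Subtracting from -0.0 is the canonical negation idiom because it is exact for every input under IEEE 754; subtracting from +0.0, as in this negative-sin test, is not, since +0.0 - +0.0 yields +0.0 rather than -0.0, so treating it as a negation is only legal under relaxed FP semantics:

    %n1 = fsub double -0.000000e+00, %x   ; == -%x for all %x, including +/-0.0
    %n2 = fsub double 0.000000e+00, %x    ; == +0.0 when %x is +0.0, so not a true negation
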
diff --git a/test/CodeGen/X86/peep-test-0.ll b/test/CodeGen/X86/peep-test-0.ll
index a95b564..8dcd23a 100644
--- a/test/CodeGen/X86/peep-test-0.ll
+++ b/test/CodeGen/X86/peep-test-0.ll
@@ -11,7 +11,7 @@ bb:
%i.03 = add i64 %indvar, %n
%0 = getelementptr double* %d, i64 %i.03
%1 = load double* %0, align 8
- %2 = mul double %1, 3.000000e+00
+ %2 = fmul double %1, 3.000000e+00
store double %2, double* %0, align 8
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 0
diff --git a/test/CodeGen/X86/peep-test-1.ll b/test/CodeGen/X86/peep-test-1.ll
index b4698e3..85e3bf2 100644
--- a/test/CodeGen/X86/peep-test-1.ll
+++ b/test/CodeGen/X86/peep-test-1.ll
@@ -11,7 +11,7 @@ bb:
%i.03 = sub i32 %n, %indvar
%1 = getelementptr double* %p, i32 %i.03
%2 = load double* %1, align 4
- %3 = mul double %2, 2.930000e+00
+ %3 = fmul double %2, 2.930000e+00
store double %3, double* %1, align 4
%4 = add i32 %i.03, -1
%phitmp = icmp slt i32 %4, 0
diff --git a/test/CodeGen/X86/phys_subreg_coalesce.ll b/test/CodeGen/X86/phys_subreg_coalesce.ll
index 789a4ba..3bbc55d 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce.ll
@@ -8,16 +8,16 @@ entry:
%1 = sitofp i32 %0 to double ; <double> [#uses=1]
%2 = trunc i64 %p2.0 to i32 ; <i32> [#uses=1]
%3 = sitofp i32 %2 to double ; <double> [#uses=1]
- %4 = add double %1, %3 ; <double> [#uses=1]
- %5 = mul double %4, 5.000000e-01 ; <double> [#uses=1]
+ %4 = fadd double %1, %3 ; <double> [#uses=1]
+ %5 = fmul double %4, 5.000000e-01 ; <double> [#uses=1]
%6 = lshr i64 %p1.0, 32 ; <i64> [#uses=1]
%7 = trunc i64 %6 to i32 ; <i32> [#uses=1]
%8 = sitofp i32 %7 to double ; <double> [#uses=1]
%9 = lshr i64 %p2.0, 32 ; <i64> [#uses=1]
%10 = trunc i64 %9 to i32 ; <i32> [#uses=1]
%11 = sitofp i32 %10 to double ; <double> [#uses=1]
- %12 = add double %8, %11 ; <double> [#uses=1]
- %13 = mul double %12, 5.000000e-01 ; <double> [#uses=1]
+ %12 = fadd double %8, %11 ; <double> [#uses=1]
+ %13 = fmul double %12, 5.000000e-01 ; <double> [#uses=1]
%mrv3 = insertvalue %struct.dpoint undef, double %5, 0 ; <%struct.dpoint> [#uses=1]
%mrv4 = insertvalue %struct.dpoint %mrv3, double %13, 1 ; <%struct.dpoint> [#uses=1]
ret %struct.dpoint %mrv4
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index 3f6c365..96976b8 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -12,9 +12,9 @@ entry:
%tmp1 = load float* %tmp ; <float> [#uses=1]
%tmp2 = getelementptr %struct.anon* %p, i32 0, i32 1 ; <float*> [#uses=1]
%tmp3 = load float* %tmp2 ; <float> [#uses=1]
- %neg = sub float -0.000000e+00, %tmp1 ; <float> [#uses=1]
+ %neg = fsub float -0.000000e+00, %tmp1 ; <float> [#uses=1]
%conv = fpext float %neg to double ; <double> [#uses=1]
- %neg4 = sub float -0.000000e+00, %tmp3 ; <float> [#uses=1]
+ %neg4 = fsub float -0.000000e+00, %tmp3 ; <float> [#uses=1]
%conv5 = fpext float %neg4 to double ; <double> [#uses=1]
%call = call i32 (...)* @printf( i8* getelementptr ([17 x i8]* @.str, i32 0, i32 0), double %conv, double %conv5 ) ; <i32> [#uses=0]
ret void
diff --git a/test/CodeGen/X86/pr3154.ll b/test/CodeGen/X86/pr3154.ll
index a1ed0c2..73f5101 100644
--- a/test/CodeGen/X86/pr3154.ll
+++ b/test/CodeGen/X86/pr3154.ll
@@ -22,7 +22,7 @@ bb: ; preds = %entry
bb19: ; preds = %bb, %entry
%data15.0 = phi double* [ %7, %bb ], [ %3, %entry ] ; <double*> [#uses=5]
%8 = sitofp i32 %len to double ; <double> [#uses=1]
- %9 = sub double %8, 1.000000e+00 ; <double> [#uses=1]
+ %9 = fsub double %8, 1.000000e+00 ; <double> [#uses=1]
%10 = fdiv double 2.000000e+00, %9 ; <double> [#uses=1]
store double %10, double* %c, align 8
%11 = ashr i32 %len, 1 ; <i32> [#uses=3]
diff --git a/test/CodeGen/X86/pr3457.ll b/test/CodeGen/X86/pr3457.ll
index 36d4a5d..d4a9810 100644
--- a/test/CodeGen/X86/pr3457.ll
+++ b/test/CodeGen/X86/pr3457.ll
@@ -6,9 +6,9 @@ define void @foo(double* nocapture %P) nounwind {
entry:
%0 = tail call double (...)* @test() nounwind ; <double> [#uses=2]
%1 = tail call double (...)* @test() nounwind ; <double> [#uses=2]
- %2 = mul double %0, %0 ; <double> [#uses=1]
- %3 = mul double %1, %1 ; <double> [#uses=1]
- %4 = add double %2, %3 ; <double> [#uses=1]
+ %2 = fmul double %0, %0 ; <double> [#uses=1]
+ %3 = fmul double %1, %1 ; <double> [#uses=1]
+ %4 = fadd double %2, %3 ; <double> [#uses=1]
store double %4, double* %P, align 8
ret void
}
diff --git a/test/CodeGen/X86/pre-split1.ll b/test/CodeGen/X86/pre-split1.ll
index 99a46b6..4f9a582 100644
--- a/test/CodeGen/X86/pre-split1.ll
+++ b/test/CodeGen/X86/pre-split1.ll
@@ -5,17 +5,17 @@
define void @test(double* %P, i32 %cond) nounwind {
entry:
%0 = load double* %P, align 8 ; <double> [#uses=1]
- %1 = add double %0, 4.000000e+00 ; <double> [#uses=2]
+ %1 = fadd double %0, 4.000000e+00 ; <double> [#uses=2]
%2 = icmp eq i32 %cond, 0 ; <i1> [#uses=1]
br i1 %2, label %bb1, label %bb
bb: ; preds = %entry
- %3 = add double %1, 4.000000e+00 ; <double> [#uses=1]
+ %3 = fadd double %1, 4.000000e+00 ; <double> [#uses=1]
br label %bb1
bb1: ; preds = %bb, %entry
%A.0 = phi double [ %3, %bb ], [ %1, %entry ] ; <double> [#uses=1]
- %4 = mul double %A.0, 4.000000e+00 ; <double> [#uses=1]
+ %4 = fmul double %A.0, 4.000000e+00 ; <double> [#uses=1]
%5 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=0]
store double %4, double* %P, align 8
ret void
diff --git a/test/CodeGen/X86/pre-split10.ll b/test/CodeGen/X86/pre-split10.ll
index c3e18c4..60297e9 100644
--- a/test/CodeGen/X86/pre-split10.ll
+++ b/test/CodeGen/X86/pre-split10.ll
@@ -7,9 +7,9 @@ entry:
bb14.i: ; preds = %bb14.i, %entry
%i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %0, %bb14.i ] ; <i32> [#uses=1]
%0 = add i32 %i8.0.reg2mem.0.i, 1 ; <i32> [#uses=2]
- %1 = add double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %2 = add double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %3 = add double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %1 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %2 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %3 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
%exitcond75.i = icmp eq i32 %0, 32 ; <i1> [#uses=1]
br i1 %exitcond75.i, label %bb24.i, label %bb14.i
@@ -37,13 +37,13 @@ bb7.i.i: ; preds = %bb35.preheader.i, %bb5.i.i
br label %bb35.preheader.i
bb35.preheader.i: ; preds = %bb7.i.i, %bb33.i
- %9 = sub double 0.000000e+00, %4 ; <double> [#uses=1]
+ %9 = fsub double 0.000000e+00, %4 ; <double> [#uses=1]
store double %9, double* null, align 8
- %10 = sub double 0.000000e+00, %5 ; <double> [#uses=1]
+ %10 = fsub double 0.000000e+00, %5 ; <double> [#uses=1]
store double %10, double* null, align 8
- %11 = sub double 0.000000e+00, %6 ; <double> [#uses=1]
+ %11 = fsub double 0.000000e+00, %6 ; <double> [#uses=1]
store double %11, double* null, align 8
- %12 = sub double 0.000000e+00, %7 ; <double> [#uses=1]
+ %12 = fsub double 0.000000e+00, %7 ; <double> [#uses=1]
store double %12, double* null, align 8
br i1 false, label %bb7.i.i, label %bb5.i.i
}
diff --git a/test/CodeGen/X86/pre-split4.ll b/test/CodeGen/X86/pre-split4.ll
index 97401b3..a570f73 100644
--- a/test/CodeGen/X86/pre-split4.ll
+++ b/test/CodeGen/X86/pre-split4.ll
@@ -10,14 +10,14 @@ bb: ; preds = %bb, %entry
%Flint.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %5, %bb ] ; <double> [#uses=1]
%twoThrd.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ] ; <double> [#uses=1]
%0 = tail call double @llvm.pow.f64(double 0x3FE5555555555555, double 0.000000e+00) ; <double> [#uses=1]
- %1 = add double %0, %twoThrd.0.reg2mem.0 ; <double> [#uses=1]
+ %1 = fadd double %0, %twoThrd.0.reg2mem.0 ; <double> [#uses=1]
%2 = tail call double @sin(double %k.0.reg2mem.0) nounwind readonly ; <double> [#uses=1]
- %3 = mul double 0.000000e+00, %2 ; <double> [#uses=1]
+ %3 = fmul double 0.000000e+00, %2 ; <double> [#uses=1]
%4 = fdiv double 1.000000e+00, %3 ; <double> [#uses=1]
store double %Flint.0.reg2mem.0, double* null
store double %twoThrd.0.reg2mem.0, double* null
- %5 = add double %4, %Flint.0.reg2mem.0 ; <double> [#uses=1]
- %6 = add double %k.0.reg2mem.0, 1.000000e+00 ; <double> [#uses=1]
+ %5 = fadd double %4, %Flint.0.reg2mem.0 ; <double> [#uses=1]
+ %6 = fadd double %k.0.reg2mem.0, 1.000000e+00 ; <double> [#uses=1]
br label %bb
}
diff --git a/test/CodeGen/X86/pre-split5.ll b/test/CodeGen/X86/pre-split5.ll
index d353825..b83003f 100644
--- a/test/CodeGen/X86/pre-split5.ll
+++ b/test/CodeGen/X86/pre-split5.ll
@@ -40,7 +40,7 @@ bb28: ; preds = %bb14
bb30: ; preds = %bb36, %bb28
%m.1.reg2mem.0 = phi i32 [ %m.0, %bb36 ], [ 0, %bb28 ] ; <i32> [#uses=1]
- %1 = mul double 0.000000e+00, %0 ; <double> [#uses=1]
+ %1 = fmul double 0.000000e+00, %0 ; <double> [#uses=1]
%2 = fptosi double %1 to i32 ; <i32> [#uses=1]
br i1 false, label %bb36, label %bb35
diff --git a/test/CodeGen/X86/pre-split6.ll b/test/CodeGen/X86/pre-split6.ll
index 7808223..e771b80 100644
--- a/test/CodeGen/X86/pre-split6.ll
+++ b/test/CodeGen/X86/pre-split6.ll
@@ -20,14 +20,14 @@ bb.nph: ; preds = %entry
bb9.i: ; preds = %bb.nph
%3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
%4 = fdiv double 1.000000e+00, %1 ; <double> [#uses=1]
- %5 = mul double %4, 0.000000e+00 ; <double> [#uses=1]
+ %5 = fmul double %4, 0.000000e+00 ; <double> [#uses=1]
%6 = tail call double @asin(double %5) nounwind readonly ; <double> [#uses=0]
unreachable
bb13.i: ; preds = %bb.nph
%7 = fdiv double 1.000000e+00, %1 ; <double> [#uses=1]
%8 = tail call double @sin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1]
- %9 = mul double %7, %8 ; <double> [#uses=1]
+ %9 = fmul double %7, %8 ; <double> [#uses=1]
%10 = tail call double @asin(double %9) nounwind readonly ; <double> [#uses=0]
unreachable
diff --git a/test/CodeGen/X86/pre-split7.ll b/test/CodeGen/X86/pre-split7.ll
index 7f7b933..cd9d205 100644
--- a/test/CodeGen/X86/pre-split7.ll
+++ b/test/CodeGen/X86/pre-split7.ll
@@ -17,15 +17,15 @@ entry:
bb: ; preds = %bb, %entry
%0 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1]
- %1 = add double 0.000000e+00, %0 ; <double> [#uses=2]
+ %1 = fadd double 0.000000e+00, %0 ; <double> [#uses=2]
%2 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1]
- %3 = sub double %1, %2 ; <double> [#uses=2]
+ %3 = fsub double %1, %2 ; <double> [#uses=2]
store double %3, double* @axis_slope_angle, align 8
%4 = fdiv double %1, 2.000000e+00 ; <double> [#uses=1]
%5 = tail call double @sin(double %4) nounwind readonly ; <double> [#uses=1]
- %6 = mul double 0.000000e+00, %5 ; <double> [#uses=1]
+ %6 = fmul double 0.000000e+00, %5 ; <double> [#uses=1]
%7 = tail call double @tan(double %3) nounwind readonly ; <double> [#uses=0]
- %8 = add double 0.000000e+00, %6 ; <double> [#uses=1]
+ %8 = fadd double 0.000000e+00, %6 ; <double> [#uses=1]
store double %8, double* @object_distance, align 8
br label %bb
diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll
index eb6d49f..2259819 100644
--- a/test/CodeGen/X86/pre-split8.ll
+++ b/test/CodeGen/X86/pre-split8.ll
@@ -19,12 +19,12 @@ bb: ; preds = %bb9.i, %entry
br i1 %1, label %bb9.i, label %bb13.i
bb9.i: ; preds = %bb
- %2 = sub double %.rle4, %0 ; <double> [#uses=0]
+ %2 = fsub double %.rle4, %0 ; <double> [#uses=0]
%3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
- %4 = mul double 0.000000e+00, %0 ; <double> [#uses=1]
+ %4 = fmul double 0.000000e+00, %0 ; <double> [#uses=1]
%5 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
- %6 = mul double %4, 0.000000e+00 ; <double> [#uses=1]
- %7 = add double %6, 0.000000e+00 ; <double> [#uses=1]
+ %6 = fmul double %4, 0.000000e+00 ; <double> [#uses=1]
+ %7 = fadd double %6, 0.000000e+00 ; <double> [#uses=1]
br i1 false, label %return, label %bb
bb13.i: ; preds = %bb
diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll
index bfafe85..1be960f 100644
--- a/test/CodeGen/X86/pre-split9.ll
+++ b/test/CodeGen/X86/pre-split9.ll
@@ -21,13 +21,13 @@ bb: ; preds = %bb9.i, %entry
br i1 %1, label %bb9.i, label %bb13.i
bb9.i: ; preds = %bb
- %2 = sub double %.rle4, %0 ; <double> [#uses=0]
+ %2 = fsub double %.rle4, %0 ; <double> [#uses=0]
%3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
%4 = tail call double @sin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1]
- %5 = mul double %4, %0 ; <double> [#uses=1]
+ %5 = fmul double %4, %0 ; <double> [#uses=1]
%6 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
- %7 = mul double %5, 0.000000e+00 ; <double> [#uses=1]
- %8 = add double %7, 0.000000e+00 ; <double> [#uses=1]
+ %7 = fmul double %5, 0.000000e+00 ; <double> [#uses=1]
+ %8 = fadd double %7, 0.000000e+00 ; <double> [#uses=1]
br i1 false, label %return, label %bb
bb13.i: ; preds = %bb
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
new file mode 100644
index 0000000..dea7d7e
--- /dev/null
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: grep subq %t | count 1
+; RUN: grep addq %t | count 1
+
+define x86_fp80 @f0(float %f) nounwind readnone noredzone {
+entry:
+ %0 = fpext float %f to x86_fp80 ; <x86_fp80> [#uses=1]
+ ret x86_fp80 %0
+}
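
This new test appears to pin down the noredzone function attribute on x86-64: ordinarily a leaf function like @f0 could keep the temporary for its float-to-fp80 conversion in the 128-byte red zone below %rsp without moving the stack pointer, but with the attribute that zone is off limits, so llc has to emit a real stack adjustment. The two greps count exactly one allocation/deallocation pair; roughly, with a hypothetical offset N:

    ; expected shape of the emitted prologue/epilogue:
    ;   subq $N, %rsp     (grep subq %t | count 1)
    ;   ...store/load through the stack slot for the fpext...
    ;   addq $N, %rsp     (grep addq %t | count 1)
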
diff --git a/test/CodeGen/X86/remat-constant.ll b/test/CodeGen/X86/remat-constant.ll
index d9ef6fe..4c983b0 100644
--- a/test/CodeGen/X86/remat-constant.ll
+++ b/test/CodeGen/X86/remat-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=static -aggressive-remat | grep xmm | count 2
+; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static -aggressive-remat | grep xmm | count 2
declare void @bar() nounwind
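
The RUN-line change here swaps -march=x86-64 for an explicit -mtriple=x86_64-linux: with only a -march, llc fills in the rest of the target triple from the build host, so the emitted assembly (and hence the grep counts) could vary between, say, Linux and Darwin machines; pinning the full triple makes the test host-independent.
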
diff --git a/test/CodeGen/X86/shrink-fp-const1.ll b/test/CodeGen/X86/shrink-fp-const1.ll
index 966e69e..3406aee 100644
--- a/test/CodeGen/X86/shrink-fp-const1.ll
+++ b/test/CodeGen/X86/shrink-fp-const1.ll
@@ -2,6 +2,6 @@
; PR1264
define double @foo(double %x) {
- %y = mul double %x, 5.000000e-01
+ %y = fmul double %x, 5.000000e-01
ret double %y
}
diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll
index dedd948..8b87f74 100644
--- a/test/CodeGen/X86/small-byval-memcpy.ll
+++ b/test/CodeGen/X86/small-byval-memcpy.ll
@@ -8,7 +8,7 @@ entry:
%iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3]
%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
%tmp2 = load x86_fp80* %tmp1, align 16 ; <x86_fp80> [#uses=1]
- %tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2 ; <x86_fp80> [#uses=1]
+ %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2 ; <x86_fp80> [#uses=1]
%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
%tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
diff --git a/test/CodeGen/X86/soft-fp.ll b/test/CodeGen/X86/soft-fp.ll
index 7fa8fed..0c697de 100644
--- a/test/CodeGen/X86/soft-fp.ll
+++ b/test/CodeGen/X86/soft-fp.ll
@@ -22,6 +22,6 @@ declare void @llvm.va_end(i8*) nounwind
define float @t2(float %a, float %b) nounwind readnone {
entry:
- %0 = add float %a, %b ; <float> [#uses=1]
+ %0 = fadd float %a, %b ; <float> [#uses=1]
ret float %0
}
diff --git a/test/CodeGen/X86/sse-align-0.ll b/test/CodeGen/X86/sse-align-0.ll
index 39debaa..5a888b2 100644
--- a/test/CodeGen/X86/sse-align-0.ll
+++ b/test/CodeGen/X86/sse-align-0.ll
@@ -2,11 +2,11 @@
define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
%t = load <4 x float>* %p
- %z = mul <4 x float> %t, %x
+ %z = fmul <4 x float> %t, %x
ret <4 x float> %z
}
define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
%t = load <2 x double>* %p
- %z = mul <2 x double> %t, %x
+ %z = fmul <2 x double> %t, %x
ret <2 x double> %z
}
diff --git a/test/CodeGen/X86/sse-align-2.ll b/test/CodeGen/X86/sse-align-2.ll
index b5b261d..ba693a2 100644
--- a/test/CodeGen/X86/sse-align-2.ll
+++ b/test/CodeGen/X86/sse-align-2.ll
@@ -2,11 +2,11 @@
define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
%t = load <4 x float>* %p, align 4
- %z = mul <4 x float> %t, %x
+ %z = fmul <4 x float> %t, %x
ret <4 x float> %z
}
define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
%t = load <2 x double>* %p, align 8
- %z = mul <2 x double> %t, %x
+ %z = fmul <2 x double> %t, %x
ret <2 x double> %z
}
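
sse-align-0 and sse-align-2 differ only in the loads' stated alignment. A load with no align attribute is assumed to have the ABI alignment of its type (16 bytes for these vectors), which lets codegen fold it straight into an SSE arithmetic instruction; the explicit align 4/align 8 loads are under-aligned, and since SSE packed-arithmetic memory operands must be 16-byte aligned, the value has to come in through an unaligned load first. Presumably that folding-vs-unaligned-load distinction is what the pair of tests checks:

    %a = load <4 x float>* %p            ; implicit align 16: foldable, e.g. mulps (%p), %x
    %b = load <4 x float>* %p, align 4   ; under-aligned: movups first, then mulps
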
diff --git a/test/CodeGen/X86/sse-fcopysign.ll b/test/CodeGen/X86/sse-fcopysign.ll
index cff1f7f..d8c3283 100644
--- a/test/CodeGen/X86/sse-fcopysign.ll
+++ b/test/CodeGen/X86/sse-fcopysign.ll
@@ -6,7 +6,7 @@ define float @tst1(float %a, float %b) {
}
define double @tst2(double %a, float %b, float %c) {
- %tmp1 = add float %b, %c
+ %tmp1 = fadd float %b, %c
%tmp2 = fpext float %tmp1 to double
%tmp = tail call double @copysign( double %a, double %tmp2 )
ret double %tmp
diff --git a/test/CodeGen/X86/sse41-extractps-bitcast-1.ll b/test/CodeGen/X86/sse41-extractps-bitcast-1.ll
index fc0df06..470d146 100644
--- a/test/CodeGen/X86/sse41-extractps-bitcast-1.ll
+++ b/test/CodeGen/X86/sse41-extractps-bitcast-1.ll
@@ -6,7 +6,7 @@
define float @bar(<4 x float> %v) {
%s = extractelement <4 x float> %v, i32 3
- %t = add float %s, 1.0
+ %t = fadd float %s, 1.0
ret float %t
}
define float @baz(<4 x float> %v) {
diff --git a/test/CodeGen/X86/sse41-pmovx.ll b/test/CodeGen/X86/sse41-pmovx.ll
index 71e5e25..c8cfec9 100644
--- a/test/CodeGen/X86/sse41-pmovx.ll
+++ b/test/CodeGen/X86/sse41-pmovx.ll
@@ -2,7 +2,7 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | not grep movq
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep pmovsxbd
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep pmovsxwd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep pmovsxbq
+; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep pmovzxbq
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=sse41 -mtriple=x86_64-apple-darwin | grep movq | count 1
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=sse41 -mtriple=x86_64-unknown-linux-gnu | not grep movq
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 1e6c2b2..dda6f0d 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -14,7 +14,7 @@ entry:
%tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1]
%tmp3 = load double* @G, align 16 ; <double> [#uses=1]
%tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
- %tmp6 = add double %tmp4, %tmp2 ; <double> [#uses=1]
+ %tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1]
store double %tmp6, double* %P, align 8
ret void
}
diff --git a/test/CodeGen/X86/storetrunc-fp.ll b/test/CodeGen/X86/storetrunc-fp.ll
index 655cbd6..945cf48 100644
--- a/test/CodeGen/X86/storetrunc-fp.ll
+++ b/test/CodeGen/X86/storetrunc-fp.ll
@@ -1,7 +1,7 @@
; RUN: llvm-as < %s | llc -march=x86 | not grep flds
define void @foo(x86_fp80 %a, x86_fp80 %b, float* %fp) {
- %c = add x86_fp80 %a, %b
+ %c = fadd x86_fp80 %a, %b
%d = fptrunc x86_fp80 %c to float
store float %d, float* %fp
ret void
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 97f33d8..277a443 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -14,7 +14,7 @@ bb:
%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
%tmp2 = getelementptr [1000 x float]* @B, i32 0, i32 %i.019.0
%tmp3 = load float* %tmp2, align 4
- %tmp4 = mul float %tmp3, 2.000000e+00
+ %tmp4 = fmul float %tmp3, 2.000000e+00
%tmp5 = getelementptr [1000 x float]* @A, i32 0, i32 %i.019.0
store float %tmp4, float* %tmp5, align 4
%tmp8 = shl i32 %i.019.0, 1
diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll
index 9a011f7..3fe4cd1 100644
--- a/test/CodeGen/X86/twoaddr-coalesce-2.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll
@@ -9,7 +9,7 @@ entry:
%tmp.i3 = bitcast <2 x double> %B to <2 x i64> ; <<2 x i64>> [#uses=1]
%tmp2.i = or <2 x i64> %tmp.i3, <i64 4607632778762754458, i64 4607632778762754458> ; <<2 x i64>> [#uses=1]
%tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double> ; <<2 x double>> [#uses=1]
- %tmp.i2 = add <2 x double> %tmp3.i, %A ; <<2 x double>> [#uses=1]
- %tmp.i = add <2 x double> %tmp.i2, %C ; <<2 x double>> [#uses=1]
+ %tmp.i2 = fadd <2 x double> %tmp3.i, %A ; <<2 x double>> [#uses=1]
+ %tmp.i = fadd <2 x double> %tmp.i2, %C ; <<2 x double>> [#uses=1]
ret <2 x double> %tmp.i
}
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index f1f009e..ee7567c 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -6,7 +6,7 @@
define void @test1(<4 x float>* %F, float* %f) nounwind {
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp7 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
+ %tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp2 = extractelement <4 x float> %tmp7, i32 0 ; <float> [#uses=1]
store float %tmp2, float* %f
ret void
@@ -14,7 +14,7 @@ define void @test1(<4 x float>* %F, float* %f) nounwind {
define float @test2(<4 x float>* %F, float* %f) nounwind {
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp7 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
+ %tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp2 = extractelement <4 x float> %tmp7, i32 2 ; <float> [#uses=1]
ret float %tmp2
}
@@ -29,7 +29,7 @@ define void @test3(float* %R, <4 x float>* %P1) nounwind {
define double @test4(double %A) nounwind {
%tmp1 = call <2 x double> @foo( ) ; <<2 x double>> [#uses=1]
%tmp2 = extractelement <2 x double> %tmp1, i32 1 ; <double> [#uses=1]
- %tmp3 = add double %tmp2, %A ; <double> [#uses=1]
+ %tmp3 = fadd double %tmp2, %A ; <double> [#uses=1]
ret double %tmp3
}
diff --git a/test/CodeGen/X86/vec_fneg.ll b/test/CodeGen/X86/vec_fneg.ll
index 03765d6..a801472 100644
--- a/test/CodeGen/X86/vec_fneg.ll
+++ b/test/CodeGen/X86/vec_fneg.ll
@@ -1,11 +1,11 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
define <4 x float> @t1(<4 x float> %Q) {
- %tmp15 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
+ %tmp15 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
ret <4 x float> %tmp15
}
define <4 x float> @t2(<4 x float> %Q) {
- %tmp15 = sub <4 x float> zeroinitializer, %Q
+ %tmp15 = fsub <4 x float> zeroinitializer, %Q
ret <4 x float> %tmp15
}
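
Vector form of the same negation idiom: subtracting from a splat of -0.0 negates each lane exactly, and on SSE2 (note -mattr=+sse2 in the RUN line) it can lower to a single xorps against a sign-bit mask, while @t2's fsub from zeroinitializer is again the inexact 0.0 - x variant:

    %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %q
    ; lowerable to: xorps with a <0x80000000 x 4> constant, flipping only the sign bits
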
diff --git a/test/CodeGen/X86/vec_ins_extract.ll b/test/CodeGen/X86/vec_ins_extract.ll
index 86f1306..7882839 100644
--- a/test/CodeGen/X86/vec_ins_extract.ll
+++ b/test/CodeGen/X86/vec_ins_extract.ll
@@ -7,9 +7,9 @@
define void @test(<4 x float>* %F, float %f) {
entry:
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
+ %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp10 = insertelement <4 x float> %tmp3, float %f, i32 0 ; <<4 x float>> [#uses=2]
- %tmp6 = add <4 x float> %tmp10, %tmp10 ; <<4 x float>> [#uses=1]
+ %tmp6 = fadd <4 x float> %tmp10, %tmp10 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp6, <4 x float>* %F
ret void
}
@@ -18,12 +18,12 @@ define void @test2(<4 x float>* %F, float %f) {
entry:
%G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
+ %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
store <4 x float> %tmp3, <4 x float>* %G
%tmp.upgrd.1 = getelementptr <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1]
store float %f, float* %tmp.upgrd.1
%tmp4 = load <4 x float>* %G ; <<4 x float>> [#uses=2]
- %tmp6 = add <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
+ %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp6, <4 x float>* %F
ret void
}
@@ -32,7 +32,7 @@ define void @test3(<4 x float>* %F, float* %f) {
entry:
%G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
+ %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
store <4 x float> %tmp3, <4 x float>* %G
%tmp.upgrd.2 = getelementptr <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1]
%tmp.upgrd.3 = load float* %tmp.upgrd.2 ; <float> [#uses=1]
@@ -45,7 +45,7 @@ entry:
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp5.lhs = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
%tmp5.rhs = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
- %tmp5 = add float %tmp5.lhs, %tmp5.rhs ; <float> [#uses=1]
+ %tmp5 = fadd float %tmp5.lhs, %tmp5.rhs ; <float> [#uses=1]
store float %tmp5, float* %f
ret void
}
diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll
index e032c5b..3a9464c 100644
--- a/test/CodeGen/X86/vec_insert.ll
+++ b/test/CodeGen/X86/vec_insert.ll
@@ -5,7 +5,7 @@ define void @test(<4 x float>* %F, i32 %I) {
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1]
%f = sitofp i32 %I to float ; <float> [#uses=1]
%tmp1 = insertelement <4 x float> %tmp, float %f, i32 0 ; <<4 x float>> [#uses=2]
- %tmp18 = add <4 x float> %tmp1, %tmp1 ; <<4 x float>> [#uses=1]
+ %tmp18 = fadd <4 x float> %tmp1, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp18, <4 x float>* %F
ret void
}
diff --git a/test/CodeGen/X86/vec_logical.ll b/test/CodeGen/X86/vec_logical.ll
index 6e03afb..f895762 100644
--- a/test/CodeGen/X86/vec_logical.ll
+++ b/test/CodeGen/X86/vec_logical.ll
@@ -4,7 +4,7 @@
; RUN: grep movaps %t | count 2
define void @t(<4 x float> %A) {
- %tmp1277 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %A
+ %tmp1277 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %A
store <4 x float> %tmp1277, <4 x float>* null
ret void
}
diff --git a/test/CodeGen/X86/vec_select.ll b/test/CodeGen/X86/vec_select.ll
index ede7ab2..ecb825b 100644
--- a/test/CodeGen/X86/vec_select.ll
+++ b/test/CodeGen/X86/vec_select.ll
@@ -3,7 +3,7 @@
define void @test(i32 %C, <4 x float>* %A, <4 x float>* %B) {
%tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1]
%tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=2]
- %tmp9 = mul <4 x float> %tmp3, %tmp3 ; <<4 x float>> [#uses=1]
+ %tmp9 = fmul <4 x float> %tmp3, %tmp3 ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = icmp eq i32 %C, 0 ; <i1> [#uses=1]
%iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp ; <<4 x float>> [#uses=1]
store <4 x float> %iftmp.38.0, <4 x float>* %A
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index 6baf47a..231ac0c 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -10,8 +10,8 @@ target triple = "i686-apple-cl.1.0"
define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
entry:
%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1]
- %sub = sub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1]
- %mul = mul <8 x float> %sub, %tmp7 ; <<8 x float>> [#uses=1]
- %add = add <8 x float> %mul, %T0 ; <<8 x float>> [#uses=1]
+ %sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1]
+ %mul = fmul <8 x float> %sub, %tmp7 ; <<8 x float>> [#uses=1]
+ %add = fadd <8 x float> %mul, %T0 ; <<8 x float>> [#uses=1]
ret <8 x float> %add
}
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
index 6de1038..018b4cf 100644
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ b/test/CodeGen/X86/vec_shuffle-3.ll
@@ -5,8 +5,8 @@
define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) {
%tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]
%tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2]
- %tmp9 = add <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
- %tmp21 = sub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
+ %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
+ %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
%tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp27
}
diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll
index 1acd73f..e356f24 100644
--- a/test/CodeGen/X86/vec_shuffle-5.ll
+++ b/test/CodeGen/X86/vec_shuffle-5.ll
@@ -6,7 +6,7 @@ define void @test() nounwind {
%tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2]
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- %tmp4 = add <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
+ %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp4, <4 x float>* null
ret void
}
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index 64222e4..89914fd 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -7,7 +7,7 @@ define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
%tmp4 = insertelement <4 x float> %tmp2, float %X, i32 2 ; <<4 x float>> [#uses=1]
%tmp6 = insertelement <4 x float> %tmp4, float %X, i32 3 ; <<4 x float>> [#uses=1]
%tmp8 = load <4 x float>* %Q ; <<4 x float>> [#uses=1]
- %tmp10 = mul <4 x float> %tmp8, %tmp6 ; <<4 x float>> [#uses=1]
+ %tmp10 = fmul <4 x float> %tmp8, %tmp6 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp10, <4 x float>* %P
ret void
}
@@ -16,7 +16,7 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1]
%tmp2 = insertelement <2 x double> %tmp, double %X, i32 1 ; <<2 x double>> [#uses=1]
%tmp4 = load <2 x double>* %Q ; <<2 x double>> [#uses=1]
- %tmp6 = mul <2 x double> %tmp4, %tmp2 ; <<2 x double>> [#uses=1]
+ %tmp6 = fmul <2 x double> %tmp4, %tmp2 ; <<2 x double>> [#uses=1]
store <2 x double> %tmp6, <2 x double>* %P
ret void
}
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index 0f15f92..69900a6 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -20,8 +20,8 @@ define i16 @test1(float %f) nounwind {
}
define i16 @test2(float %f) nounwind {
- %tmp28 = sub float %f, 1.000000e+00 ; <float> [#uses=1]
- %tmp37 = mul float %tmp28, 5.000000e-01 ; <float> [#uses=1]
+ %tmp28 = fsub float %f, 1.000000e+00 ; <float> [#uses=1]
+ %tmp37 = fmul float %tmp28, 5.000000e-01 ; <float> [#uses=1]
%tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0 ; <<4 x float>> [#uses=1]
%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index 17b378f..0a7a543 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -2,7 +2,7 @@
define void @foo(<4 x float>* %P) {
%T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
- %S = add <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1]
+ %S = fadd <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1]
store <4 x float> %S, <4 x float>* %P
ret void
}
diff --git a/test/CodeGen/X86/vector.ll b/test/CodeGen/X86/vector.ll
index e7f3b92..8e1de2f 100644
--- a/test/CodeGen/X86/vector.ll
+++ b/test/CodeGen/X86/vector.ll
@@ -15,7 +15,7 @@
define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
%p = load %f1* %P ; <%f1> [#uses=1]
%q = load %f1* %Q ; <%f1> [#uses=1]
- %R = add %f1 %p, %q ; <%f1> [#uses=1]
+ %R = fadd %f1 %p, %q ; <%f1> [#uses=1]
store %f1 %R, %f1* %S
ret void
}
@@ -23,7 +23,7 @@ define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
%p = load %f2* %P ; <%f2> [#uses=1]
%q = load %f2* %Q ; <%f2> [#uses=1]
- %R = add %f2 %p, %q ; <%f2> [#uses=1]
+ %R = fadd %f2 %p, %q ; <%f2> [#uses=1]
store %f2 %R, %f2* %S
ret void
}
@@ -31,7 +31,7 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
%q = load %f4* %Q ; <%f4> [#uses=1]
- %R = add %f4 %p, %q ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, %q ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
@@ -39,7 +39,7 @@ define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
%p = load %f8* %P ; <%f8> [#uses=1]
%q = load %f8* %Q ; <%f8> [#uses=1]
- %R = add %f8 %p, %q ; <%f8> [#uses=1]
+ %R = fadd %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
@@ -47,7 +47,7 @@ define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
%p = load %f8* %P ; <%f8> [#uses=1]
%q = load %f8* %Q ; <%f8> [#uses=1]
- %R = mul %f8 %p, %q ; <%f8> [#uses=1]
+ %R = fmul %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
@@ -64,21 +64,21 @@ define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
define void @test_cst(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
- %R = add %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_zero(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
- %R = add %f4 %p, zeroinitializer ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, zeroinitializer ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_undef(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
- %R = add %f4 %p, undef ; <%f4> [#uses=1]
+ %R = fadd %f4 %p, undef ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
@@ -115,7 +115,7 @@ define double @test_extract_elt2(%d8* %P) {
define void @test_cast_1(%f4* %b, %i4* %a) {
%tmp = load %f4* %b ; <%f4> [#uses=1]
- %tmp2 = add %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1]
+ %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1]
%tmp3 = bitcast %f4 %tmp2 to %i4 ; <%i4> [#uses=1]
%tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 > ; <%i4> [#uses=1]
store %i4 %tmp4, %i4* %a
@@ -138,7 +138,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) {
%tmp4 = insertelement %f4 %tmp2, float %X, i32 2 ; <%f4> [#uses=1]
%tmp6 = insertelement %f4 %tmp4, float %X, i32 3 ; <%f4> [#uses=1]
%q = load %f4* %Q ; <%f4> [#uses=1]
- %R = add %f4 %q, %tmp6 ; <%f4> [#uses=1]
+ %R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1]
store %f4 %R, %f4* %P
ret void
}
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
index 59548c3..7b0bb33 100644
--- a/test/CodeGen/X86/widen_arith-6.ll
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -34,8 +34,8 @@ forbody: ; preds = %forcond
%arrayidx6 = getelementptr <3 x float>* %tmp5, i32 %tmp4 ; <<3 x float>*> [#uses=1]
%tmp7 = load <3 x float>* %arrayidx6 ; <<3 x float>> [#uses=1]
%tmp8 = load <3 x float>* %v ; <<3 x float>> [#uses=1]
- %mul = mul <3 x float> %tmp7, %tmp8 ; <<3 x float>> [#uses=1]
- %add = add <3 x float> %mul, < float 0x409EE02900000000, float 0x409EE02900000000, float 0x409EE02900000000 > ; <<3 x float>> [#uses=1]
+ %mul = fmul <3 x float> %tmp7, %tmp8 ; <<3 x float>> [#uses=1]
+ %add = fadd <3 x float> %mul, < float 0x409EE02900000000, float 0x409EE02900000000, float 0x409EE02900000000 > ; <<3 x float>> [#uses=1]
store <3 x float> %add, <3 x float>* %arrayidx
br label %forinc
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index d3bbd5f..a676f33 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -5,7 +5,7 @@
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
entry:
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
- %val = add <3 x float> %x, %src2;
+ %val = fadd <3 x float> %x, %src2;
store <3 x float> %val, <3 x float>* %dst.addr
ret void
}
diff --git a/test/CodeGen/X86/widen_shuffle-2.ll b/test/CodeGen/X86/widen_shuffle-2.ll
index d25e02e..c2dfa3d 100644
--- a/test/CodeGen/X86/widen_shuffle-2.ll
+++ b/test/CodeGen/X86/widen_shuffle-2.ll
@@ -5,7 +5,7 @@
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
entry:
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
- %val = add <3 x float> %x, %src2;
+ %val = fadd <3 x float> %x, %src2;
store <3 x float> %val, <3 x float>* %dst.addr
ret void
}
diff --git a/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll b/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
index 735e988..b9333c9 100644
--- a/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
+++ b/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
@@ -8,11 +8,11 @@ bb113: ; preds = %entry
ret double 0.000000e+00
bb129: ; preds = %entry
- %tmp134 = sub double %b, %a ; <double> [#uses=1]
- %tmp136 = sub double %tmp134, %c ; <double> [#uses=1]
- %tmp138 = add double %tmp136, %d ; <double> [#uses=1]
- %tmp140 = sub double %tmp138, %e ; <double> [#uses=1]
- %tmp142 = add double %tmp140, %f ; <double> [#uses=1]
- %tmp.0 = mul double %tmp142, 0.000000e+00 ; <double> [#uses=1]
+ %tmp134 = fsub double %b, %a ; <double> [#uses=1]
+ %tmp136 = fsub double %tmp134, %c ; <double> [#uses=1]
+ %tmp138 = fadd double %tmp136, %d ; <double> [#uses=1]
+ %tmp140 = fsub double %tmp138, %e ; <double> [#uses=1]
+ %tmp142 = fadd double %tmp140, %f ; <double> [#uses=1]
+ %tmp.0 = fmul double %tmp142, 0.000000e+00 ; <double> [#uses=1]
ret double %tmp.0
}
diff --git a/test/CodeGen/XCore/fneg.ll b/test/CodeGen/XCore/fneg.ll
index e4426fd..3fb7b01 100644
--- a/test/CodeGen/XCore/fneg.ll
+++ b/test/CodeGen/XCore/fneg.ll
@@ -2,7 +2,7 @@
; RUN: grep "xor" %t1.s | count 1
define i1 @test(double %F) nounwind {
entry:
- %0 = sub double -0.000000e+00, %F
+ %0 = fsub double -0.000000e+00, %F
%1 = fcmp olt double 0.000000e+00, %0
ret i1 %1
}
diff --git a/test/ExecutionEngine/2003-01-10-FUCOM.ll b/test/ExecutionEngine/2003-01-10-FUCOM.ll
index 628be16..30f9330 100644
--- a/test/ExecutionEngine/2003-01-10-FUCOM.ll
+++ b/test/ExecutionEngine/2003-01-10-FUCOM.ll
@@ -2,10 +2,10 @@
; RUN: lli %t.bc > /dev/null
define i32 @main() {
- %X = add double 0.000000e+00, 1.000000e+00 ; <double> [#uses=1]
- %Y = sub double 0.000000e+00, 1.000000e+00 ; <double> [#uses=2]
+ %X = fadd double 0.000000e+00, 1.000000e+00 ; <double> [#uses=1]
+ %Y = fsub double 0.000000e+00, 1.000000e+00 ; <double> [#uses=2]
%Z = fcmp oeq double %X, %Y ; <i1> [#uses=0]
- add double %Y, 0.000000e+00 ; <double>:1 [#uses=0]
+ fadd double %Y, 0.000000e+00 ; <double>:1 [#uses=0]
ret i32 0
}
diff --git a/test/ExecutionEngine/test-fp.ll b/test/ExecutionEngine/test-fp.ll
index a119b40..2e8ecd5 100644
--- a/test/ExecutionEngine/test-fp.ll
+++ b/test/ExecutionEngine/test-fp.ll
@@ -3,13 +3,13 @@
define double @test(double* %DP, double %Arg) {
%D = load double* %DP ; <double> [#uses=1]
- %V = add double %D, 1.000000e+00 ; <double> [#uses=2]
- %W = sub double %V, %V ; <double> [#uses=3]
- %X = mul double %W, %W ; <double> [#uses=2]
+ %V = fadd double %D, 1.000000e+00 ; <double> [#uses=2]
+ %W = fsub double %V, %V ; <double> [#uses=3]
+ %X = fmul double %W, %W ; <double> [#uses=2]
%Y = fdiv double %X, %X ; <double> [#uses=2]
%Z = frem double %Y, %Y ; <double> [#uses=3]
%Z1 = fdiv double %Z, %W ; <double> [#uses=0]
- %Q = add double %Z, %Arg ; <double> [#uses=1]
+ %Q = fadd double %Z, %Arg ; <double> [#uses=1]
%R = bitcast double %Q to double ; <double> [#uses=1]
store double %R, double* %DP
ret double %Z
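
test-fp.ll now exercises the full floating-point arithmetic set in one function. Only fadd, fsub, and fmul are new in this change; fdiv and frem date from the earlier split of div/rem into udiv/sdiv/fdiv and urem/srem/frem, which is why those two lines appear here as unchanged context. The complete set, with hypothetical operands:

    %a = fadd double %x, %y
    %s = fsub double %x, %y
    %m = fmul double %x, %y
    %d = fdiv double %x, %y   ; pre-existing FP opcode
    %r = frem double %x, %y   ; pre-existing; typically lowered to the fmod libcall
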
diff --git a/test/ExecutionEngine/test-setcond-fp.ll b/test/ExecutionEngine/test-setcond-fp.ll
index 235c402..b917693 100644
--- a/test/ExecutionEngine/test-setcond-fp.ll
+++ b/test/ExecutionEngine/test-setcond-fp.ll
@@ -3,10 +3,10 @@
define i32 @main() {
- %double1 = add double 0.000000e+00, 0.000000e+00 ; <double> [#uses=6]
- %double2 = add double 0.000000e+00, 0.000000e+00 ; <double> [#uses=6]
- %float1 = add float 0.000000e+00, 0.000000e+00 ; <float> [#uses=6]
- %float2 = add float 0.000000e+00, 0.000000e+00 ; <float> [#uses=6]
+ %double1 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=6]
+ %double2 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=6]
+ %float1 = fadd float 0.000000e+00, 0.000000e+00 ; <float> [#uses=6]
+ %float2 = fadd float 0.000000e+00, 0.000000e+00 ; <float> [#uses=6]
%test49 = fcmp oeq float %float1, %float2 ; <i1> [#uses=0]
%test50 = fcmp oge float %float1, %float2 ; <i1> [#uses=0]
%test51 = fcmp ogt float %float1, %float2 ; <i1> [#uses=0]
diff --git a/test/Feature/ppcld.ll b/test/Feature/ppcld.ll
index f21eb43..393a491 100644
--- a/test/Feature/ppcld.ll
+++ b/test/Feature/ppcld.ll
@@ -15,7 +15,7 @@ entry:
%tmp = load float* @f ; <float> [#uses=1]
%tmp1 = fpext float %tmp to double ; <double> [#uses=1]
%tmp2 = load double* @d ; <double> [#uses=1]
- %tmp3 = mul double %tmp1, %tmp2 ; <double> [#uses=1]
+ %tmp3 = fmul double %tmp1, %tmp2 ; <double> [#uses=1]
%tmp4 = fpext double %tmp3 to ppc_fp128 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp4, ppc_fp128* @ld
br label %return
diff --git a/test/Feature/sparcld.ll b/test/Feature/sparcld.ll
index 2e99bda..095f6f6 100644
--- a/test/Feature/sparcld.ll
+++ b/test/Feature/sparcld.ll
@@ -13,7 +13,7 @@ entry:
%tmp = load float* @f ; <float> [#uses=1]
%tmp1 = fpext float %tmp to double ; <double> [#uses=1]
%tmp2 = load double* @d ; <double> [#uses=1]
- %tmp3 = mul double %tmp1, %tmp2 ; <double> [#uses=1]
+ %tmp3 = fmul double %tmp1, %tmp2 ; <double> [#uses=1]
%tmp4 = fpext double %tmp3 to fp128 ; <fp128> [#uses=1]
store fp128 %tmp4, fp128* @ld
br label %return
diff --git a/test/Feature/x86ld.ll b/test/Feature/x86ld.ll
index 6904003..32005ae 100644
--- a/test/Feature/x86ld.ll
+++ b/test/Feature/x86ld.ll
@@ -15,7 +15,7 @@ entry:
%tmp = load float* @f ; <float> [#uses=1]
%tmp1 = fpext float %tmp to double ; <double> [#uses=1]
%tmp2 = load double* @d ; <double> [#uses=1]
- %tmp3 = mul double %tmp1, %tmp2 ; <double> [#uses=1]
+ %tmp3 = fmul double %tmp1, %tmp2 ; <double> [#uses=1]
%tmp4 = fpext double %tmp3 to x86_fp80 ; <x86_fp80> [#uses=1]
store x86_fp80 %tmp4, x86_fp80* @ld
br label %return
diff --git a/test/FrontendC/2009-01-20-k8.c b/test/FrontendC/2009-01-20-k8.c
index 627ab65..d28302b 100644
--- a/test/FrontendC/2009-01-20-k8.c
+++ b/test/FrontendC/2009-01-20-k8.c
@@ -1,3 +1,4 @@
// RUN: %llvmgcc %s -S -march=k8
-// XTARGET: x86
+// XFAIL: *
+// XTARGET: x86,i386,i686
long double x;
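
The two FrontendC tests get the same harness fix: XFAIL: * marks a test as expected to fail on every target, and the XTARGET: line then lists the targets where it must pass instead. Widening the list to x86,i386,i686 is presumably needed because 32-bit host triples (i386-*, i686-*) do not contain the bare substring "x86". The same pair of directives appears in 2009-05-04-EnumInreg.c below.
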
diff --git a/test/FrontendC/2009-05-04-EnumInreg.c b/test/FrontendC/2009-05-04-EnumInreg.c
index 8a76f5f..6dbdb54 100644
--- a/test/FrontendC/2009-05-04-EnumInreg.c
+++ b/test/FrontendC/2009-05-04-EnumInreg.c
@@ -1,5 +1,6 @@
// RUN: %llvmgcc -S -m32 -mregparm=3 %s -emit-llvm -o - | grep {inreg %action}
-// XTARGET: x86
+// XFAIL: *
+// XTARGET: x86,i386,i686
// PR3967
enum kobject_action {
diff --git a/test/Other/2004-08-16-PackedSelect.ll b/test/Other/2004-08-16-PackedSelect.ll
index 6438316..c1d6214 100644
--- a/test/Other/2004-08-16-PackedSelect.ll
+++ b/test/Other/2004-08-16-PackedSelect.ll
@@ -5,7 +5,7 @@
define void @main() {
%t0 = load <4 x float>* @foo ; <<4 x float>> [#uses=3]
- %t1 = add <4 x float> %t0, %t0 ; <<4 x float>> [#uses=1]
+ %t1 = fadd <4 x float> %t0, %t0 ; <<4 x float>> [#uses=1]
%t2 = select i1 true, <4 x float> %t0, <4 x float> %t1 ; <<4 x float>> [#uses=1]
store <4 x float> %t2, <4 x float>* @bar
ret void
diff --git a/test/Other/2004-08-16-PackedSimple.ll b/test/Other/2004-08-16-PackedSimple.ll
index 5bb8b79..81cecd4 100644
--- a/test/Other/2004-08-16-PackedSimple.ll
+++ b/test/Other/2004-08-16-PackedSimple.ll
@@ -5,7 +5,7 @@
define void @main() {
%t0 = load <4 x float>* @foo ; <<4 x float>> [#uses=3]
- %t2 = add <4 x float> %t0, %t0 ; <<4 x float>> [#uses=1]
+ %t2 = fadd <4 x float> %t0, %t0 ; <<4 x float>> [#uses=1]
%t3 = select i1 false, <4 x float> %t0, <4 x float> %t2 ; <<4 x float>> [#uses=1]
store <4 x float> %t3, <4 x float>* @bar
ret void
diff --git a/test/Other/2004-08-20-PackedControlFlow.ll b/test/Other/2004-08-20-PackedControlFlow.ll
index 49aa606..3943570 100644
--- a/test/Other/2004-08-20-PackedControlFlow.ll
+++ b/test/Other/2004-08-20-PackedControlFlow.ll
@@ -12,7 +12,7 @@ C: ; preds = %B
ret void
B: ; preds = %A
- %t2 = add %v4f %t0, %t0 ; <%v4f> [#uses=1]
+ %t2 = fadd %v4f %t0, %t0 ; <%v4f> [#uses=1]
br label %C
A: ; preds = %0
diff --git a/test/Other/2009-06-05-no-implicit-float.ll b/test/Other/2009-06-05-no-implicit-float.ll
new file mode 100644
index 0000000..5addfe2
--- /dev/null
+++ b/test/Other/2009-06-05-no-implicit-float.ll
@@ -0,0 +1,4 @@
+
+; RUN: llvm-as < %s | opt -verify | llvm-dis | grep noimplicitfloat
+define void @f() noimplicitfloat {
+}
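
2009-06-05-no-implicit-float.ll looks like a minimal round-trip test for the new noimplicitfloat function attribute, which tells the code generator not to introduce floating-point or vector operations the IR did not explicitly request (typically for kernel-style code where FP state must not be touched); the RUN line simply pushes the attribute through llvm-as, opt -verify, and llvm-dis and greps for it on the way out.
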
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 126db4c..c573e56 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -13,11 +13,11 @@ declare i1 @llvm.isunordered.f64(double, double)
define double @T() {
%A = call double @cos( double 0.000000e+00 ) ; <double> [#uses=1]
%B = call double @sin( double 0.000000e+00 ) ; <double> [#uses=1]
- %a = add double %A, %B ; <double> [#uses=1]
+ %a = fadd double %A, %B ; <double> [#uses=1]
%C = call double @tan( double 0.000000e+00 ) ; <double> [#uses=1]
- %b = add double %a, %C ; <double> [#uses=1]
+ %b = fadd double %a, %C ; <double> [#uses=1]
%D = call double @sqrt( double 4.000000e+00 ) ; <double> [#uses=1]
- %c = add double %b, %D ; <double> [#uses=1]
+ %c = fadd double %b, %D ; <double> [#uses=1]
ret double %c
}
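
Everything in @T folds at compile time once the libm calls are constant-folded; working the arithmetic through:

    ; cos(0.0) = 1.0   sin(0.0) = 0.0   tan(0.0) = 0.0   sqrt(4.0) = 2.0
    ; %a = 1.0 + 0.0 = 1.0
    ; %b = 1.0 + 0.0 = 1.0
    ; %c = 1.0 + 2.0 = 3.0
    ; so @T should reduce to: ret double 3.000000e+00
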
diff --git a/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll b/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
index 50dcf32..3b3f8ad 100644
--- a/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
+++ b/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
@@ -601,7 +601,7 @@ entry:
%tmp21362 = icmp eq i32 0, 0 ; <i1> [#uses=2]
%tmp216 = sitofp i32 %pn_restart.0.ph to float ; <float> [#uses=1]
%tmp216.upgrd.177 = fpext float %tmp216 to double ; <double> [#uses=1]
- %tmp217 = add double %tmp216.upgrd.177, 1.000000e+00 ; <double> [#uses=1]
+ %tmp217 = fadd double %tmp216.upgrd.177, 1.000000e+00 ; <double> [#uses=1]
%tmp835 = icmp sgt i32 %pn_restart.0.ph, 9 ; <i1> [#uses=0]
store i32 0, i32* @nodes
store i32 0, i32* @qnodes
diff --git a/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll b/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll
index b4cb517..180105a 100644
--- a/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll
+++ b/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll
@@ -10,8 +10,8 @@ bb.nph: ; preds = %entry
bb34: ; preds = %bb34, %bb.nph
%p.1 = phi float [ 0x3FE6A09E60000000, %bb.nph ], [ %tmp48, %bb34 ] ; <float> [#uses=1]
%tmp44 = load float* null ; <float> [#uses=1]
- %tmp46 = sub float %tmp44, 0.000000e+00 ; <float> [#uses=1]
- %tmp48 = mul float %tmp46, %p.1 ; <float> [#uses=1]
+ %tmp46 = fsub float %tmp44, 0.000000e+00 ; <float> [#uses=1]
+ %tmp48 = fmul float %tmp46, %p.1 ; <float> [#uses=1]
br i1 false, label %bb57, label %bb34
bb57: ; preds = %bb34
diff --git a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
index 0a8dd49..779e7fb 100644
--- a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
+++ b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
@@ -4,6 +4,6 @@
define double @foo() nounwind {
entry:
%tmp1 = volatile load double* @t0.1441, align 8 ; <double> [#uses=2]
- %tmp4 = mul double %tmp1, %tmp1 ; <double> [#uses=1]
+ %tmp4 = fmul double %tmp1, %tmp1 ; <double> [#uses=1]
ret double %tmp4
}
diff --git a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
index 3464be9..8a0b5b3 100644
--- a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
+++ b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
@@ -26,7 +26,7 @@ define double @test2() {
%V1 = load double* getelementptr (%T* @G, i32 0, i32 0), align 16
%V2 = load double* getelementptr (%T* @G, i32 0, i32 1), align 8
%V3 = load double* getelementptr (%T* @G, i32 0, i32 2), align 16
- %R = add double %V1, %V2
- %R2 = add double %R, %V3
+ %R = fadd double %V1, %V2
+ %R2 = fadd double %R, %V3
ret double %R2
}
diff --git a/test/Transforms/GlobalOpt/constantexpr-dangle.ll b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
index 6e33ae0..6fa139b 100644
--- a/test/Transforms/GlobalOpt/constantexpr-dangle.ll
+++ b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
@@ -7,7 +7,7 @@ define internal float @foo() {
define float @bar() {
%tmp1 = call float (...)* bitcast (float ()* @foo to float (...)*)( )
- %tmp2 = mul float %tmp1, 1.000000e+01 ; <float> [#uses=1]
+ %tmp2 = fmul float %tmp1, 1.000000e+01 ; <float> [#uses=1]
ret float %tmp2
}
diff --git a/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll b/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll
index 903e81d..b2f8258 100644
--- a/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll
+++ b/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll
@@ -18,7 +18,7 @@ cond_true52: ; preds = %cond_true27
cond_next182.i: ; preds = %cond_next182.i, %cond_true52
%decay.i.0 = phi i32 [ %tmp195.i.upgrd.1, %cond_next182.i ], [ %tmp152.i, %cond_true52 ] ; <i32> [#uses=1]
%tmp194.i53 = bitcast i32 %decay.i.0 to float ; <float> [#uses=1]
- %tmp195.i = sub float %tmp194.i53, 8.000000e+00 ; <float> [#uses=1]
+ %tmp195.i = fsub float %tmp194.i53, 8.000000e+00 ; <float> [#uses=1]
%tmp195.i.upgrd.1 = bitcast float %tmp195.i to i32 ; <i32> [#uses=1]
br i1 false, label %cond_next182.i, label %bb418.i.preheader
diff --git a/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll b/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll
index 6fc065f..be8b36f 100644
--- a/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll
+++ b/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll
@@ -6,7 +6,7 @@ entry:
bb: ; preds = %bb, %entry
%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ] ; <double> [#uses=2]
%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind ; <i32> [#uses=0]
- %1 = add double %x.0.reg2mem.0, 1.000000e+00 ; <double> [#uses=2]
+ %1 = fadd double %x.0.reg2mem.0, 1.000000e+00 ; <double> [#uses=2]
%2 = fcmp olt double %1, 1.000000e+04 ; <i1> [#uses=1]
br i1 %2, label %bb, label %return
@@ -23,7 +23,7 @@ entry:
bb: ; preds = %bb, %entry
%x.0.reg2mem.0 = phi double [ -10.000000e+00, %entry ], [ %1, %bb ] ; <double> [#uses=2]
%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind ; <i32> [#uses=0]
- %1 = add double %x.0.reg2mem.0, 2.000000e+00 ; <double> [#uses=2]
+ %1 = fadd double %x.0.reg2mem.0, 2.000000e+00 ; <double> [#uses=2]
%2 = fcmp olt double %1, -1.000000e+00 ; <i1> [#uses=1]
br i1 %2, label %bb, label %return
@@ -39,7 +39,7 @@ entry:
bb: ; preds = %bb, %entry
%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ] ; <double> [#uses=2]
%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind ; <i32> [#uses=0]
- %1 = add double %x.0.reg2mem.0, 1.000000e+00 ; <double> [#uses=2]
+ %1 = fadd double %x.0.reg2mem.0, 1.000000e+00 ; <double> [#uses=2]
%2 = fcmp olt double %1, -1.000000e+00 ; <i1> [#uses=1]
br i1 %2, label %bb, label %return
@@ -54,7 +54,7 @@ entry:
bb: ; preds = %bb, %entry
%x.0.reg2mem.0 = phi double [ 40.000000e+00, %entry ], [ %1, %bb ] ; <double> [#uses=2]
%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind ; <i32> [#uses=0]
- %1 = add double %x.0.reg2mem.0, -1.000000e+00 ; <double> [#uses=2]
+ %1 = fadd double %x.0.reg2mem.0, -1.000000e+00 ; <double> [#uses=2]
%2 = fcmp olt double %1, 1.000000e+00 ; <i1> [#uses=1]
br i1 %2, label %bb, label %return
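
These Floating IndVarSimplify tests all drive a loop counter of type double with an fadd step and an fcmp exit test; the pass is expected to rewrite such a floating-point induction variable as an integer one when every value it takes is exactly representable and the bounds fit the integer type (the 2008-11-17 companion test's 2147483646/2147483647 bounds probe that edge). Conceptually, with a hypothetical output shape:

    ; before: FP induction variable
    %x.next = fadd double %x, 1.000000e+00
    %again  = fcmp olt double %x.next, 1.000000e+04
    ; after the rewrite: integer induction variable
    %i.next = add i32 %i, 1
    %again  = icmp slt i32 %i.next, 10000
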
diff --git a/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll b/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll
index faf1da3..c947d3b 100644
--- a/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll
+++ b/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll
@@ -9,7 +9,7 @@ entry:
bb: ; preds = %bb, %entry
%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ] ; <double> [#uses=2]
%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind ; <i32> [#uses=0]
- %1 = add double %x.0.reg2mem.0, 1.0e+0 ; <double> [#uses=2]
+ %1 = fadd double %x.0.reg2mem.0, 1.0e+0 ; <double> [#uses=2]
%2 = fcmp olt double %1, 2147483646.0e+0 ; <i1> [#uses=1]
br i1 %2, label %bb, label %return
@@ -24,7 +24,7 @@ entry:
bb: ; preds = %bb, %entry
%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ] ; <double> [#uses=2]
%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind ; <i32> [#uses=0]
- %1 = add double %x.0.reg2mem.0, 1.0e+0 ; <double> [#uses=2]
+ %1 = fadd double %x.0.reg2mem.0, 1.0e+0 ; <double> [#uses=2]
%2 = fcmp olt double %1, 2147483647.0e+0 ; <i1> [#uses=1]
br i1 %2, label %bb, label %return
diff --git a/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll b/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll
index 9fd0eb9..e611b1f 100644
--- a/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll
+++ b/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll
@@ -6,6 +6,6 @@ entry:
bb23.i91: ; preds = %bb23.i91, %entry
%result.0.i89 = phi ppc_fp128 [ 0xM00000000000000000000000000000000, %entry ], [ %0, %bb23.i91 ] ; <ppc_fp128> [#uses=2]
- %0 = mul ppc_fp128 %result.0.i89, %result.0.i89 ; <ppc_fp128> [#uses=1]
+ %0 = fmul ppc_fp128 %result.0.i89, %result.0.i89 ; <ppc_fp128> [#uses=1]
br label %bb23.i91
}
diff --git a/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll b/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
index 700f294..e70d577 100644
--- a/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
@@ -9,7 +9,7 @@ entry:
loop_body:
%i = phi float [ %nexti, %loop_body ], [ 0.0, %entry ]
tail call void @foo()
- %nexti = add float %i, 1.0
+ %nexti = fadd float %i, 1.0
%less = fcmp olt float %nexti, 2.0
br i1 %less, label %loop_body, label %done
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 76d48de..d7eb7bd 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -13,16 +13,16 @@ loop:
%indvar.i8 = and i64 %indvar, 255
%t0 = getelementptr double* %d, i64 %indvar.i8
%t1 = load double* %t0
- %t2 = mul double %t1, 0.1
+ %t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = mul double %t4, 2.3
+ %t5 = fmul double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
- %t8 = mul double %t7, 4.5
+ %t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 10
diff --git a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index 5ad0af4..c7cf0dd 100644
--- a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -2,8 +2,8 @@
; RUN: grep mul | count 2
define <4 x float> @test(<4 x float> %V) {
- %Y = mul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
- %Z = mul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
+ %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
+ %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
ret <4 x float> %Z
}
diff --git a/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll b/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
index 60ee503..eaf10a3 100644
--- a/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
+++ b/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
@@ -3,7 +3,7 @@
define <4 x float> @test(<4 x float> %tmp26, <4 x float> %tmp53) {
; (X+Y)-Y != X for fp vectors.
- %tmp64 = add <4 x float> %tmp26, %tmp53 ; <<4 x float>> [#uses=1]
- %tmp75 = sub <4 x float> %tmp64, %tmp53 ; <<4 x float>> [#uses=1]
+ %tmp64 = fadd <4 x float> %tmp26, %tmp53 ; <<4 x float>> [#uses=1]
+ %tmp75 = fsub <4 x float> %tmp64, %tmp53 ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp75
}
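
The comment above captures why this test exists: the fold (X+Y)-Y ==> X is unsound for floating point because the inner addition rounds. A minimal scalar sketch with illustrative constants (not taken from the test itself):

  ; in single precision the ulp at 1.0e8 is 8.0, so the 1.0 is
  ; rounded away and %r evaluates to 0.0 rather than the original 1.0
  %s = fadd float 1.0e8, 1.0
  %r = fsub float %s, 1.0e8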
diff --git a/test/Transforms/InstCombine/2008-07-16-fsub.ll b/test/Transforms/InstCombine/2008-07-16-fsub.ll
index 1d0554d..ca4174d 100644
--- a/test/Transforms/InstCombine/2008-07-16-fsub.ll
+++ b/test/Transforms/InstCombine/2008-07-16-fsub.ll
@@ -3,6 +3,6 @@
define double @test(double %X) nounwind {
; fsub of self can't be optimized away.
- %Y = sub double %X, %X
+ %Y = fsub double %X, %X
ret double %Y
}
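
The fold being guarded against here is %X - %X ==> 0.0, which is wrong whenever %X is a NaN or an infinity. A hedged illustration (the values are invented for the example):

  %nan = fdiv double 0.0, 0.0     ; 0/0 produces a NaN
  %r   = fsub double %nan, %nan   ; NaN - NaN is NaN, not 0.0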
diff --git a/test/Transforms/InstCombine/add-sitofp.ll b/test/Transforms/InstCombine/add-sitofp.ll
index 35c6567..298b9a1 100644
--- a/test/Transforms/InstCombine/add-sitofp.ll
+++ b/test/Transforms/InstCombine/add-sitofp.ll
@@ -4,6 +4,6 @@ define double @x(i32 %a, i32 %b) nounwind {
%m = lshr i32 %a, 24
%n = and i32 %m, %b
%o = sitofp i32 %n to double
- %p = add double %o, 1.0
+ %p = fadd double %o, 1.0
ret double %p
}
diff --git a/test/Transforms/InstCombine/dce-iterate.ll b/test/Transforms/InstCombine/dce-iterate.ll
index e222970..faefa8a 100644
--- a/test/Transforms/InstCombine/dce-iterate.ll
+++ b/test/Transforms/InstCombine/dce-iterate.ll
@@ -18,7 +18,7 @@ entry:
%c = lshr i960 %sz101112.ins, 320 ; <i960> [#uses=1]
%d = trunc i960 %c to i64 ; <i64> [#uses=1]
%e = bitcast i64 %d to double ; <double> [#uses=1]
- %f = add double %b, %e
+ %f = fadd double %b, %e
ret double %e
}
diff --git a/test/Transforms/InstCombine/fpextend.ll b/test/Transforms/InstCombine/fpextend.ll
index 5971080..c212128 100644
--- a/test/Transforms/InstCombine/fpextend.ll
+++ b/test/Transforms/InstCombine/fpextend.ll
@@ -6,7 +6,7 @@ define void @test() nounwind {
entry:
%tmp = load float* @X, align 4 ; <float> [#uses=1]
%tmp1 = fpext float %tmp to double ; <double> [#uses=1]
- %tmp3 = add double %tmp1, 0.000000e+00 ; <double> [#uses=1]
+ %tmp3 = fadd double %tmp1, 0.000000e+00 ; <double> [#uses=1]
%tmp34 = fptrunc double %tmp3 to float ; <float> [#uses=1]
store float %tmp34, float* @X, align 4
ret void
@@ -28,7 +28,7 @@ define void @test4() nounwind {
entry:
%tmp = load float* @X, align 4 ; <float> [#uses=1]
%tmp1 = fpext float %tmp to double ; <double> [#uses=1]
- %tmp2 = sub double -0.000000e+00, %tmp1 ; <double> [#uses=1]
+ %tmp2 = fsub double -0.000000e+00, %tmp1 ; <double> [#uses=1]
%tmp34 = fptrunc double %tmp2 to float ; <float> [#uses=1]
store float %tmp34, float* @X, align 4
ret void
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 1a74025..9b5f7a5 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -20,7 +20,7 @@ define i32 @test3(i32 %A) {
define double @test4(double %A) {
; This is safe for FP
- %B = mul double 1.000000e+00, %A ; <double> [#uses=1]
+ %B = fmul double 1.000000e+00, %A ; <double> [#uses=1]
ret double %B
}
@@ -79,15 +79,7 @@ define i32 @test12(i8 %a, i32 %b) {
; PR2642
define internal void @test13(<4 x float>*) {
load <4 x float>* %0, align 1
- mul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
+ fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
store <4 x float> %3, <4 x float>* %0, align 1
ret void
}
-
-define internal void @test14(<4 x float>*) {
- load <4 x float>* %0, align 1
- mul <4 x float> %2, zeroinitializer
- store <4 x float> %3, <4 x float>* %0, align 1
- ret void
-}
-
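
The "safe for FP" comment refers to multiplication by 1.0 being an identity for every input, so instcombine may still fold the rewritten fmul to %A. Addition is the contrasting case: 0.0 is not an additive identity because -0.0 + 0.0 yields +0.0. A sketch of the distinction (illustrative, not part of the patch):

  %p = fmul double 1.0, %A    ; foldable to %A, per the test's claim
  %q = fadd double %A, 0.0    ; not foldable: changes -0.0 to +0.0
  %r = fadd double %A, -0.0   ; foldable: -0.0 is the additive identity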
diff --git a/test/Transforms/InstCombine/multi-use-or.ll b/test/Transforms/InstCombine/multi-use-or.ll
index 85a8b34..4804967 100644
--- a/test/Transforms/InstCombine/multi-use-or.ll
+++ b/test/Transforms/InstCombine/multi-use-or.ll
@@ -17,7 +17,7 @@ entry:
%c = lshr i192 %sy222324.ins, 128 ; <i192> [#uses=1]
%d = trunc i192 %c to i64 ; <i64> [#uses=1]
%e = bitcast i64 %d to double ; <double> [#uses=1]
- %f = add double %b, %e
+ %f = fadd double %b, %e
; ret double %e
ret double %f
diff --git a/test/Transforms/InstCombine/shufflemask-undef.ll b/test/Transforms/InstCombine/shufflemask-undef.ll
index 2438417..a9e8d34 100644
--- a/test/Transforms/InstCombine/shufflemask-undef.ll
+++ b/test/Transforms/InstCombine/shufflemask-undef.ll
@@ -75,16 +75,16 @@ bb266.i:
shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:3 [#uses=1]
shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:4 [#uses=1]
shufflevector <4 x float> %4, <4 x float> %3, <4 x i32> < i32 6, i32 7, i32 2, i32 3 > ; <<4 x float>>:5 [#uses=1]
- mul <4 x float> %5, zeroinitializer ; <<4 x float>>:6 [#uses=2]
- mul <4 x float> %6, %6 ; <<4 x float>>:7 [#uses=1]
- add <4 x float> zeroinitializer, %7 ; <<4 x float>>:8 [#uses=1]
+ fmul <4 x float> %5, zeroinitializer ; <<4 x float>>:6 [#uses=2]
+ fmul <4 x float> %6, %6 ; <<4 x float>>:7 [#uses=1]
+ fadd <4 x float> zeroinitializer, %7 ; <<4 x float>>:8 [#uses=1]
call <4 x float> @llvm.x86.sse.max.ps( <4 x float> zeroinitializer, <4 x float> %8 ) nounwind readnone ; <<4 x float>>:9 [#uses=1]
%phitmp40 = bitcast <4 x float> %9 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp4109.i = and <4 x i32> %phitmp40, < i32 8388607, i32 8388607, i32 8388607, i32 8388607 > ; <<4 x i32>> [#uses=1]
%tmp4116.i = or <4 x i32> %tmp4109.i, < i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216 > ; <<4 x i32>> [#uses=1]
%tmp4117.i = bitcast <4 x i32> %tmp4116.i to <4 x float> ; <<4 x float>> [#uses=1]
- add <4 x float> %tmp4117.i, zeroinitializer ; <<4 x float>>:10 [#uses=1]
- mul <4 x float> %10, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 > ; <<4 x float>>:11 [#uses=1]
+ fadd <4 x float> %tmp4117.i, zeroinitializer ; <<4 x float>>:10 [#uses=1]
+ fmul <4 x float> %10, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 > ; <<4 x float>>:11 [#uses=1]
call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %11, <4 x float> zeroinitializer ) nounwind readnone ; <<4 x float>>:12 [#uses=1]
call <4 x float> @llvm.x86.sse.min.ps( <4 x float> %12, <4 x float> zeroinitializer ) nounwind readnone ; <<4 x float>>:13 [#uses=1]
%tmp4170.i = call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %13, <4 x float> zeroinitializer, i8 2 ) nounwind ; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/InstCombine/signed-comparison.ll b/test/Transforms/InstCombine/signed-comparison.ll
index fdf150f..86e07ec7 100644
--- a/test/Transforms/InstCombine/signed-comparison.ll
+++ b/test/Transforms/InstCombine/signed-comparison.ll
@@ -14,7 +14,7 @@ bb:
%t0 = and i64 %indvar, 65535
%t1 = getelementptr double* %p, i64 %t0
%t2 = load double* %t1, align 8
- %t3 = mul double %t2, 2.2
+ %t3 = fmul double %t2, 2.2
store double %t3, double* %t1, align 8
%i.04 = trunc i64 %indvar to i16
%t4 = add i16 %i.04, 1
diff --git a/test/Transforms/InstCombine/sitofp.ll b/test/Transforms/InstCombine/sitofp.ll
index c26c351..2bf7385 100644
--- a/test/Transforms/InstCombine/sitofp.ll
+++ b/test/Transforms/InstCombine/sitofp.ll
@@ -36,7 +36,7 @@ define i32 @test6(i32 %A) {
%C = and i32 %A, 32 ; <i32> [#uses=1]
%D = sitofp i32 %B to double ; <double> [#uses=1]
%E = sitofp i32 %C to double ; <double> [#uses=1]
- %F = add double %D, %E ; <double> [#uses=1]
+ %F = fadd double %D, %E ; <double> [#uses=1]
%G = fptosi double %F to i32 ; <i32> [#uses=1]
ret i32 %G
}
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 03e070f..95df8c6 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -1,7 +1,7 @@
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
-; RUN: grep {sub float}
+; RUN: grep {fadd float}
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
-; RUN: grep {mul float}
+; RUN: grep {fmul float}
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
; RUN: not grep {insertelement.*0.00}
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
@@ -26,7 +26,7 @@ entry:
}
define i32 @test2(float %f) {
- %tmp5 = mul float %f, %f
+ %tmp5 = fmul float %f, %f
%tmp9 = insertelement <4 x float> undef, float %tmp5, i32 0
%tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
diff --git a/test/Transforms/InstCombine/vec_narrow.ll b/test/Transforms/InstCombine/vec_narrow.ll
index 9063148..e444c2a 100644
--- a/test/Transforms/InstCombine/vec_narrow.ll
+++ b/test/Transforms/InstCombine/vec_narrow.ll
@@ -5,7 +5,7 @@
define float @test(%V %A, %V %B, float %f) {
%C = insertelement %V %A, float %f, i32 0 ; <%V> [#uses=1]
- %D = add %V %C, %B ; <%V> [#uses=1]
+ %D = fadd %V %C, %B ; <%V> [#uses=1]
%E = extractelement %V %D, i32 0 ; <float> [#uses=1]
ret float %E
}
diff --git a/test/Transforms/InstCombine/zero-point-zero-add.ll b/test/Transforms/InstCombine/zero-point-zero-add.ll
index bae60d9..adb28e4 100644
--- a/test/Transforms/InstCombine/zero-point-zero-add.ll
+++ b/test/Transforms/InstCombine/zero-point-zero-add.ll
@@ -3,13 +3,13 @@
declare double @abs(double)
define double @test(double %X) {
- %Y = add double %X, 0.0 ;; Should be a single add x, 0.0
- %Z = add double %Y, 0.0
+ %Y = fadd double %X, 0.0 ;; Should be a single add x, 0.0
+ %Z = fadd double %Y, 0.0
ret double %Z
}
define double @test1(double %X) {
%Y = call double @abs(double %X)
- %Z = add double %Y, 0.0
+ %Z = fadd double %Y, 0.0
ret double %Z
}
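
Why the expected output here is one add rather than none: fadd %X, 0.0 can change the sign of a zero (-0.0 + 0.0 is +0.0), so it must be kept, but its result is never -0.0, which makes the second addition of 0.0 a true no-op. Sketched with the test's own operands:

  %Y = fadd double %X, 0.0   ; must stay: not an identity for %X = -0.0
  %Z = fadd double %Y, 0.0   ; %Y is never -0.0, so this folds to %Y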
diff --git a/test/Transforms/LCSSA/2007-07-12-LICM-2.ll b/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
index 58bb19d..e8dc391 100644
--- a/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
+++ b/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
@@ -5,10 +5,10 @@ entry:
bb7: ; preds = %bb7, %entry
%tmp39 = load <4 x float>* null ; <<4 x float>> [#uses=1]
- %tmp40 = add <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 > ; <<4 x float>> [#uses=1]
- %tmp43 = add <4 x float> %tmp40, < float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 2.000000e+00 > ; <<4 x float>> [#uses=1]
- %tmp46 = add <4 x float> %tmp43, < float 3.000000e+00, float 0.000000e+00, float 2.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
- %tmp49 = add <4 x float> %tmp46, < float 0.000000e+00, float 4.000000e+00, float 6.000000e+00, float 1.000000e+00 > ; <<4 x float>> [#uses=1]
+ %tmp40 = fadd <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 > ; <<4 x float>> [#uses=1]
+ %tmp43 = fadd <4 x float> %tmp40, < float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 2.000000e+00 > ; <<4 x float>> [#uses=1]
+ %tmp46 = fadd <4 x float> %tmp43, < float 3.000000e+00, float 0.000000e+00, float 2.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
+ %tmp49 = fadd <4 x float> %tmp46, < float 0.000000e+00, float 4.000000e+00, float 6.000000e+00, float 1.000000e+00 > ; <<4 x float>> [#uses=1]
store <4 x float> %tmp49, <4 x float>* null
br i1 false, label %bb7, label %bb56
diff --git a/test/Transforms/LCSSA/2007-07-12-LICM-3.ll b/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
index 79370ee..72cebed 100644
--- a/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
+++ b/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
@@ -9,10 +9,10 @@ bb: ; preds = %bb56, %entry
bb7: ; preds = %bb7, %bb
%tmp39 = load <4 x float>* null ; <<4 x float>> [#uses=1]
- %tmp40 = add <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 > ; <<4 x float>> [#uses=1]
- %tmp43 = add <4 x float> %tmp40, < float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 2.000000e+00 > ; <<4 x float>> [#uses=1]
- %tmp46 = add <4 x float> %tmp43, < float 3.000000e+00, float 0.000000e+00, float 2.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
- %tmp49 = add <4 x float> %tmp46, < float 0.000000e+00, float 4.000000e+00, float 6.000000e+00, float 1.000000e+00 > ; <<4 x float>> [#uses=1]
+ %tmp40 = fadd <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 > ; <<4 x float>> [#uses=1]
+ %tmp43 = fadd <4 x float> %tmp40, < float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 2.000000e+00 > ; <<4 x float>> [#uses=1]
+ %tmp46 = fadd <4 x float> %tmp43, < float 3.000000e+00, float 0.000000e+00, float 2.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
+ %tmp49 = fadd <4 x float> %tmp46, < float 0.000000e+00, float 4.000000e+00, float 6.000000e+00, float 1.000000e+00 > ; <<4 x float>> [#uses=1]
store <4 x float> %tmp49, <4 x float>* null
br i1 false, label %bb7, label %bb56
diff --git a/test/Transforms/LCSSA/2007-07-12-LICM.ll b/test/Transforms/LCSSA/2007-07-12-LICM.ll
index 1c9830e..0c433c3 100644
--- a/test/Transforms/LCSSA/2007-07-12-LICM.ll
+++ b/test/Transforms/LCSSA/2007-07-12-LICM.ll
@@ -5,7 +5,7 @@ entry:
bb7: ; preds = %bb7, %entry
%tmp39 = load <4 x float>* null ; <<4 x float>> [#uses=1]
- %tmp40 = add <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 > ; <<4 x float>> [#uses=0]
+ %tmp40 = fadd <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 > ; <<4 x float>> [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* null
br i1 false, label %bb7, label %bb56
diff --git a/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll b/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
index ec29847..928fd95 100644
--- a/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
+++ b/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
@@ -8,7 +8,7 @@ entry:
bb.preheader: ; preds = %entry
%tmp3031 = fpext float %contribution to double ; <double> [#uses=1]
- %tmp32 = mul double %tmp3031, 5.000000e-01 ; <double> [#uses=1]
+ %tmp32 = fmul double %tmp3031, 5.000000e-01 ; <double> [#uses=1]
%tmp3839 = fpext float %sigmal to double ; <double> [#uses=1]
br label %bb
@@ -22,19 +22,19 @@ bb: ; preds = %bb.preheader, %cond_next45
cond_true9: ; preds = %bb
%tmp12 = getelementptr float* %x, i32 %i.01.0 ; <float*> [#uses=1]
%tmp13 = load float* %tmp12, align 4 ; <float> [#uses=1]
- %tmp15 = sub float %xcen, %tmp13 ; <float> [#uses=1]
+ %tmp15 = fsub float %xcen, %tmp13 ; <float> [#uses=1]
%tmp16 = tail call float @fabsf( float %tmp15 ) ; <float> [#uses=1]
%tmp18 = fdiv float %tmp16, %sigmal ; <float> [#uses=1]
%tmp21 = load float** %y, align 4 ; <float*> [#uses=2]
%tmp27 = getelementptr float* %tmp21, i32 %i.01.0 ; <float*> [#uses=1]
%tmp28 = load float* %tmp27, align 4 ; <float> [#uses=1]
%tmp2829 = fpext float %tmp28 to double ; <double> [#uses=1]
- %tmp34 = sub float -0.000000e+00, %tmp18 ; <float> [#uses=1]
+ %tmp34 = fsub float -0.000000e+00, %tmp18 ; <float> [#uses=1]
%tmp3435 = fpext float %tmp34 to double ; <double> [#uses=1]
%tmp36 = tail call double @exp( double %tmp3435 ) ; <double> [#uses=1]
- %tmp37 = mul double %tmp32, %tmp36 ; <double> [#uses=1]
+ %tmp37 = fmul double %tmp32, %tmp36 ; <double> [#uses=1]
%tmp40 = fdiv double %tmp37, %tmp3839 ; <double> [#uses=1]
- %tmp41 = add double %tmp2829, %tmp40 ; <double> [#uses=1]
+ %tmp41 = fadd double %tmp2829, %tmp40 ; <double> [#uses=1]
%tmp4142 = fptrunc double %tmp41 to float ; <float> [#uses=1]
%tmp44 = getelementptr float* %tmp21, i32 %i.01.0 ; <float*> [#uses=1]
store float %tmp4142, float* %tmp44, align 4
diff --git a/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll b/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
index 7d93785..6619c7d 100644
--- a/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
+++ b/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
@@ -9,7 +9,7 @@ entry:
bb.preheader: ; preds = %entry
%tmp3031 = fpext float %contribution to double ; <double> [#uses=1]
- %tmp32 = mul double %tmp3031, 5.000000e-01 ; <double> [#uses=1]
+ %tmp32 = fmul double %tmp3031, 5.000000e-01 ; <double> [#uses=1]
%tmp3839 = fpext float %sigmal to double ; <double> [#uses=1]
br label %bb
@@ -24,19 +24,19 @@ bb: ; preds = %cond_next45, %bb.preheader
cond_true9: ; preds = %bb
%tmp12 = getelementptr float* %x, i32 %i.01.0 ; <float*> [#uses=1]
%tmp13 = load float* %tmp12, align 4 ; <float> [#uses=1]
- %tmp15 = sub float %xcen, %tmp13 ; <float> [#uses=1]
+ %tmp15 = fsub float %xcen, %tmp13 ; <float> [#uses=1]
%tmp16 = tail call float @fabsf(float %tmp15) ; <float> [#uses=1]
%tmp18 = fdiv float %tmp16, %sigmal ; <float> [#uses=1]
%tmp21 = load float** %y, align 4 ; <float*> [#uses=2]
%tmp27 = getelementptr float* %tmp21, i32 %k.06.0 ; <float*> [#uses=1]
%tmp28 = load float* %tmp27, align 4 ; <float> [#uses=1]
%tmp2829 = fpext float %tmp28 to double ; <double> [#uses=1]
- %tmp34 = sub float -0.000000e+00, %tmp18 ; <float> [#uses=1]
+ %tmp34 = fsub float -0.000000e+00, %tmp18 ; <float> [#uses=1]
%tmp3435 = fpext float %tmp34 to double ; <double> [#uses=1]
%tmp36 = tail call double @exp(double %tmp3435) ; <double> [#uses=1]
- %tmp37 = mul double %tmp32, %tmp36 ; <double> [#uses=1]
+ %tmp37 = fmul double %tmp32, %tmp36 ; <double> [#uses=1]
%tmp40 = fdiv double %tmp37, %tmp3839 ; <double> [#uses=1]
- %tmp41 = add double %tmp2829, %tmp40 ; <double> [#uses=1]
+ %tmp41 = fadd double %tmp2829, %tmp40 ; <double> [#uses=1]
%tmp4142 = fptrunc double %tmp41 to float ; <float> [#uses=1]
%tmp44 = getelementptr float* %tmp21, i32 %k.06.0 ; <float*> [#uses=1]
store float %tmp4142, float* %tmp44, align 4
diff --git a/test/Transforms/Mem2Reg/PromoteMemToRegister.ll b/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
index fdc33fb..63b8c78 100644
--- a/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
+++ b/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
@@ -12,7 +12,7 @@ define double @testfunc(i32 %i, double %j) {
%t3 = load i32* %I ; <i32> [#uses=1]
%t4 = sitofp i32 %t3 to double ; <double> [#uses=1]
%t5 = load double* %J ; <double> [#uses=1]
- %t6 = mul double %t4, %t5 ; <double> [#uses=1]
+ %t6 = fmul double %t4, %t5 ; <double> [#uses=1]
ret double %t6
}
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index c5cdc29..94daee0 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -7,7 +7,7 @@ define void @ccosl({ x86_fp80, x86_fp80 }* sret %agg.result, x86_fp80 %z.0, x86
entry:
%tmp2 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
%memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- %tmp5 = sub x86_fp80 0xK80000000000000000000, %z.1 ; <x86_fp80> [#uses=1]
+ %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1 ; <x86_fp80> [#uses=1]
call void @ccoshl( { x86_fp80, x86_fp80 }* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind
%tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8* ; <i8*> [#uses=2]
%memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index 1ac11aa..ad9fb1b 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -9,7 +9,7 @@ entry:
%memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
%tmp2 = load x86_fp80* %tmp1, align 16 ; <x86_fp80> [#uses=1]
- %tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2 ; <x86_fp80> [#uses=1]
+ %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2 ; <x86_fp80> [#uses=1]
%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
%tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
diff --git a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
index c3600ab..74434f4 100644
--- a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
+++ b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
@@ -477,12 +477,12 @@ invcont3: ; preds = %bb2
unreachable
bb4: ; preds = %invcont
- %3 = mul x86_fp80 %0, 0xK40008000000000000000 ; <x86_fp80> [#uses=1]
+ %3 = fmul x86_fp80 %0, 0xK40008000000000000000 ; <x86_fp80> [#uses=1]
%4 = fcmp ult x86_fp80 %3, 0xKC0068000000000000000 ; <i1> [#uses=1]
br i1 %4, label %bb8, label %bb6
bb6: ; preds = %bb4
- %5 = mul x86_fp80 %0, 0xK40008000000000000000 ; <x86_fp80> [#uses=1]
+ %5 = fmul x86_fp80 %0, 0xK40008000000000000000 ; <x86_fp80> [#uses=1]
%6 = fcmp ugt x86_fp80 %5, 0xK4005FE00000000000000 ; <i1> [#uses=1]
br i1 %6, label %bb8, label %bb10
@@ -494,16 +494,16 @@ invcont9: ; preds = %bb8
unreachable
bb10: ; preds = %bb6
- %7 = mul x86_fp80 %0, 0xK40008000000000000000 ; <x86_fp80> [#uses=3]
+ %7 = fmul x86_fp80 %0, 0xK40008000000000000000 ; <x86_fp80> [#uses=3]
%8 = fcmp ult x86_fp80 %7, 0xK00000000000000000000 ; <i1> [#uses=1]
br i1 %8, label %bb13, label %bb12
bb12: ; preds = %bb10
- %9 = add x86_fp80 %7, 0xK3FFDFFFFFFFFFFFFFFFF ; <x86_fp80> [#uses=1]
+ %9 = fadd x86_fp80 %7, 0xK3FFDFFFFFFFFFFFFFFFF ; <x86_fp80> [#uses=1]
br label %bb14
bb13: ; preds = %bb10
- %10 = sub x86_fp80 %7, 0xK3FFDFFFFFFFFFFFFFFFF ; <x86_fp80> [#uses=1]
+ %10 = fsub x86_fp80 %7, 0xK3FFDFFFFFFFFFFFFFFFF ; <x86_fp80> [#uses=1]
br label %bb14
bb14: ; preds = %bb13, %bb12
diff --git a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
index 05d6103..3662e09 100644
--- a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
@@ -1,8 +1,8 @@
; RUN: llvm-as < %s | opt -reassociate -disable-output
define void @foo() {
- %tmp162 = sub <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>> [#uses=1]
- %tmp164 = mul <4 x float> zeroinitializer, %tmp162 ; <<4 x float>> [#uses=0]
+ %tmp162 = fsub <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>> [#uses=1]
+ %tmp164 = fmul <4 x float> zeroinitializer, %tmp162 ; <<4 x float>> [#uses=0]
ret void
}
diff --git a/test/Transforms/SCCP/2006-12-04-PackedType.ll b/test/Transforms/SCCP/2006-12-04-PackedType.ll
index b7a7880..0e268c2 100644
--- a/test/Transforms/SCCP/2006-12-04-PackedType.ll
+++ b/test/Transforms/SCCP/2006-12-04-PackedType.ll
@@ -112,7 +112,7 @@ cond_true93: ; preds = %entry
%tmp.upgrd.1 = getelementptr %struct.GLDContextRec* %ctx, i32 0, i32 31, i32 14 ; <i32*> [#uses=1]
%tmp95 = load i32* %tmp.upgrd.1 ; <i32> [#uses=1]
%tmp95.upgrd.2 = sitofp i32 %tmp95 to float ; <float> [#uses=1]
- %tmp108 = mul float undef, %tmp95.upgrd.2 ; <float> [#uses=1]
+ %tmp108 = fmul float undef, %tmp95.upgrd.2 ; <float> [#uses=1]
br label %cond_next116
cond_next116: ; preds = %cond_true93, %entry
%point_size.2 = phi float [ %tmp108, %cond_true93 ], [ undef, %entry ] ; <float> [#uses=2]
@@ -130,7 +130,7 @@ cond_true462: ; preds = %cond_true458
cond_true467: ; preds = %cond_true462
ret void
cond_next484: ; preds = %cond_next116
- %tmp486 = mul float %point_size.2, 5.000000e-01 ; <float> [#uses=1]
+ %tmp486 = fmul float %point_size.2, 5.000000e-01 ; <float> [#uses=1]
br label %cond_next487
cond_next487: ; preds = %cond_next484, %cond_true462, %cond_true458
%radius.0 = phi float [ %tmp486, %cond_next484 ], [ 5.000000e-01, %cond_true458 ], [ 5.000000e-01, %cond_true462 ] ; <float> [#uses=2]
diff --git a/test/Transforms/SCCP/apint-ipsccp4.ll b/test/Transforms/SCCP/apint-ipsccp4.ll
index de355d1..a0656b7 100644
--- a/test/Transforms/SCCP/apint-ipsccp4.ll
+++ b/test/Transforms/SCCP/apint-ipsccp4.ll
@@ -35,10 +35,10 @@ define float @All()
%B = fcmp oge float %A, 1.0
br i1 %B, label %T, label %F
T:
- %C = add float %A, 1.0
+ %C = fadd float %A, 1.0
br label %exit
F:
- %D = add float %A, 2.0
+ %D = fadd float %A, 2.0
br label %exit
exit:
%E = phi float [%C, %T], [%D, %F]
diff --git a/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll b/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll
index 13055ea..facb7c1 100644
--- a/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll
+++ b/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll
@@ -1766,7 +1766,7 @@ _ZL13random_doublev.exit: ; preds = %bb.i, %bb7
call void @llvm.dbg.stoppoint(i32 75, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
%22 = load i32* @_ZZL13random_doublevE4seed, align 4 ; <i32> [#uses=2]
%23 = sitofp i32 %22 to double ; <double> [#uses=1]
- %24 = mul double %23, 0x3E340000002813D9 ; <double> [#uses=1]
+ %24 = fmul double %23, 0x3E340000002813D9 ; <double> [#uses=1]
call void @llvm.dbg.stoppoint(i32 76, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
%25 = xor i32 %22, 123459876 ; <i32> [#uses=1]
store i32 %25, i32* @_ZZL13random_doublevE4seed, align 4
@@ -1803,7 +1803,7 @@ bb8: ; preds = %bb.i1, %_ZL13random_doublev.exit
call void @llvm.dbg.stoppoint(i32 75, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
%38 = load i32* @_ZZL13random_doublevE4seed, align 4 ; <i32> [#uses=2]
%39 = sitofp i32 %38 to double ; <double> [#uses=1]
- %40 = mul double %39, 0x3E340000002813D9 ; <double> [#uses=1]
+ %40 = fmul double %39, 0x3E340000002813D9 ; <double> [#uses=1]
call void @llvm.dbg.stoppoint(i32 76, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
%41 = xor i32 %38, 123459876 ; <i32> [#uses=1]
store i32 %41, i32* @_ZZL13random_doublevE4seed, align 4
@@ -2110,16 +2110,16 @@ entry:
%real7 = load double* %real6, align 8 ; <double> [#uses=4]
%imag8 = getelementptr %1* %memtmp1, i32 0, i32 1 ; <double*> [#uses=1]
%imag9 = load double* %imag8, align 8 ; <double> [#uses=4]
- %21 = mul double %real3, %real7 ; <double> [#uses=1]
- %22 = mul double %imag5, %imag9 ; <double> [#uses=1]
- %23 = add double %21, %22 ; <double> [#uses=1]
- %24 = mul double %real7, %real7 ; <double> [#uses=1]
- %25 = mul double %imag9, %imag9 ; <double> [#uses=1]
- %26 = add double %24, %25 ; <double> [#uses=2]
+ %21 = fmul double %real3, %real7 ; <double> [#uses=1]
+ %22 = fmul double %imag5, %imag9 ; <double> [#uses=1]
+ %23 = fadd double %21, %22 ; <double> [#uses=1]
+ %24 = fmul double %real7, %real7 ; <double> [#uses=1]
+ %25 = fmul double %imag9, %imag9 ; <double> [#uses=1]
+ %26 = fadd double %24, %25 ; <double> [#uses=2]
%27 = fdiv double %23, %26 ; <double> [#uses=1]
- %28 = mul double %imag5, %real7 ; <double> [#uses=1]
- %29 = mul double %real3, %imag9 ; <double> [#uses=1]
- %30 = sub double %28, %29 ; <double> [#uses=1]
+ %28 = fmul double %imag5, %real7 ; <double> [#uses=1]
+ %29 = fmul double %real3, %imag9 ; <double> [#uses=1]
+ %30 = fsub double %28, %29 ; <double> [#uses=1]
%31 = fdiv double %30, %26 ; <double> [#uses=1]
%real10 = getelementptr %1* %0, i32 0, i32 0 ; <double*> [#uses=1]
store double %27, double* %real10, align 8
@@ -2227,12 +2227,12 @@ entry:
%real9 = load double* %real8, align 8 ; <double> [#uses=2]
%imag10 = getelementptr %1* %memtmp3, i32 0, i32 1 ; <double*> [#uses=1]
%imag11 = load double* %imag10, align 8 ; <double> [#uses=2]
- %27 = mul double %real5, %real9 ; <double> [#uses=1]
- %28 = mul double %imag7, %imag11 ; <double> [#uses=1]
- %29 = sub double %27, %28 ; <double> [#uses=1]
- %30 = mul double %real5, %imag11 ; <double> [#uses=1]
- %31 = mul double %real9, %imag7 ; <double> [#uses=1]
- %32 = add double %30, %31 ; <double> [#uses=1]
+ %27 = fmul double %real5, %real9 ; <double> [#uses=1]
+ %28 = fmul double %imag7, %imag11 ; <double> [#uses=1]
+ %29 = fsub double %27, %28 ; <double> [#uses=1]
+ %30 = fmul double %real5, %imag11 ; <double> [#uses=1]
+ %31 = fmul double %real9, %imag7 ; <double> [#uses=1]
+ %32 = fadd double %30, %31 ; <double> [#uses=1]
%real12 = getelementptr %1* %0, i32 0, i32 0 ; <double*> [#uses=1]
store double %29, double* %real12, align 8
%imag13 = getelementptr %1* %0, i32 0, i32 1 ; <double*> [#uses=1]
@@ -2384,10 +2384,10 @@ entry:
call void @llvm.dbg.stoppoint(i32 444, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit5 to %0*))
%0 = call double* @_ZNKSt7complexIdE4imagEv(%"struct.std::complex<double>"* %__x) nounwind ; <double*> [#uses=1]
%1 = load double* %0, align 8 ; <double> [#uses=1]
- %2 = sub double -0.000000e+00, %1 ; <double> [#uses=1]
+ %2 = fsub double -0.000000e+00, %1 ; <double> [#uses=1]
%3 = call double* @_ZNKSt7complexIdE4realEv(%"struct.std::complex<double>"* %__x) nounwind ; <double*> [#uses=1]
%4 = load double* %3, align 8 ; <double> [#uses=1]
- %5 = sub double -0.000000e+00, %4 ; <double> [#uses=1]
+ %5 = fsub double -0.000000e+00, %4 ; <double> [#uses=1]
call void @_ZNSt7complexIdEC1Edd(%"struct.std::complex<double>"* %agg.result, double %5, double %2) nounwind
call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram576 to %0*))
ret void
@@ -2497,16 +2497,16 @@ entry:
%real9 = load double* %real8, align 8 ; <double> [#uses=4]
%imag10 = getelementptr %1* %memtmp3, i32 0, i32 1 ; <double*> [#uses=1]
%imag11 = load double* %imag10, align 8 ; <double> [#uses=4]
- %27 = mul double %real5, %real9 ; <double> [#uses=1]
- %28 = mul double %imag7, %imag11 ; <double> [#uses=1]
- %29 = add double %27, %28 ; <double> [#uses=1]
- %30 = mul double %real9, %real9 ; <double> [#uses=1]
- %31 = mul double %imag11, %imag11 ; <double> [#uses=1]
- %32 = add double %30, %31 ; <double> [#uses=2]
+ %27 = fmul double %real5, %real9 ; <double> [#uses=1]
+ %28 = fmul double %imag7, %imag11 ; <double> [#uses=1]
+ %29 = fadd double %27, %28 ; <double> [#uses=1]
+ %30 = fmul double %real9, %real9 ; <double> [#uses=1]
+ %31 = fmul double %imag11, %imag11 ; <double> [#uses=1]
+ %32 = fadd double %30, %31 ; <double> [#uses=2]
%33 = fdiv double %29, %32 ; <double> [#uses=1]
- %34 = mul double %imag7, %real9 ; <double> [#uses=1]
- %35 = mul double %real5, %imag11 ; <double> [#uses=1]
- %36 = sub double %34, %35 ; <double> [#uses=1]
+ %34 = fmul double %imag7, %real9 ; <double> [#uses=1]
+ %35 = fmul double %real5, %imag11 ; <double> [#uses=1]
+ %36 = fsub double %34, %35 ; <double> [#uses=1]
%37 = fdiv double %36, %32 ; <double> [#uses=1]
%real12 = getelementptr %1* %0, i32 0, i32 0 ; <double*> [#uses=1]
store double %33, double* %real12, align 8
@@ -2554,7 +2554,7 @@ entry:
%1 = load double* %0, align 4 ; <double> [#uses=1]
%2 = call double* @_ZNKSt7complexIdE4realEv(%"struct.std::complex<double>"* %__z) nounwind ; <double*> [#uses=1]
%3 = load double* %2, align 8 ; <double> [#uses=1]
- %4 = add double %1, %3 ; <double> [#uses=1]
+ %4 = fadd double %1, %3 ; <double> [#uses=1]
%5 = getelementptr %"struct.std::complex<double>"* %this, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
store double %4, double* %5, align 4
call void @llvm.dbg.stoppoint(i32 1271, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit5 to %0*))
@@ -2562,7 +2562,7 @@ entry:
%7 = load double* %6, align 4 ; <double> [#uses=1]
%8 = call double* @_ZNKSt7complexIdE4imagEv(%"struct.std::complex<double>"* %__z) nounwind ; <double*> [#uses=1]
%9 = load double* %8, align 8 ; <double> [#uses=1]
- %10 = add double %7, %9 ; <double> [#uses=1]
+ %10 = fadd double %7, %9 ; <double> [#uses=1]
%11 = getelementptr %"struct.std::complex<double>"* %this, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
store double %10, double* %11, align 4
call void @llvm.dbg.stoppoint(i32 1272, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit5 to %0*))
@@ -2599,7 +2599,7 @@ entry:
%1 = load double* %0, align 4 ; <double> [#uses=1]
%2 = call double* @_ZNKSt7complexIdE4realEv(%"struct.std::complex<double>"* %__z) nounwind ; <double*> [#uses=1]
%3 = load double* %2, align 8 ; <double> [#uses=1]
- %4 = sub double %1, %3 ; <double> [#uses=1]
+ %4 = fsub double %1, %3 ; <double> [#uses=1]
%5 = getelementptr %"struct.std::complex<double>"* %this, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
store double %4, double* %5, align 4
call void @llvm.dbg.stoppoint(i32 1280, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit5 to %0*))
@@ -2607,7 +2607,7 @@ entry:
%7 = load double* %6, align 4 ; <double> [#uses=1]
%8 = call double* @_ZNKSt7complexIdE4imagEv(%"struct.std::complex<double>"* %__z) nounwind ; <double*> [#uses=1]
%9 = load double* %8, align 8 ; <double> [#uses=1]
- %10 = sub double %7, %9 ; <double> [#uses=1]
+ %10 = fsub double %7, %9 ; <double> [#uses=1]
%11 = getelementptr %"struct.std::complex<double>"* %this, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
store double %10, double* %11, align 4
call void @llvm.dbg.stoppoint(i32 1281, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit5 to %0*))
diff --git a/test/Transforms/ScalarRepl/copy-aggregate.ll b/test/Transforms/ScalarRepl/copy-aggregate.ll
index 4ab17ae..a1ad3f9 100644
--- a/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ b/test/Transforms/ScalarRepl/copy-aggregate.ll
@@ -25,7 +25,7 @@ define float @test2(i128 %V) nounwind {
%B = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 3
%a = load float* %A
%b = load float* %B
- %c = add float %a, %b
+ %c = fadd float %a, %b
ret float %c
}
diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/ScalarRepl/memcpy-from-global.ll
index ee77e1f..e62ccc2 100644
--- a/test/Transforms/ScalarRepl/memcpy-from-global.ll
+++ b/test/Transforms/ScalarRepl/memcpy-from-global.ll
@@ -10,23 +10,23 @@ entry:
%tmp5 = and i32 %tmp3, 124 ; <i32> [#uses=4]
%tmp753 = getelementptr [128 x float]* %lookupTable, i32 0, i32 %tmp5 ; <float*> [#uses=1]
%tmp9 = load float* %tmp753 ; <float> [#uses=1]
- %tmp11 = mul float %tmp9, %x ; <float> [#uses=1]
- %tmp13 = add float %tmp11, 0.000000e+00 ; <float> [#uses=1]
+ %tmp11 = fmul float %tmp9, %x ; <float> [#uses=1]
+ %tmp13 = fadd float %tmp11, 0.000000e+00 ; <float> [#uses=1]
%tmp17.sum52 = or i32 %tmp5, 1 ; <i32> [#uses=1]
%tmp1851 = getelementptr [128 x float]* %lookupTable, i32 0, i32 %tmp17.sum52 ; <float*> [#uses=1]
%tmp19 = load float* %tmp1851 ; <float> [#uses=1]
- %tmp21 = mul float %tmp19, %y ; <float> [#uses=1]
- %tmp23 = add float %tmp21, %tmp13 ; <float> [#uses=1]
+ %tmp21 = fmul float %tmp19, %y ; <float> [#uses=1]
+ %tmp23 = fadd float %tmp21, %tmp13 ; <float> [#uses=1]
%tmp27.sum50 = or i32 %tmp5, 2 ; <i32> [#uses=1]
%tmp2849 = getelementptr [128 x float]* %lookupTable, i32 0, i32 %tmp27.sum50 ; <float*> [#uses=1]
%tmp29 = load float* %tmp2849 ; <float> [#uses=1]
- %tmp31 = mul float %tmp29, %z ; <float> [#uses=1]
- %tmp33 = add float %tmp31, %tmp23 ; <float> [#uses=1]
+ %tmp31 = fmul float %tmp29, %z ; <float> [#uses=1]
+ %tmp33 = fadd float %tmp31, %tmp23 ; <float> [#uses=1]
%tmp37.sum48 = or i32 %tmp5, 3 ; <i32> [#uses=1]
%tmp3847 = getelementptr [128 x float]* %lookupTable, i32 0, i32 %tmp37.sum48 ; <float*> [#uses=1]
%tmp39 = load float* %tmp3847 ; <float> [#uses=1]
- %tmp41 = mul float %tmp39, %w ; <float> [#uses=1]
- %tmp43 = add float %tmp41, %tmp33 ; <float> [#uses=1]
+ %tmp41 = fmul float %tmp39, %w ; <float> [#uses=1]
+ %tmp43 = fadd float %tmp41, %tmp33 ; <float> [#uses=1]
ret float %tmp43
}
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index a0d3317..4b6555b 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -5,12 +5,12 @@ define void @test(<4 x float>* %F, float %f) {
entry:
%G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
+ %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
store <4 x float> %tmp3, <4 x float>* %G
%G.upgrd.1 = getelementptr <4 x float>* %G, i32 0, i32 0 ; <float*> [#uses=1]
store float %f, float* %G.upgrd.1
%tmp4 = load <4 x float>* %G ; <<4 x float>> [#uses=2]
- %tmp6 = add <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
+ %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp6, <4 x float>* %F
ret void
}
@@ -19,12 +19,12 @@ define void @test2(<4 x float>* %F, float %f) {
entry:
%G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
+ %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
store <4 x float> %tmp3, <4 x float>* %G
%tmp.upgrd.2 = getelementptr <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1]
store float %f, float* %tmp.upgrd.2
%tmp4 = load <4 x float>* %G ; <<4 x float>> [#uses=2]
- %tmp6 = add <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
+ %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp6, <4 x float>* %F
ret void
}
@@ -33,7 +33,7 @@ define void @test3(<4 x float>* %F, float* %f) {
entry:
%G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
+ %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
store <4 x float> %tmp3, <4 x float>* %G
%tmp.upgrd.3 = getelementptr <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1]
%tmp.upgrd.4 = load float* %tmp.upgrd.3 ; <float> [#uses=1]
@@ -45,7 +45,7 @@ define void @test4(<4 x float>* %F, float* %f) {
entry:
%G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
+ %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
store <4 x float> %tmp3, <4 x float>* %G
%G.upgrd.5 = getelementptr <4 x float>* %G, i32 0, i32 0 ; <float*> [#uses=1]
%tmp.upgrd.6 = load float* %G.upgrd.5 ; <float> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
index f22ca6c..6bfef02 100644
--- a/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
+++ b/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
@@ -142,11 +142,11 @@ invcont57: ; preds = %invcont51
store double %tmp64, double* %tmp62
%tmp65 = call double* @_ZN6QSizeF6rwidthEv( %struct.QPointF* %scaledPageSize ) ; <double*> [#uses=2]
%tmp67 = load double* %tmp65 ; <double> [#uses=1]
- %tmp69 = mul double %tmp67, %tmp48 ; <double> [#uses=1]
+ %tmp69 = fmul double %tmp67, %tmp48 ; <double> [#uses=1]
store double %tmp69, double* %tmp65
%tmp71 = call double* @_ZN6QSizeF7rheightEv( %struct.QPointF* %scaledPageSize ) ; <double*> [#uses=2]
%tmp73 = load double* %tmp71 ; <double> [#uses=1]
- %tmp75 = mul double %tmp73, %tmp54 ; <double> [#uses=1]
+ %tmp75 = fmul double %tmp73, %tmp54 ; <double> [#uses=1]
store double %tmp75, double* %tmp71
%tmp78 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1]
%tmp80 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp78 )
@@ -188,7 +188,7 @@ invcont104: ; preds = %invcont103
to label %invcont106 unwind label %cleanup329 ; <i32> [#uses=1]
invcont106: ; preds = %invcont104
%tmp108 = sitofp i32 %tmp107 to double ; <double> [#uses=1]
- %tmp109 = mul double %tmp108, 0x3FE93264C993264C ; <double> [#uses=1]
+ %tmp109 = fmul double %tmp108, 0x3FE93264C993264C ; <double> [#uses=1]
%tmp109.upgrd.17 = fptosi double %tmp109 to i32 ; <i32> [#uses=3]
%tmp.upgrd.18 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 ) ; <%struct.QTextBlockGroup*> [#uses=1]
invoke void @_ZNK10QTextFrame11frameFormatEv( %struct.QTextBlockFormat* sret %fmt, %struct.QTextBlockGroup* %tmp.upgrd.18 )
@@ -235,7 +235,7 @@ invcont124: ; preds = %invcont122
store double %tmp137, double* %tmp135
%tmp138 = call double @_ZNK6QRectF6heightEv( %struct.QRectF* %body ) ; <double> [#uses=1]
%tmp139 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1]
- %tmp140 = sub double %tmp138, %tmp139 ; <double> [#uses=1]
+ %tmp140 = fsub double %tmp138, %tmp139 ; <double> [#uses=1]
%tmp142 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
to label %invcont141 unwind label %cleanup192 ; <%struct.QPaintDevice*> [#uses=1]
invcont141: ; preds = %invcont124
@@ -249,7 +249,7 @@ invcont146: ; preds = %invcont144
to label %invcont148 unwind label %cleanup168 ; <i32> [#uses=1]
invcont148: ; preds = %invcont146
%tmp149.upgrd.21 = sitofp i32 %tmp149 to double ; <double> [#uses=1]
- %tmp150 = add double %tmp140, %tmp149.upgrd.21 ; <double> [#uses=1]
+ %tmp150 = fadd double %tmp140, %tmp149.upgrd.21 ; <double> [#uses=1]
%tmp152 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
to label %invcont151 unwind label %cleanup168 ; <%struct.QPaintDevice*> [#uses=1]
invcont151: ; preds = %invcont148
@@ -259,10 +259,10 @@ invcont153: ; preds = %invcont151
%tmp155 = mul i32 %tmp154, 5 ; <i32> [#uses=1]
%tmp156 = sdiv i32 %tmp155, 72 ; <i32> [#uses=1]
%tmp156.upgrd.22 = sitofp i32 %tmp156 to double ; <double> [#uses=1]
- %tmp157 = add double %tmp150, %tmp156.upgrd.22 ; <double> [#uses=1]
+ %tmp157 = fadd double %tmp150, %tmp156.upgrd.22 ; <double> [#uses=1]
%tmp158 = call double @_ZNK6QRectF5widthEv( %struct.QRectF* %body ) ; <double> [#uses=1]
%tmp159 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1]
- %tmp160 = sub double %tmp158, %tmp159 ; <double> [#uses=1]
+ %tmp160 = fsub double %tmp158, %tmp159 ; <double> [#uses=1]
call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp2, double %tmp160, double %tmp157 )
%tmp161 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 0 ; <double*> [#uses=1]
%tmp162 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 0 ; <double*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
index 43ff690..4c9c9e8 100644
--- a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
+++ b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
@@ -15,7 +15,7 @@ entry:
br i1 %toBool, label %cond_true, label %cond_next
cond_true: ; preds = %entry
- %tmp7 = add double %tmp, %Z ; <double> [#uses=1]
+ %tmp7 = fadd double %tmp, %Z ; <double> [#uses=1]
br label %cond_next
cond_next: ; preds = %cond_true, %entry
diff --git a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
index a370b95..be3410c 100644
--- a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
+++ b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
@@ -21,7 +21,7 @@ bb56: ; preds = %bb48
bb174: ; preds = %bb144, %bb114
- %tmp191 = mul x86_fp80 0xK00000000000000000000, 0xK3FFE8000000000000000 ; <x86_fp80> [#uses=1]
+ %tmp191 = fmul x86_fp80 0xK00000000000000000000, 0xK3FFE8000000000000000 ; <x86_fp80> [#uses=1]
br label %bb196
bb196: ; preds = %bb174, %bb56, %bb40
diff --git a/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll b/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
index 5969f27c..dc0cbbe 100644
--- a/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
+++ b/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
@@ -29,7 +29,7 @@ bb3: ; preds = %bb2, %bb1
store i32 %storemerge, i32* @j
%1 = sitofp i32 %storemerge to double ; <double> [#uses=1]
%2 = call double @sin(double %1) nounwind readonly ; <double> [#uses=1]
- %3 = add double %2, %d.0 ; <double> [#uses=1]
+ %3 = fadd double %2, %d.0 ; <double> [#uses=1]
%4 = add i32 %l.0, 1 ; <i32> [#uses=1]
br label %bb4
diff --git a/test/Transforms/SimplifyLibCalls/half-powr.ll b/test/Transforms/SimplifyLibCalls/half-powr.ll
index f4e898c..890e788 100644
--- a/test/Transforms/SimplifyLibCalls/half-powr.ll
+++ b/test/Transforms/SimplifyLibCalls/half-powr.ll
@@ -11,7 +11,7 @@ bb: ; preds = %entry
bb1: ; preds = %bb, %entry
%f_addr.0 = phi float [ %1, %bb ], [ %f, %entry ] ; <float> [#uses=1]
- %2 = mul float %f_addr.0, %g ; <float> [#uses=1]
+ %2 = fmul float %f_addr.0, %g ; <float> [#uses=1]
ret float %2
}
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 113d987..5c1ee35 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -2,9 +2,9 @@
# large and three small executables. This is done to minimize memory load
# in parallel builds. Please retain this ordering.
-if( NOT MSVC )
- add_subdirectory(llvm-config)
-endif( NOT MSVC )
+if (NOT USE_EXPLICIT_DEPENDENCIES)
+ add_subdirectory(llvm-config)
+endif()
add_subdirectory(opt)
add_subdirectory(llvm-as)
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 4808f0e..c630331 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -100,6 +100,16 @@ cl::opt<bool> NoVerify("disable-verify", cl::Hidden,
cl::desc("Do not verify input module"));
+static cl::opt<bool>
+DisableRedZone("disable-red-zone",
+ cl::desc("Do not emit code that uses the red zone."),
+ cl::init(false));
+
+static cl::opt<bool>
+NoImplicitFloats("no-implicit-float",
+ cl::desc("Don't generate implicit floating point instructions (x86-only)"),
+ cl::init(false));
+
// GetFileNameRoot - Helper function to get the basename of a filename.
static inline std::string
GetFileNameRoot(const std::string &InputFilename) {
@@ -336,8 +346,13 @@ int main(int argc, char **argv) {
// Run our queue of passes all at once now, efficiently.
// TODO: this could lazily stream functions out of the module.
for (Module::iterator I = mod.begin(), E = mod.end(); I != E; ++I)
- if (!I->isDeclaration())
+ if (!I->isDeclaration()) {
+ if (DisableRedZone)
+ I->addFnAttr(Attribute::NoRedZone);
+ if (NoImplicitFloats)
+ I->addFnAttr(Attribute::NoImplicitFloat);
Passes.run(*I);
+ }
Passes.doFinalization();
}
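
For reference, the effect of the two new llc flags is to tag each defined function as if the input IR had carried the corresponding attributes. A sketch of the equivalent IR (the function name is hypothetical, and the attribute spellings assume the keywords of this era):

  ; equivalent of running llc with -disable-red-zone -no-implicit-float
  define void @f() noredzone noimplicitfloat {
    ret void
  }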
diff --git a/tools/llvm-ld/Optimize.cpp b/tools/llvm-ld/Optimize.cpp
index a4ca951..e466895 100644
--- a/tools/llvm-ld/Optimize.cpp
+++ b/tools/llvm-ld/Optimize.cpp
@@ -94,7 +94,7 @@ void Optimize(Module* M) {
if (!DisableOptimizations)
createStandardLTOPasses(&Passes, !DisableInternalize, !DisableInline,
- /*RunSecondGlobalOpt=*/true, VerifyEach);
+ VerifyEach);
// If the -s or -S command line options were specified, strip the symbols out
// of the resulting program to make it smaller. -s and -S are GNU ld options
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 03a11b6..0bd2abe 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -72,7 +72,7 @@ LTOCodeGenerator::LTOCodeGenerator()
: _linker("LinkTimeOptimizer", "ld-temp.o"), _target(NULL),
_emitDwarfDebugInfo(false), _scopeRestrictionsDone(false),
_codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC),
- _nativeObjectFile(NULL), _gccPath(NULL)
+ _nativeObjectFile(NULL), _gccPath(NULL), _assemblerPath(NULL)
{
}
@@ -128,6 +128,13 @@ void LTOCodeGenerator::setGccPath(const char* path)
_gccPath = new sys::Path(path);
}
+void LTOCodeGenerator::setAssemblerPath(const char* path)
+{
+ if ( _assemblerPath )
+ delete _assemblerPath;
+ _assemblerPath = new sys::Path(path);
+}
+
void LTOCodeGenerator::addMustPreserveSymbol(const char* sym)
{
_mustPreserveSymbols[sym] = 1;
@@ -220,13 +227,18 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
bool LTOCodeGenerator::assemble(const std::string& asmPath,
const std::string& objPath, std::string& errMsg)
{
- sys::Path gcc;
- if ( _gccPath ) {
- gcc = *_gccPath;
+ sys::Path tool;
+ bool needsCompilerOptions = true;
+ if ( _assemblerPath ) {
+ tool = *_assemblerPath;
+ needsCompilerOptions = false;
+ }
+ else if ( _gccPath ) {
+ tool = *_gccPath;
} else {
// find compiler driver
- gcc = sys::Program::FindProgramByName("gcc");
- if ( gcc.isEmpty() ) {
+ tool = sys::Program::FindProgramByName("gcc");
+ if ( tool.isEmpty() ) {
errMsg = "can't locate gcc";
return true;
}
@@ -235,8 +247,9 @@ bool LTOCodeGenerator::assemble(const std::string& asmPath,
// build argument list
std::vector<const char*> args;
std::string targetTriple = _linker.getModule()->getTargetTriple();
- args.push_back(gcc.c_str());
+ args.push_back(tool.c_str());
if ( targetTriple.find("darwin") != targetTriple.size() ) {
+ // darwin specific command line options
if (strncmp(targetTriple.c_str(), "i386-apple-", 11) == 0) {
args.push_back("-arch");
args.push_back("i386");
@@ -274,17 +287,22 @@ bool LTOCodeGenerator::assemble(const std::string& asmPath,
args.push_back("-arch");
args.push_back("armv6");
}
+ // add -static to assembler command line when code model requires
+ if ( (_assemblerPath != NULL) && (_codeModel == LTO_CODEGEN_PIC_MODEL_STATIC) )
+ args.push_back("-static");
+ }
+ if ( needsCompilerOptions ) {
+ args.push_back("-c");
+ args.push_back("-x");
+ args.push_back("assembler");
}
- args.push_back("-c");
- args.push_back("-x");
- args.push_back("assembler");
args.push_back("-o");
args.push_back(objPath.c_str());
args.push_back(asmPath.c_str());
args.push_back(0);
// invoke assembler
- if ( sys::Program::ExecuteAndWait(gcc, &args[0], 0, 0, 0, 0, &errMsg) ) {
+ if ( sys::Program::ExecuteAndWait(tool, &args[0], 0, 0, 0, 0, &errMsg) ) {
errMsg = "error in assembly";
return true;
}
@@ -304,6 +322,20 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg)
if ( march == NULL )
return true;
+ // The relocation model is actually a static member of TargetMachine
+ // and needs to be set before the TargetMachine is instantiated.
+ switch( _codeModel ) {
+ case LTO_CODEGEN_PIC_MODEL_STATIC:
+ TargetMachine::setRelocationModel(Reloc::Static);
+ break;
+ case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
+ TargetMachine::setRelocationModel(Reloc::PIC_);
+ break;
+ case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
+ TargetMachine::setRelocationModel(Reloc::DynamicNoPIC);
+ break;
+ }
+
// construct LTModule, hand over ownership of module and target
std::string FeatureStr =
getFeatureString(_linker.getModule()->getTargetTriple().c_str());
@@ -363,19 +395,6 @@ bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out,
if ( _target->getTargetAsmInfo()->doesSupportExceptionHandling() )
llvm::ExceptionHandling = true;
- // set codegen model
- switch( _codeModel ) {
- case LTO_CODEGEN_PIC_MODEL_STATIC:
- _target->setRelocationModel(Reloc::Static);
- break;
- case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
- _target->setRelocationModel(Reloc::PIC_);
- break;
- case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
- _target->setRelocationModel(Reloc::DynamicNoPIC);
- break;
- }
-
// if options were requested, set them
if ( !_codegenOptions.empty() )
cl::ParseCommandLineOptions(_codegenOptions.size(),
@@ -391,7 +410,6 @@ bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out,
passes.add(new TargetData(*_target->getTargetData()));
createStandardLTOPasses(&passes, /*Internalize=*/ false, !DisableInline,
- /*RunSecondGlobalOpt=*/ false,
/*VerifyEach=*/ false);
// Make sure everything is still good.
diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h
index 57398b0..e02a7ab 100644
--- a/tools/lto/LTOCodeGenerator.h
+++ b/tools/lto/LTOCodeGenerator.h
@@ -37,6 +37,7 @@ public:
bool setDebugInfo(lto_debug_model, std::string& errMsg);
bool setCodePICModel(lto_codegen_model, std::string& errMsg);
void setGccPath(const char* path);
+ void setAssemblerPath(const char* path);
void addMustPreserveSymbol(const char* sym);
bool writeMergedModules(const char* path,
std::string& errMsg);
@@ -61,6 +62,7 @@ private:
llvm::MemoryBuffer* _nativeObjectFile;
std::vector<const char*> _codegenOptions;
llvm::sys::Path* _gccPath;
+ llvm::sys::Path* _assemblerPath;
};
#endif // LTO_CODE_GENERATOR_H
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index 5c3f90a..7eb39ef 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -210,6 +210,14 @@ void lto_codegen_set_gcc_path(lto_code_gen_t cg, const char* path)
}
//
+// sets the path to the assembler tool
+//
+void lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path)
+{
+ cg->setAssemblerPath(path);
+}
+
+//
// adds to a list of all global symbols that must exist in the final
// generated code. If a function is not listed there, it might be
// inlined into every usage and optimized away.
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index aff7559..01f43d1 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -20,4 +20,5 @@ _lto_codegen_set_debug_model
_lto_codegen_set_pic_model
_lto_codegen_write_merged_modules
_lto_codegen_debug_options
+_lto_codegen_set_assembler_path
diff --git a/utils/llvm.grm b/utils/llvm.grm
index 05083bf..2ca849d 100644
--- a/utils/llvm.grm
+++ b/utils/llvm.grm
@@ -53,7 +53,8 @@ FPVAL ::= ESAPINTVAL ^ "." ^ EUAPINTVAL | "0x" ^ HexDigitSeq ;
The rest of this file is derived directly from llvmAsmParser.y.
*)
-ArithmeticOps ::= add | sub | mul | udiv | sdiv | fdiv | urem | srem | frem ;
+ArithmeticOps ::= add | fadd | sub | fsub | mul | fmul |
+ udiv | sdiv | fdiv | urem | srem | frem ;
LogicalOps ::= shl | lshr | ashr | and | or | xor;
CastOps ::= trunc | zext | sext | fptrunc | fpext | bitcast |
uitofp | sitofp | fptoui | fptosi | inttoptr | ptrtoint ;
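
This grammar change summarizes the IR-level change running through all of the test updates above: the arithmetic opcodes are now split by domain, with add/sub/mul reserved for integer (and integer-vector) operands and fadd/fsub/fmul spelling the floating-point forms. A minimal before/after sketch:

  ; before this import:  %f = add double %a, %b
  %i = add i32 %x, %y        ; integer addition keeps the old opcode
  %f = fadd double %a, %b    ; floating-point addition is now fadd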
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index 201d8dd..b4104f9 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -22,7 +22,8 @@ syn match llvmType /\<i\d\+\>/
" Instructions.
" The true and false tokens can be used for comparison opcodes, but it's
" much more common for these tokens to be used for boolean constants.
-syn keyword llvmStatement add sub mul sdiv udiv fdiv srem urem frem
+syn keyword llvmStatement add fadd sub fsub mul fmul
+syn keyword llvmStatement sdiv udiv fdiv srem urem frem
syn keyword llvmStatement and or xor
syn keyword llvmStatement icmp fcmp
syn keyword llvmStatement eq ne ugt uge ult ule sgt sge slt sle