author     obrien <obrien@FreeBSD.org>  1999-10-16 06:09:09 +0000
committer  obrien <obrien@FreeBSD.org>  1999-10-16 06:09:09 +0000
commit     cae8fa8120c70195f34a2456f18c4c848a2d3e0c (patch)
tree       f7d3a3ab9c32694206552e767626366f016f2062 /contrib/gcc/config/sparc
parent     84656b55b6e25e30322dc903a05de53706361d3d (diff)

Virgin import of the GCC 2.95.1 compilers
Diffstat (limited to 'contrib/gcc/config/sparc')
-rw-r--r--  contrib/gcc/config/sparc/elf.h            |   16
-rw-r--r--  contrib/gcc/config/sparc/gmon-sol2.c      |   24
-rw-r--r--  contrib/gcc/config/sparc/hal.h            |   33
-rw-r--r--  contrib/gcc/config/sparc/linux.h          |    2
-rw-r--r--  contrib/gcc/config/sparc/linux64.h        |  157
-rw-r--r--  contrib/gcc/config/sparc/pbd.h            |   30
-rw-r--r--  contrib/gcc/config/sparc/sol2-c1.asm      |   42
-rw-r--r--  contrib/gcc/config/sparc/sol2-ci.asm      |    8
-rw-r--r--  contrib/gcc/config/sparc/sol2-cn.asm      |    2
-rw-r--r--  contrib/gcc/config/sparc/sol2-sld-64.h    |  363
-rw-r--r--  contrib/gcc/config/sparc/sol2.h           |   12
-rw-r--r--  contrib/gcc/config/sparc/sp64-elf.h       |    9
-rw-r--r--  contrib/gcc/config/sparc/sparc.c          | 4272
-rw-r--r--  contrib/gcc/config/sparc/sparc.h          |  970
-rw-r--r--  contrib/gcc/config/sparc/sparc.md         | 5504
-rw-r--r--  contrib/gcc/config/sparc/splet.h          |   28
-rw-r--r--  contrib/gcc/config/sparc/sun4o3.h         |    8
-rw-r--r--  contrib/gcc/config/sparc/sysv4.h          |   35
-rw-r--r--  contrib/gcc/config/sparc/t-halos          |    2
-rw-r--r--  contrib/gcc/config/sparc/t-linux64        |   21
-rw-r--r--  contrib/gcc/config/sparc/t-sol2           |   22
-rw-r--r--  contrib/gcc/config/sparc/t-sol2-64        |    8
-rw-r--r--  contrib/gcc/config/sparc/t-splet          |    5
-rw-r--r--  contrib/gcc/config/sparc/xm-sp64.h        |    2
-rw-r--r--  contrib/gcc/config/sparc/xm-sysv4-64.h    |   27
25 files changed, 7578 insertions, 4024 deletions
diff --git a/contrib/gcc/config/sparc/elf.h b/contrib/gcc/config/sparc/elf.h
index 70cb26a..635238f 100644
--- a/contrib/gcc/config/sparc/elf.h
+++ b/contrib/gcc/config/sparc/elf.h
@@ -40,3 +40,19 @@ Boston, MA 02111-1307, USA. */
/* FIXME: until fixed */
#undef LONG_DOUBLE_TYPE_SIZE
#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* This solaris2 define does not apply. */
+#undef STDC_0_IN_SYSTEM_HEADERS
+
+/* We don't want to use the Solaris2 specific long long int conversion
+ routines. */
+#undef INIT_SUBTARGET_OPTABS
+#define INIT_SUBTARGET_OPTABS
+
+/* ??? We haven't added Solaris2 equivalent 64 bit library routines to
+ lb1sp*.asm, so we need to avoid using them. */
+#undef MULDI3_LIBCALL
+#undef DIVDI3_LIBCALL
+#undef UDIVDI3_LIBCALL
+#undef MODDI3_LIBCALL
+#undef UMODDI3_LIBCALL
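
Note: the five undefs above matter because sparc/sol2.h, which sits underneath this
configuration, routes the 64-bit integer libcalls to Solaris-specific runtime
routines; with the names undefined, gcc falls back to the generic libgcc entry
points. A sketch of the definitions being cancelled, with the Solaris routine
names assumed from the GCC 2.95 sol2.h:

    /* In sparc/sol2.h (assumed naming), cancelled by the undefs above: */
    #define MULDI3_LIBCALL  "__mul64"    /* fallback: __muldi3  */
    #define DIVDI3_LIBCALL  "__div64"    /* fallback: __divdi3  */
    #define UDIVDI3_LIBCALL "__udiv64"   /* fallback: __udivdi3 */
    #define MODDI3_LIBCALL  "__rem64"    /* fallback: __moddi3  */
    #define UMODDI3_LIBCALL "__urem64"   /* fallback: __umoddi3 */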
diff --git a/contrib/gcc/config/sparc/gmon-sol2.c b/contrib/gcc/config/sparc/gmon-sol2.c
index 2a5b898..a6abcab 100644
--- a/contrib/gcc/config/sparc/gmon-sol2.c
+++ b/contrib/gcc/config/sparc/gmon-sol2.c
@@ -35,16 +35,8 @@
* for Cygnus Support, July 1992.
*/
-#ifndef lint
-static char sccsid[] = "@(#)gmon.c 5.3 (Berkeley) 5/22/91";
-#endif /* not lint */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <unistd.h>
-#include <fcntl.h>
+#include "config.h"
+#include "system.h"
#if 0
#include "sparc/gmon.h"
@@ -96,7 +88,9 @@ static int s_scale;
#define MSG "No space for profiling buffer(s)\n"
-static void moncontrol();
+static void moncontrol PROTO ((int));
+extern void monstartup PROTO ((char *, char *));
+extern void _mcleanup PROTO ((void));
void monstartup(lowpc, highpc)
char *lowpc;
@@ -185,7 +179,7 @@ _mcleanup()
int toindex;
struct rawarc rawarc;
char *profdir;
- char *proffile;
+ const char *proffile;
char *progname;
char buf[PATH_MAX];
extern char **___Argv;
@@ -275,6 +269,8 @@ _mcleanup()
* -- [eichin:19920702.1107EST]
*/
+static void internal_mcount PROTO((char *, unsigned short *)) ATTRIBUTE_UNUSED;
+
/* i7 == last ret, -> frompcindex */
/* o7 == current ret, -> selfpc */
/* Solaris 2 libraries use _mcount. */
@@ -297,9 +293,9 @@ static void internal_mcount(selfpc, frompcindex)
*/
if(!already_setup) {
- extern etext();
+ extern char etext[];
already_setup = 1;
- monstartup(0, etext);
+ monstartup(0, (char *)etext);
#ifdef USE_ONEXIT
on_exit(_mcleanup, 0);
#else
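
Note: the PROTO wrapper introduced in these declarations comes from GCC's
gansidecl.h, pulled in via the new "config.h"/"system.h" includes. Roughly (a
paraphrase, not the exact header text), it keeps prototypes for ANSI compilers
and degrades to an empty parameter list for K&R ones:

    #ifdef __STDC__
    #define PROTO(ARGS) ARGS   /* static void moncontrol PROTO ((int));   */
    #else                      /*   ANSI -> static void moncontrol (int); */
    #define PROTO(ARGS) ()     /*   K&R  -> static void moncontrol ();    */
    #endif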
diff --git a/contrib/gcc/config/sparc/hal.h b/contrib/gcc/config/sparc/hal.h
new file mode 100644
index 0000000..0222b81
--- /dev/null
+++ b/contrib/gcc/config/sparc/hal.h
@@ -0,0 +1,33 @@
+/* Definitions of target machine for GNU compiler, for HAL
+ SPARC running Solaris 2 HALOS
+ Copyright 1998 Free Software Foundation, Inc.
+ Contributed by Carol LePage (carolo@hal.com)
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* Need different command line for assembler */
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "%{V} %{v:%{!V:-V}} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Wa,*:%*} -e1 \
+ %{fpic:-K PIC} %{fPIC:-K PIC}"
+
+/* Need DWARF for debuggers. */
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF_DEBUG
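
Note: the ASM_SPEC above is written in gcc's spec language: %{flag:text} emits
text when -flag was passed, %{!flag:text} when it was not, and %{Wa,*:%*}
forwards -Wa, options to the assembler verbatim. Two hypothetical invocations,
for illustration:

    gcc -c foo.c         ->  as -Qy ... -e1 foo.s
    gcc -c -fpic foo.c   ->  as -Qy ... -e1 -K PIC foo.s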
diff --git a/contrib/gcc/config/sparc/linux.h b/contrib/gcc/config/sparc/linux.h
index 7bbbfa4..d967b01 100644
--- a/contrib/gcc/config/sparc/linux.h
+++ b/contrib/gcc/config/sparc/linux.h
@@ -103,7 +103,7 @@ Boston, MA 02111-1307, USA. */
#define WCHAR_TYPE_SIZE BITS_PER_WORD
#undef CPP_PREDEFINES
-#define CPP_PREDEFINES "-D__ELF__ -Dunix -Dsparc -D__sparc__ -Dlinux -Asystem(unix) -Asystem(posix)"
+#define CPP_PREDEFINES "-D__ELF__ -Dunix -D__sparc__ -Dlinux -Asystem(unix) -Asystem(posix)"
#undef CPP_SUBTARGET_SPEC
#ifdef USE_GNULIBC_1
diff --git a/contrib/gcc/config/sparc/linux64.h b/contrib/gcc/config/sparc/linux64.h
index 77bc668..705b5ca 100644
--- a/contrib/gcc/config/sparc/linux64.h
+++ b/contrib/gcc/config/sparc/linux64.h
@@ -19,9 +19,7 @@ along with GNU CC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
-/* ??? bi-architecture support will require changes to the linker
- related specs, among perhaps other things (multilibs). */
-/* #define SPARC_BI_ARCH */
+#define SPARC_BI_ARCH
#define LINUX_DEFAULT_ELF
@@ -36,6 +34,16 @@ Boston, MA 02111-1307, USA. */
#undef MD_EXEC_PREFIX
#undef MD_STARTFILE_PREFIX
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+/* A 64 bit v9 compiler with stack-bias,
+ in a Medium/Low code model environment. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_V9 + MASK_PTR64 + MASK_64BIT /* + MASK_HARD_QUAD */ \
+ + MASK_STACK_BIAS + MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU)
+#endif
+
/* Output at beginning of assembler file. */
/* The .file command should always begin the output. */
#undef ASM_FILE_START
@@ -54,11 +62,37 @@ Boston, MA 02111-1307, USA. */
object constructed before entering `main'. */
#undef STARTFILE_SPEC
-#define STARTFILE_SPEC \
+
+#define STARTFILE_SPEC32 \
"%{!shared: \
%{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} %{!p:crt1.o%s}}}\
crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+#define STARTFILE_SPEC64 \
+ "%{!shared: \
+ %{pg:/usr/lib64/gcrt1.o%s} %{!pg:%{p:/usr/lib64/gcrt1.o%s} %{!p:/usr/lib64/crt1.o%s}}}\
+ /usr/lib64/crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+
+#ifdef SPARC_BI_ARCH
+
+#if DEFAULT_ARCH32_P
+#define STARTFILE_SPEC "\
+%{m32:" STARTFILE_SPEC32 "} \
+%{m64:" STARTFILE_SPEC64 "} \
+%{!m32:%{!m64:" STARTFILE_SPEC32 "}}"
+#else
+#define STARTFILE_SPEC "\
+%{m32:" STARTFILE_SPEC32 "} \
+%{m64:" STARTFILE_SPEC64 "} \
+%{!m32:%{!m64:" STARTFILE_SPEC64 "}}"
+#endif
+
+#else
+
+#define STARTFILE_SPEC STARTFILE_SPEC64
+
+#endif
+
/* Provide a ENDFILE_SPEC appropriate for GNU/Linux. Here we tack on
the GNU/Linux magical crtend.o file (see crtstuff.c) which
provides part of the support for getting C++ file-scope static
@@ -66,23 +100,39 @@ Boston, MA 02111-1307, USA. */
GNU/Linux "finalizer" file, `crtn.o'. */
#undef ENDFILE_SPEC
-#define ENDFILE_SPEC \
+
+#define ENDFILE_SPEC32 \
"%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s"
-#undef TARGET_VERSION
-#define TARGET_VERSION fprintf (stderr, " (sparc64 GNU/Linux with ELF)");
+#define ENDFILE_SPEC64 \
+ "%{!shared:crtend.o%s} %{shared:crtendS.o%s} /usr/lib64/crtn.o%s"
+
+#ifdef SPARC_BI_ARCH
-/* A 64 bit v9 compiler with stack-bias,
- in a Medium/Anywhere code model environment. */
+#if DEFAULT_ARCH32_P
+#define ENDFILE_SPEC "\
+%{m32:" ENDFILE_SPEC32 "} \
+%{m64:" ENDFILE_SPEC64 "} \
+%{!m32:%{!m64:" ENDFILE_SPEC32 "}}"
+#else
+#define ENDFILE_SPEC "\
+%{m32:" ENDFILE_SPEC32 "} \
+%{m64:" ENDFILE_SPEC64 "} \
+%{!m32:%{!m64:" ENDFILE_SPEC64 "}}"
+#endif
-#undef TARGET_DEFAULT
-#define TARGET_DEFAULT \
- (MASK_V9 + MASK_PTR64 + MASK_64BIT /* + MASK_HARD_QUAD */ \
- + MASK_STACK_BIAS + MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU)
+#else
+
+#define ENDFILE_SPEC ENDFILE_SPEC64
+
+#endif
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (sparc64 GNU/Linux with ELF)");
/* The default code model. */
#undef SPARC_DEFAULT_CMODEL
-#define SPARC_DEFAULT_CMODEL CM_MEDANY
+#define SPARC_DEFAULT_CMODEL CM_MEDLOW
#undef WCHAR_TYPE
#define WCHAR_TYPE "long int"
@@ -92,9 +142,9 @@ Boston, MA 02111-1307, USA. */
#undef LONG_DOUBLE_TYPE_SIZE
#define LONG_DOUBLE_TYPE_SIZE 128
-
+
#undef CPP_PREDEFINES
-#define CPP_PREDEFINES "-D__ELF__ -Dunix -D_LONGLONG -Dsparc -D__sparc__ -Dlinux -Asystem(unix) -Asystem(posix)"
+#define CPP_PREDEFINES "-D__ELF__ -Dunix -D_LONGLONG -D__sparc__ -Dlinux -Asystem(unix) -Asystem(posix)"
#undef CPP_SUBTARGET_SPEC
#define CPP_SUBTARGET_SPEC "\
@@ -126,17 +176,88 @@ Boston, MA 02111-1307, USA. */
/* If ELF is the default format, we should not use /lib/elf. */
+#ifdef SPARC_BI_ARCH
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "link_arch32", LINK_ARCH32_SPEC }, \
+ { "link_arch64", LINK_ARCH64_SPEC }, \
+ { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \
+ { "link_arch", LINK_ARCH_SPEC },
+
+#define LINK_ARCH32_SPEC "-m elf32_sparc -Y P,/usr/lib %{shared:-shared} \
+ %{!shared: \
+ %{!ibcs: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{!dynamic-linker:-dynamic-linker /lib/ld-linux.so.2}} \
+ %{static:-static}}} \
+"
+
+#define LINK_ARCH64_SPEC "-m elf64_sparc -Y P,/usr/lib64 %{shared:-shared} \
+ %{!shared: \
+ %{!ibcs: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{!dynamic-linker:-dynamic-linker /lib64/ld-linux.so.2}} \
+ %{static:-static}}} \
+"
+
+#define LINK_ARCH_SPEC "\
+%{m32:%(link_arch32)} \
+%{m64:%(link_arch64)} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+
+#define LINK_ARCH_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC)
+
#undef LINK_SPEC
-#define LINK_SPEC "-m elf64_sparc -Y P,/usr/lib %{shared:-shared} \
+#define LINK_SPEC "\
+%(link_arch) \
+%{mlittle-endian:-EL} \
+"
+
+#undef CC1_SPEC
+#if DEFAULT_ARCH32_P
+#define CC1_SPEC "\
+%{sun4:} %{target:} \
+%{mcypress:-mcpu=cypress} \
+%{msparclite:-mcpu=sparclite} %{mf930:-mcpu=f930} %{mf934:-mcpu=f934} \
+%{mv8:-mcpu=v8} %{msupersparc:-mcpu=supersparc} \
+%{m64:-mptr64 -mcpu=ultrasparc -mstack-bias} \
+"
+#else
+#define CC1_SPEC "\
+%{sun4:} %{target:} \
+%{mcypress:-mcpu=cypress} \
+%{msparclite:-mcpu=sparclite} %{mf930:-mcpu=f930} %{mf934:-mcpu=f934} \
+%{mv8:-mcpu=v8} %{msupersparc:-mcpu=supersparc} \
+%{m32:-mptr32 -mcpu=cypress -mno-stack-bias} \
+"
+#endif
+
+#if DEFAULT_ARCH32_P
+#define MULTILIB_DEFAULTS { "m32" }
+#else
+#define MULTILIB_DEFAULTS { "m64" }
+#endif
+
+#else /* !SPARC_BI_ARCH */
+
+#undef LINK_SPEC
+#define LINK_ARCH_SPEC "-m elf64_sparc -Y P,/usr/lib64 %{shared:-shared} \
%{!shared: \
%{!ibcs: \
%{!static: \
%{rdynamic:-export-dynamic} \
- %{!dynamic-linker:-dynamic-linker /lib/ld-linux64.so.2}} \
+ %{!dynamic-linker:-dynamic-linker /lib64/ld-linux.so.2}} \
%{static:-static}}} \
%{mlittle-endian:-EL} \
"
+#endif /* !SPARC_BI_ARCH */
+
/* The sun bundled assembler doesn't accept -Yd, (and neither does gas).
It's safe to pass -s always, even if -g is not used. */
#undef ASM_SPEC
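
Note: taken together, the bi-arch specs above key everything off -m32/-m64 and
fall back to the configured default architecture. Under a 32-bit-default
configuration, for example:

    gcc foo.c        ->  crt1.o ..., ld -m elf32_sparc -dynamic-linker /lib/ld-linux.so.2
    gcc -m64 foo.c   ->  /usr/lib64/crt1.o ..., ld -m elf64_sparc -dynamic-linker /lib64/ld-linux.so.2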
diff --git a/contrib/gcc/config/sparc/pbd.h b/contrib/gcc/config/sparc/pbd.h
index 459bffd..b70fdcb 100644
--- a/contrib/gcc/config/sparc/pbd.h
+++ b/contrib/gcc/config/sparc/pbd.h
@@ -144,35 +144,7 @@ Boston, MA 02111-1307, USA. */
#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
fprintf (FILE, "\t.word .L%d\n", VALUE)
-/* Output assembler code to FILE to increment profiler label # LABELNO
- for profiling a function entry. */
-
-#undef FUNCTION_PROFILER
-#define FUNCTION_PROFILER(FILE, LABELNO) \
- fprintf (FILE, "\tsethi %%hi(.LP%d),%%o0\n\tcall mcount\n\tor %%lo(.LP%d),%%o0,%%o0\n", \
- (LABELNO), (LABELNO))
-
-/* Output assembler code to FILE to initialize this source file's
- basic block profiling info, if that has not already been done. */
-
-#undef FUNCTION_BLOCK_PROFILER
-#define FUNCTION_BLOCK_PROFILER(FILE, LABELNO) \
- fprintf (FILE, "\tsethi %%hi(.LPBX0),%%o0\n\tld [%%lo(.LPBX0)+%%o0],%%o1\n\ttst %%o1\n\tbne .LPY%d\n\tnop\n\tcall ___bb_init_func\n\tnop\n.LPY%d:\n", \
- (LABELNO), (LABELNO))
-
-/* Output assembler code to FILE to increment the entry-count for
- the BLOCKNO'th basic block in this source file. */
-
-#undef BLOCK_PROFILER
-#define BLOCK_PROFILER(FILE, BLOCKNO) \
-{ \
- int blockn = (BLOCKNO); \
- fprintf (FILE, "\tsethi %%hi(.LPBX2+%d),%%g1\n\tld [%%lo(.LPBX2+%d)+%%g1],%%g2\n\
-\tadd %%g2,1,%%g2\n\tst %%g2,[%%lo(.LPBX2+%d)+%%g1]\n", \
- 4 * blockn, 4 * blockn, 4 * blockn); \
- CC_STATUS_INIT; /* We have clobbered %g1. Also %g2. */ \
-}
-/* This is needed for SunOS 4.0, and should not hurt for 3.2
+/* This is needed for SunOS 4.0, and should not hurt for 3.2
versions either. */
#undef ASM_OUTPUT_SOURCE_LINE(file, line)
#define ASM_OUTPUT_SOURCE_LINE(file, line) \
diff --git a/contrib/gcc/config/sparc/sol2-c1.asm b/contrib/gcc/config/sparc/sol2-c1.asm
index 618d698..894a8c3 100644
--- a/contrib/gcc/config/sparc/sol2-c1.asm
+++ b/contrib/gcc/config/sparc/sol2-c1.asm
@@ -1,4 +1,4 @@
-! crt1.s for solaris 2.0.
+! crt1.s for sparc & sparcv9 (SunOS 5)
! Copyright (C) 1992 Free Software Foundation, Inc.
! Written By David Vinayak Henkel-Wallace, June 1992
@@ -37,43 +37,67 @@
! in section 3 of the SVr4 ABI.
! This file is the first thing linked into any executable.
+#ifdef __sparcv9
+#define CPTRSIZE 8
+#define CPTRSHIFT 3
+#define STACK_BIAS 2047
+#define ldn ldx
+#define stn stx
+#define setn(s, scratch, dst) setx s, scratch, dst
+#else
+#define CPTRSIZE 4
+#define CPTRSHIFT 2
+#define STACK_BIAS 0
+#define ldn ld
+#define stn st
+#define setn(s, scratch, dst) set s, dst
+#endif
+
.section ".text"
.proc 022
.global _start
_start:
mov 0, %fp ! Mark bottom frame pointer
- ld [%sp + 64], %l0 ! argc
- add %sp, 68, %l1 ! argv
+ ldn [%sp + (16 * CPTRSIZE) + STACK_BIAS], %l0 ! argc
+ add %sp, (17 * CPTRSIZE) + STACK_BIAS, %l1 ! argv
! Leave some room for a call. Sun leaves 32 octets (to sit on
! a cache line?) so we do too.
+#ifdef __sparcv9
+ sub %sp, 48, %sp
+#else
sub %sp, 32, %sp
+#endif
! %g1 may contain a function to be registered w/atexit
orcc %g0, %g1, %g0
+#ifdef __sparcv9
+ be %xcc, .nope
+#else
be .nope
+#endif
mov %g1, %o0
call atexit
nop
.nope:
! Now make sure constructors and destructors are handled.
- set _fini, %o0
+ setn(_fini, %o1, %o0)
call atexit, 1
nop
call _init, 0
nop
- ! We ignore the auxiliary vector; there's no defined way to
+ ! We ignore the auxiliary vector; there is no defined way to
! access those data anyway. Instead, go straight to main:
mov %l0, %o0 ! argc
mov %l1, %o1 ! argv
! Skip argc words past argv, to env:
- sll %l0, 2, %o2
- add %o2, 4, %o2
+ sll %l0, CPTRSHIFT, %o2
+ add %o2, CPTRSIZE, %o2
add %l1, %o2, %o2 ! env
- set _environ, %o3
- st %o2, [%o3] ! *_environ
+ setn(_environ, %o4, %o3)
+ stn %o2, [%o3] ! *_environ
call main, 4
nop
call exit, 0
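
Note: the CPTRSIZE/STACK_BIAS macros let one source serve both ABIs. Working
the expansions through, using only the definitions added in this file:

    v8 (sparc):    CPTRSIZE=4, STACK_BIAS=0     ->  argc at [%sp + 64],   argv = %sp + 68
    v9 (sparcv9):  CPTRSIZE=8, STACK_BIAS=2047  ->  argc at [%sp + 2175], argv = %sp + 2183

which on 32-bit matches the old hard-coded "ld [%sp + 64]" / "add %sp, 68, %l1"
exactly.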
diff --git a/contrib/gcc/config/sparc/sol2-ci.asm b/contrib/gcc/config/sparc/sol2-ci.asm
index dd09a34..3dc793c 100644
--- a/contrib/gcc/config/sparc/sol2-ci.asm
+++ b/contrib/gcc/config/sparc/sol2-ci.asm
@@ -48,7 +48,11 @@
.type _init,#function
.align 4
_init:
+#ifdef __sparcv9
+ save %sp, -176, %sp
+#else
save %sp, -96, %sp
+#endif
.section ".fini"
@@ -57,4 +61,8 @@ _init:
.type _fini,#function
.align 4
_fini:
+#ifdef __sparcv9
+ save %sp, -176, %sp
+#else
save %sp, -96, %sp
+#endif
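
Note: the two save sizes are the minimum ABI stack frames. On v8: 16 window
registers x 4 bytes + 4 bytes for the hidden aggregate-return slot + 6 x 4 bytes
of outgoing argument space = 92, rounded to 96 for alignment. On v9, which drops
the hidden slot: 16 x 8 + 6 x 8 = 176, preserving the 16-byte stack alignment.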
diff --git a/contrib/gcc/config/sparc/sol2-cn.asm b/contrib/gcc/config/sparc/sol2-cn.asm
index 3c5d508..49e070f 100644
--- a/contrib/gcc/config/sparc/sol2-cn.asm
+++ b/contrib/gcc/config/sparc/sol2-cn.asm
@@ -51,4 +51,4 @@
ret
restore
-! Th-th-th-that's all folks!
+! Th-th-th-that is all folks!
diff --git a/contrib/gcc/config/sparc/sol2-sld-64.h b/contrib/gcc/config/sparc/sol2-sld-64.h
new file mode 100644
index 0000000..c2518d8
--- /dev/null
+++ b/contrib/gcc/config/sparc/sol2-sld-64.h
@@ -0,0 +1,363 @@
+/* Definitions of target machine for GNU compiler, for 64-bit SPARC
+ running Solaris 2 using the system linker. */
+
+#define SPARC_BI_ARCH
+
+#include "sparc/sol2.h"
+
+/* At least up through Solaris 2.6,
+ the system linker does not work with DWARF or DWARF2,
+ since it does not have working support for relocations
+ to unaligned data. */
+
+#define LINKER_DOES_NOT_WORK_WITH_DWARF2
+
+/* A 64 bit v9 compiler with stack-bias */
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_V9 + MASK_PTR64 + MASK_64BIT /* + MASK_HARD_QUAD */ + \
+ MASK_STACK_BIAS + MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU)
+#endif
+
+/* The default code model. */
+#undef SPARC_DEFAULT_CMODEL
+#define SPARC_DEFAULT_CMODEL CM_MEDANY
+
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE 128
+
+#undef ASM_CPU32_DEFAULT_SPEC
+#define ASM_CPU32_DEFAULT_SPEC ""
+#undef ASM_CPU64_DEFAULT_SPEC
+#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9"
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9
+#undef CPP_CPU64_DEFAULT_SPEC
+#define CPP_CPU64_DEFAULT_SPEC ""
+#undef ASM_CPU32_DEFAULT_SPEC
+#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plus"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+#undef CPP_CPU64_DEFAULT_SPEC
+#define CPP_CPU64_DEFAULT_SPEC ""
+#undef ASM_CPU32_DEFAULT_SPEC
+#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusa"
+#undef ASM_CPU64_DEFAULT_SPEC
+#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9a"
+#endif
+
+/* The sun bundled assembler doesn't accept -Yd, (and neither does gas).
+ It's safe to pass -s always, even if -g is not used. */
+#undef ASM_SPEC
+#define ASM_SPEC "\
+%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Wa,*:%*} -s \
+%{fpic:-K PIC} %{fPIC:-K PIC} \
+%(asm_cpu)\
+"
+
+#if DEFAULT_ARCH32_P
+#define DEF_ARCH32_SPEC(__str) "%{!m64:" __str "}"
+#define DEF_ARCH64_SPEC(__str) "%{m64:" __str "}"
+#else
+#define DEF_ARCH32_SPEC(__str) "%{m32:" __str "}"
+#define DEF_ARCH64_SPEC(__str) "%{!m32:" __str "}"
+#endif
+
+#undef CPP_CPU_SPEC
+#define CPP_CPU_SPEC "\
+%{mcypress:} \
+%{msparclite:-D__sparclite__} \
+%{mf930:-D__sparclite__} %{mf934:-D__sparclite__} \
+%{mv8:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
+%{msupersparc:-D__supersparc__ " DEF_ARCH32_SPEC("-D__sparcv8") "} \
+%{mcpu=sparclet:-D__sparclet__} %{mcpu=tsc701:-D__sparclet__} \
+%{mcpu=sparclite:-D__sparclite__} \
+%{mcpu=f930:-D__sparclite__} %{mcpu=f934:-D__sparclite__} \
+%{mcpu=v8:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
+%{mcpu=supersparc:-D__supersparc__ " DEF_ARCH32_SPEC("-D__sparcv8") "} \
+%{mcpu=v9:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
+%{mcpu=ultrasparc:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
+%{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \
+"
+
+#undef ASM_CPU_SPEC
+#define ASM_CPU_SPEC "\
+%{mcpu=ultrasparc:" DEF_ARCH32_SPEC("-xarch=v8plusa") DEF_ARCH64_SPEC("-xarch=v9a") "} \
+%{mcpu=v9:" DEF_ARCH32_SPEC("-xarch=v8plus") DEF_ARCH64_SPEC("-xarch=v9") "} \
+%{!mcpu=ultrasparc:%{!mcpu=v9:%{mcpu*:" DEF_ARCH32_SPEC("-xarch=v8") DEF_ARCH64_SPEC("-xarch=v9") "}}} \
+%{!mcpu*:%(asm_cpu_default)} \
+"
+
+#define STARTFILE_SPEC32 "\
+%{ansi:values-Xc.o%s} \
+%{!ansi: \
+ %{traditional:values-Xt.o%s} \
+ %{!traditional:values-Xa.o%s}}"
+
+#define STARTFILE_SPEC64 "\
+%{ansi:/usr/lib/sparcv9/values-Xc.o%s} \
+%{!ansi: \
+ %{traditional:/usr/lib/sparcv9/values-Xt.o%s} \
+ %{!traditional:/usr/lib/sparcv9/values-Xa.o%s}}"
+
+#ifdef SPARC_BI_ARCH
+
+#if DEFAULT_ARCH32_P
+#define STARTFILE_ARCH_SPEC "\
+%{m32:" STARTFILE_SPEC32 "} \
+%{m64:" STARTFILE_SPEC64 "} \
+%{!m32:%{!m64:" STARTFILE_SPEC32 "}}"
+#else
+#define STARTFILE_ARCH_SPEC "\
+%{m32:" STARTFILE_SPEC32 "} \
+%{m64:" STARTFILE_SPEC64 "} \
+%{!m32:%{!m64:" STARTFILE_SPEC64 "}}"
+#endif
+
+#else /* !SPARC_BI_ARCH */
+
+/* In this case we define MD_STARTFILE_PREFIX to /usr/lib/sparcv9/ */
+#define STARTFILE_ARCH_SPEC STARTFILE_SPEC32
+
+#endif /* !SPARC_BI_ARCH */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!shared: \
+ %{!symbolic: \
+ %{p:mcrt1.o%s} \
+ %{!p: \
+ %{pg:gcrt1.o%s gmon.o%s} \
+ %{!pg:crt1.o%s}}}} \
+ crti.o%s" STARTFILE_ARCH_SPEC " \
+ crtbegin.o%s"
+
+#ifdef SPARC_BI_ARCH
+
+#undef CPP_CPU_DEFAULT_SPEC
+#define CPP_CPU_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? "\
+%{m64:" CPP_CPU64_DEFAULT_SPEC "} \
+%{!m64:" CPP_CPU32_DEFAULT_SPEC "} \
+" : "\
+%{m32:" CPP_CPU32_DEFAULT_SPEC "} \
+%{!m32:" CPP_CPU64_DEFAULT_SPEC "} \
+")
+
+#undef ASM_CPU_DEFAULT_SPEC
+#define ASM_CPU_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? "\
+%{m64:" ASM_CPU64_DEFAULT_SPEC "} \
+%{!m64:" ASM_CPU32_DEFAULT_SPEC "} \
+" : "\
+%{m32:" ASM_CPU32_DEFAULT_SPEC "} \
+%{!m32:" ASM_CPU64_DEFAULT_SPEC "} \
+")
+
+#undef CPP_ARCH32_SPEC
+#define CPP_ARCH32_SPEC "-D__SIZE_TYPE__=unsigned\\ int -D__PTRDIFF_TYPE__=int \
+-D__GCC_NEW_VARARGS__ -Acpu(sparc) -Amachine(sparc)"
+#undef CPP_ARCH64_SPEC
+#define CPP_ARCH64_SPEC "-D__SIZE_TYPE__=long\\ unsigned\\ int -D__PTRDIFF_TYPE__=long\\ int \
+-D__arch64__ -Acpu(sparc64) -Amachine(sparcv9) -D__sparcv9"
+
+#undef CPP_ARCH_SPEC
+#define CPP_ARCH_SPEC "\
+%{m32:%(cpp_arch32)} \
+%{m64:%(cpp_arch64)} \
+%{!m32:%{!m64:%(cpp_arch_default)}} \
+"
+
+#undef ASM_ARCH_SPEC
+#define ASM_ARCH_SPEC ""
+
+#undef ASM_ARCH32_SPEC
+#define ASM_ARCH32_SPEC ""
+
+#undef ASM_ARCH64_SPEC
+#define ASM_ARCH64_SPEC ""
+
+#undef ASM_ARCH_DEFAULT_SPEC
+#define ASM_ARCH_DEFAULT_SPEC ""
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "link_arch32", LINK_ARCH32_SPEC }, \
+ { "link_arch64", LINK_ARCH64_SPEC }, \
+ { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \
+ { "link_arch", LINK_ARCH_SPEC },
+
+/* This should be the same as in svr4.h, except with -R added. */
+#define LINK_ARCH32_SPEC \
+ "%{G:-G} \
+ %{YP,*} \
+ %{R*} \
+ %{compat-bsd: \
+ %{!YP,*:%{p:-Y P,/usr/ucblib:/usr/ccs/lib/libp:/usr/lib/libp:/usr/ccs/lib:/usr/lib} \
+ %{pg:-Y P,/usr/ucblib:/usr/ccs/lib/libp:/usr/lib/libp:/usr/ccs/lib:/usr/lib} \
+ %{!p:%{!pg:-Y P,/usr/ucblib:/usr/ccs/lib:/usr/lib}}} \
+ -R /usr/ucblib} \
+ %{!compat-bsd: \
+ %{!YP,*:%{p:-Y P,/usr/ccs/lib/libp:/usr/lib/libp:/usr/ccs/lib:/usr/lib} \
+ %{pg:-Y P,/usr/ccs/lib/libp:/usr/lib/libp:/usr/ccs/lib:/usr/lib} \
+ %{!p:%{!pg:-Y P,/usr/ccs/lib:/usr/lib}}}}"
+
+#define LINK_ARCH64_SPEC \
+ "%{mcmodel=medlow:-M /usr/lib/ld/sparcv9/map.below4G} \
+ %{G:-G} \
+ %{YP,*} \
+ %{R*} \
+ %{compat-bsd: \
+ %{!YP,*:%{p:-Y P,/usr/ucblib/sparcv9:/usr/lib/libp/sparcv9:/usr/lib/sparcv9} \
+ %{pg:-Y P,/usr/ucblib/sparcv9:/usr/lib/libp/sparcv9:/usr/lib/sparcv9} \
+ %{!p:%{!pg:-Y P,/usr/ucblib/sparcv9:/usr/lib/sparcv9}}} \
+ -R /usr/ucblib} \
+ %{!compat-bsd: \
+ %{!YP,*:%{p:-Y P,/usr/lib/libp/sparcv9:/usr/lib/sparcv9} \
+ %{pg:-Y P,/usr/lib/libp/sparcv9:/usr/lib/sparcv9} \
+ %{!p:%{!pg:-Y P,/usr/lib/sparcv9}}}}"
+
+#define LINK_ARCH_SPEC "\
+%{m32:%(link_arch32)} \
+%{m64:%(link_arch64)} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+
+#define LINK_ARCH_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC)
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{h*} %{v:-V} \
+ %{b} %{Wl,*:%*} \
+ %{static:-dn -Bstatic} \
+ %{shared:-G -dy %{!mimpure-text:-z text}} \
+ %{symbolic:-Bsymbolic -G -dy -z text} \
+ %(link_arch) \
+ %{Qy:} %{!Qn:-Qy}"
+
+#undef CC1_SPEC
+#if DEFAULT_ARCH32_P
+#define CC1_SPEC "\
+%{sun4:} %{target:} \
+%{mcypress:-mcpu=cypress} \
+%{msparclite:-mcpu=sparclite} %{mf930:-mcpu=f930} %{mf934:-mcpu=f934} \
+%{mv8:-mcpu=v8} %{msupersparc:-mcpu=supersparc} \
+%{m64:-mptr64 -mcpu=v9 -mstack-bias -mno-v8plus} \
+"
+#else
+#define CC1_SPEC "\
+%{sun4:} %{target:} \
+%{mcypress:-mcpu=cypress} \
+%{msparclite:-mcpu=sparclite} %{mf930:-mcpu=f930} %{mf934:-mcpu=f934} \
+%{mv8:-mcpu=v8} %{msupersparc:-mcpu=supersparc} \
+%{m32:-mptr32 -mcpu=cypress -mno-stack-bias} \
+%{mv8plus:-m32 -mptr32 -mcpu=cypress -mno-stack-bias} \
+"
+#endif
+
+#if DEFAULT_ARCH32_P
+#define MULTILIB_DEFAULTS { "m32" }
+#else
+#define MULTILIB_DEFAULTS { "m64" }
+#endif
+
+#else /* !SPARC_BI_ARCH */
+
+/*
+ * This should be the same as in sol2-sld.h, except with "/sparcv9"
+ * appended to the paths and /usr/ccs/lib is no longer necessary
+ */
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{h*} %{v:-V} \
+ %{b} %{Wl,*:%*} \
+ %{static:-dn -Bstatic} \
+ %{shared:-G -dy %{!mimpure-text:-z text}} \
+ %{symbolic:-Bsymbolic -G -dy -z text} \
+ %{mcmodel=medlow:-M /usr/lib/ld/sparcv9/map.below4G} \
+ %{G:-G} \
+ %{YP,*} \
+ %{R*} \
+ %{compat-bsd: \
+ %{!YP,*:%{p:-Y P,/usr/ucblib/sparcv9:/usr/lib/libp/sparcv9:/usr/lib/sparcv9} \
+ %{pg:-Y P,/usr/ucblib/sparcv9:/usr/lib/libp/sparcv9:/usr/lib/sparcv9} \
+ %{!p:%{!pg:-Y P,/usr/ucblib/sparcv9:/usr/lib/sparcv9}}} \
+ -R /usr/ucblib} \
+ %{!compat-bsd: \
+ %{!YP,*:%{p:-Y P,/usr/lib/libp/sparcv9:/usr/lib/sparcv9} \
+ %{pg:-Y P,/usr/lib/libp/sparcv9:/usr/lib/sparcv9} \
+ %{!p:%{!pg:-Y P,/usr/lib/sparcv9}}}} \
+ %{Qy:} %{!Qn:-Qy}"
+
+#undef MD_STARTFILE_PREFIX
+#define MD_STARTFILE_PREFIX "/usr/lib/sparcv9/"
+
+#endif /* ! SPARC_BI_ARCH */
+
+/*
+ * Attempt to turn on access permissions for the stack.
+ *
+ * This code must be defined when compiling gcc but not when compiling
+ * libgcc2.a, unless we're generating code for 64 bits SPARC
+ *
+ * _SC_STACK_PROT is only defined for post 2.6, but we want this code
+ * to run always. 2.6 can change the stack protection but has no way to
+ * query it.
+ *
+ */
+
+#define TRANSFER_FROM_TRAMPOLINE \
+static int need_enable_exec_stack; \
+ \
+static void check_enabling(void) __attribute__ ((constructor)); \
+static void check_enabling(void) \
+{ \
+ extern long sysconf(int); \
+ \
+ int prot = (int) sysconf(515 /*_SC_STACK_PROT */); \
+ if (prot != 7) \
+ need_enable_exec_stack = 1; \
+} \
+ \
+void \
+__enable_execute_stack (addr) \
+ void *addr; \
+{ \
+ if (!need_enable_exec_stack) \
+ return; \
+ else { \
+ long size = getpagesize (); \
+ long mask = ~(size-1); \
+ char *page = (char *) (((long) addr) & mask); \
+ char *end = (char *) ((((long) (addr + TRAMPOLINE_SIZE)) & mask) + size); \
+ \
+ /* 7 is PROT_READ | PROT_WRITE | PROT_EXEC */ \
+ if (mprotect (page, end - page, 7) < 0) \
+ perror ("mprotect of trampoline code"); \
+ } \
+}
+
+/* A C statement (sans semicolon) to output an element in the table of
+ global constructors. */
+#undef ASM_OUTPUT_CONSTRUCTOR
+#define ASM_OUTPUT_CONSTRUCTOR(FILE,NAME) \
+ do { \
+ ctors_section (); \
+ fprintf (FILE, "\t%s\t ", TARGET_ARCH64 ? ASM_LONGLONG : INT_ASM_OP); \
+ assemble_name (FILE, NAME); \
+ fprintf (FILE, "\n"); \
+ } while (0)
+
+/* A C statement (sans semicolon) to output an element in the table of
+ global destructors. */
+#undef ASM_OUTPUT_DESTRUCTOR
+#define ASM_OUTPUT_DESTRUCTOR(FILE,NAME) \
+ do { \
+ dtors_section (); \
+ fprintf (FILE, "\t%s\t ", TARGET_ARCH64 ? ASM_LONGLONG : INT_ASM_OP); \
+ assemble_name (FILE, NAME); \
+ fprintf (FILE, "\n"); \
+ } while (0)
+
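
Note: the TRANSFER_FROM_TRAMPOLINE logic above boils down to this: a constructor
probes once whether the stack is already executable (sysconf(_SC_STACK_PROT),
hard-coded as 515 so the file also builds against pre-2.6 headers), and if not,
__enable_execute_stack mprotects the page range holding each trampoline. A
standalone sketch of the same rounding, with TRAMPOLINE_SIZE assumed to be 32
for illustration only:

    #include <sys/mman.h>
    #include <unistd.h>

    #define TRAMPOLINE_SIZE 32   /* assumed for this sketch */

    static void
    make_executable (void *addr)
    {
      long size = getpagesize ();
      long mask = ~(size - 1);
      char *page = (char *) ((long) addr & mask);            /* round down to page */
      char *end = (char *) ((((long) addr + TRAMPOLINE_SIZE)
                             & mask) + size);                /* round up to page   */

      /* 7 == PROT_READ | PROT_WRITE | PROT_EXEC */
      mprotect (page, end - page, PROT_READ | PROT_WRITE | PROT_EXEC);
    }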
diff --git a/contrib/gcc/config/sparc/sol2.h b/contrib/gcc/config/sparc/sol2.h
index a0fa4a8..9274f9d 100644
--- a/contrib/gcc/config/sparc/sol2.h
+++ b/contrib/gcc/config/sparc/sol2.h
@@ -195,10 +195,14 @@ Boston, MA 02111-1307, USA. */
#undef INIT_SUBTARGET_OPTABS
#define INIT_SUBTARGET_OPTABS \
- fixsfdi_libfunc = gen_rtx_SYMBOL_REF (Pmode, "__ftoll"); \
- fixunssfdi_libfunc = gen_rtx_SYMBOL_REF (Pmode, "__ftoull"); \
- fixdfdi_libfunc = gen_rtx_SYMBOL_REF (Pmode, "__dtoll"); \
- fixunsdfdi_libfunc = gen_rtx_SYMBOL_REF (Pmode, "__dtoull")
+ fixsfdi_libfunc = gen_rtx_SYMBOL_REF (Pmode, \
+ TARGET_ARCH64 ? "__ftol" : "__ftoll"); \
+ fixunssfdi_libfunc = gen_rtx_SYMBOL_REF (Pmode, \
+ TARGET_ARCH64 ? "__ftoul" : "__ftoull"); \
+ fixdfdi_libfunc = gen_rtx_SYMBOL_REF (Pmode, \
+ TARGET_ARCH64 ? "__dtol" : "__dtoll"); \
+ fixunsdfdi_libfunc = gen_rtx_SYMBOL_REF (Pmode, \
+ TARGET_ARCH64 ? "__dtoul" : "__dtoull")
/* No weird SPARC variants on Solaris */
#undef TARGET_LIVE_G0
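
Note: the change above picks shorter conversion-routine names in 64-bit mode:
DImode coincides with plain `long` once `long` is 64 bits, so the `__ftol`
family of entry points applies, while 32-bit compilations keep the `long long`
(`__ftoll`) variants. A sketch of what triggers the libcall (routine names from
the diff; their presence in Solaris libc is assumed):

    /* Emits the fixsfdi libcall, __ftol on ARCH64 or __ftoll on 32-bit,
       when no inline conversion pattern applies. */
    long long
    f2ll (float f)
    {
      return (long long) f;
    }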
diff --git a/contrib/gcc/config/sparc/sp64-elf.h b/contrib/gcc/config/sparc/sp64-elf.h
index 2482866..4fd81c5 100644
--- a/contrib/gcc/config/sparc/sp64-elf.h
+++ b/contrib/gcc/config/sparc/sp64-elf.h
@@ -80,8 +80,8 @@ crtbegin.o%s \
/* V9 chips can handle either endianness. */
#undef SUBTARGET_SWITCHES
#define SUBTARGET_SWITCHES \
-{"big-endian", -MASK_LITTLE_ENDIAN}, \
-{"little-endian", MASK_LITTLE_ENDIAN},
+{"big-endian", -MASK_LITTLE_ENDIAN, "Generate code for big endian" }, \
+{"little-endian", MASK_LITTLE_ENDIAN, "Generate code for little endian" },
#undef BYTES_BIG_ENDIAN
#define BYTES_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN)
@@ -102,9 +102,10 @@ crtbegin.o%s \
/* The medium/anywhere code model practically requires us to put jump tables
in the text section as gcc is unable to distinguish LABEL_REF's of jump
tables from other label refs (when we need to). */
-/* ??? Revisit this. */
+/* But we now defer the tables to the end of the function, so we make
+ this 0 to not confuse the branch shortening code. */
#undef JUMP_TABLES_IN_TEXT_SECTION
-#define JUMP_TABLES_IN_TEXT_SECTION 1
+#define JUMP_TABLES_IN_TEXT_SECTION 0
/* System V Release 4 uses DWARF debugging info.
GDB doesn't support 64 bit stabs yet and the desired debug format is DWARF
diff --git a/contrib/gcc/config/sparc/sparc.c b/contrib/gcc/config/sparc/sparc.c
index e350729..ad12ec3 100644
--- a/contrib/gcc/config/sparc/sparc.c
+++ b/contrib/gcc/config/sparc/sparc.c
@@ -1,5 +1,5 @@
/* Subroutines for insn-output.c for Sun SPARC.
- Copyright (C) 1987, 88, 89, 92-97, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1987, 88, 89, 92-98, 1999 Free Software Foundation, Inc.
Contributed by Michael Tiemann (tiemann@cygnus.com)
64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
at Cygnus Support.
@@ -98,20 +98,28 @@ char leaf_reg_remap[] =
this is "%sp+something". We record "something" separately as it may be
too big for reg+constant addressing. */
-static char *frame_base_name;
+static const char *frame_base_name;
static int frame_base_offset;
static rtx pic_setup_code PROTO((void));
-static rtx find_addr_reg PROTO((rtx));
static void sparc_init_modes PROTO((void));
-static int save_regs PROTO((FILE *, int, int, char *,
+static int save_regs PROTO((FILE *, int, int, const char *,
int, int, int));
-static int restore_regs PROTO((FILE *, int, int, char *, int, int));
-static void build_big_number PROTO((FILE *, int, char *));
+static int restore_regs PROTO((FILE *, int, int, const char *, int, int));
+static void build_big_number PROTO((FILE *, int, const char *));
static int function_arg_slotno PROTO((const CUMULATIVE_ARGS *,
enum machine_mode, tree, int, int,
int *, int *));
+static int supersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
+static int hypersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
+static int ultrasparc_adjust_cost PROTO((rtx, rtx, rtx, int));
+
+static void sparc_output_addr_vec PROTO((rtx));
+static void sparc_output_addr_diff_vec PROTO((rtx));
+static void sparc_output_deferred_case_vectors PROTO((void));
+
+
#ifdef DWARF2_DEBUGGING_INFO
extern char *dwarf2out_cfi_label ();
#endif
@@ -119,14 +127,14 @@ extern char *dwarf2out_cfi_label ();
/* Option handling. */
/* Code model option as passed by user. */
-char *sparc_cmodel_string;
+const char *sparc_cmodel_string;
/* Parsed value. */
enum cmodel sparc_cmodel;
/* Record alignment options as passed by user. */
-char *sparc_align_loops_string;
-char *sparc_align_jumps_string;
-char *sparc_align_funcs_string;
+const char *sparc_align_loops_string;
+const char *sparc_align_jumps_string;
+const char *sparc_align_funcs_string;
/* Parsed values, as a power of two. */
int sparc_align_loops;
@@ -152,7 +160,7 @@ void
sparc_override_options ()
{
static struct code_model {
- char *name;
+ const char *name;
int value;
} cmodels[] = {
{ "32", CM_32 },
@@ -166,13 +174,15 @@ sparc_override_options ()
/* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=. */
static struct cpu_default {
int cpu;
- char *name;
+ const char *name;
} cpu_default[] = {
/* There must be one entry here for each TARGET_CPU value. */
{ TARGET_CPU_sparc, "cypress" },
{ TARGET_CPU_sparclet, "tsc701" },
{ TARGET_CPU_sparclite, "f930" },
{ TARGET_CPU_v8, "v8" },
+ { TARGET_CPU_hypersparc, "hypersparc" },
+ { TARGET_CPU_sparclite86x, "sparclite86x" },
{ TARGET_CPU_supersparc, "supersparc" },
{ TARGET_CPU_v9, "v9" },
{ TARGET_CPU_ultrasparc, "ultrasparc" },
@@ -181,7 +191,7 @@ sparc_override_options ()
struct cpu_default *def;
/* Table of values for -m{cpu,tune}=. */
static struct cpu_table {
- char *name;
+ const char *name;
enum processor_type processor;
int disable;
int enable;
@@ -196,6 +206,8 @@ sparc_override_options ()
The Fujitsu MB86934 is the recent sparclite chip, with an fpu. */
{ "f930", PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
{ "f934", PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
+ { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
+ { "sparclite86x", PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU, MASK_V8 },
{ "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
/* TEMIC sparclet */
{ "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
@@ -207,7 +219,7 @@ sparc_override_options ()
struct cpu_table *cpu;
struct sparc_cpu_select *sel;
int fpu;
-
+
#ifndef SPARC_BI_ARCH
/* Check for unsupported architecture size. */
if (! TARGET_64BIT != DEFAULT_ARCH32_P)
@@ -217,8 +229,25 @@ sparc_override_options ()
}
#endif
+ /* At the moment we don't allow different pointer size and architecture */
+ if (! TARGET_64BIT != ! TARGET_PTR64)
+ {
+ error ("-mptr%d not allowed on -m%d",
+ TARGET_PTR64 ? 64 : 32, TARGET_64BIT ? 64 : 32);
+ if (TARGET_64BIT)
+ target_flags |= MASK_PTR64;
+ else
+ target_flags &= ~MASK_PTR64;
+ }
+
/* Code model selection. */
sparc_cmodel = SPARC_DEFAULT_CMODEL;
+
+#ifdef SPARC_BI_ARCH
+ if (TARGET_ARCH32)
+ sparc_cmodel = CM_32;
+#endif
+
if (sparc_cmodel_string != NULL)
{
if (TARGET_ARCH64)
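
Note: the new pointer-size check in this hunk rejects mismatched flags up front.
A hypothetical `gcc -m32 -mptr64 ...` now fails with "-mptr64 not allowed on
-m32", after which the pointer-size mask is forced back to match the
architecture so processing can continue past the error.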
@@ -277,13 +306,17 @@ sparc_override_options ()
if (TARGET_V9 && TARGET_ARCH32)
target_flags |= MASK_DEPRECATED_V8_INSNS;
- /* V8PLUS requires V9 */
- if (! TARGET_V9)
+ /* V8PLUS requires V9, makes no sense in 64 bit mode. */
+ if (! TARGET_V9 || TARGET_ARCH64)
target_flags &= ~MASK_V8PLUS;
/* Don't use stack biasing in 32 bit mode. */
if (TARGET_ARCH32)
target_flags &= ~MASK_STACK_BIAS;
+
+ /* Don't allow -mvis if FPU is disabled. */
+ if (! TARGET_FPU)
+ target_flags &= ~MASK_VIS;
/* Validate -malign-loops= value, or provide default. */
if (sparc_align_loops_string)
@@ -332,6 +365,12 @@ sparc_override_options ()
/* Do various machine dependent initializations. */
sparc_init_modes ();
+
+ if ((profile_flag || profile_block_flag)
+ && sparc_cmodel != CM_MEDLOW)
+ {
+ error ("profiling does not support code models other than medlow");
+ }
}
/* Miscellaneous utilities. */
@@ -347,14 +386,6 @@ v9_regcmp_p (code)
|| code == LE || code == GT);
}
-/* 32 bit registers are zero extended so only zero/non-zero comparisons
- work. */
-int
-v8plus_regcmp_p (code)
- enum rtx_code code;
-{
- return (code == EQ || code == NE);
-}
/* Operand constraints. */
@@ -637,56 +668,6 @@ reg_or_nonsymb_mem_operand (op, mode)
}
int
-sparc_operand (op, mode)
- rtx op;
- enum machine_mode mode;
-{
- if (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX)
- return 1;
- if (GET_CODE (op) == CONST_INT)
- return SMALL_INT (op);
- if (GET_MODE (op) != mode)
- return 0;
- if (GET_CODE (op) == SUBREG)
- op = SUBREG_REG (op);
- if (GET_CODE (op) != MEM)
- return 0;
-
- op = XEXP (op, 0);
- if (GET_CODE (op) == LO_SUM)
- return (GET_CODE (XEXP (op, 0)) == REG
- && symbolic_operand (XEXP (op, 1), Pmode));
- return memory_address_p (mode, op);
-}
-
-int
-move_operand (op, mode)
- rtx op;
- enum machine_mode mode;
-{
- if (mode == DImode && arith_double_operand (op, mode))
- return 1;
- if (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX)
- return 1;
- if (GET_CODE (op) == CONST_INT)
- return SMALL_INT (op) || SPARC_SETHI_P (INTVAL (op));
-
- if (GET_MODE (op) != mode)
- return 0;
- if (GET_CODE (op) == SUBREG)
- op = SUBREG_REG (op);
- if (GET_CODE (op) != MEM)
- return 0;
- op = XEXP (op, 0);
- if (GET_CODE (op) == LO_SUM)
- return (register_operand (XEXP (op, 0), Pmode)
- && CONSTANT_P (XEXP (op, 1)));
- return memory_address_p (mode, op);
-}
-
-int
splittable_symbolic_memory_operand (op, mode)
rtx op;
enum machine_mode mode ATTRIBUTE_UNUSED;
@@ -775,17 +756,6 @@ v9_regcmp_op (op, mode)
return v9_regcmp_p (code);
}
-/* ??? Same as eq_or_neq. */
-int
-v8plus_regcmp_op (op, mode)
- register rtx op;
- enum machine_mode mode ATTRIBUTE_UNUSED;
-{
- enum rtx_code code = GET_CODE (op);
-
- return (code == EQ || code == NE);
-}
-
/* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation. */
int
@@ -837,8 +807,7 @@ arith_operand (op, mode)
enum machine_mode mode;
{
int val;
- if (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX)
+ if (register_operand (op, mode))
return 1;
if (GET_CODE (op) != CONST_INT)
return 0;
@@ -846,6 +815,72 @@ arith_operand (op, mode)
return SPARC_SIMM13_P (val);
}
+/* Return true if OP is a constant 4096 */
+
+int
+arith_4096_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ int val;
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+ val = INTVAL (op) & 0xffffffff;
+ return val == 4096;
+}
+
+/* Return true if OP is suitable as second operand for add/sub */
+
+int
+arith_add_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return arith_operand (op, mode) || arith_4096_operand (op, mode);
+}
+
+/* Return true if OP is a CONST_INT or a CONST_DOUBLE which can fit in the
+ immediate field of OR and XOR instructions. Used for 64-bit
+ constant formation patterns. */
+int
+const64_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return ((GET_CODE (op) == CONST_INT
+ && SPARC_SIMM13_P (INTVAL (op)))
+#if HOST_BITS_PER_WIDE_INT != 64
+ || (GET_CODE (op) == CONST_DOUBLE
+ && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
+ && (CONST_DOUBLE_HIGH (op) ==
+ ((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
+ (HOST_WIDE_INT)0xffffffff : 0)))
+#endif
+ );
+}
+
+/* The same, but only for sethi instructions. */
+int
+const64_high_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return ((GET_CODE (op) == CONST_INT
+ && (INTVAL (op) & 0xfffffc00) != 0
+ && SPARC_SETHI_P (INTVAL (op))
+#if HOST_BITS_PER_WIDE_INT != 64
+ /* Must be positive on non-64bit host else the
+ optimizer is fooled into thinking that sethi
+ sign extends, even though it does not. */
+ && INTVAL (op) >= 0
+#endif
+ )
+ || (GET_CODE (op) == CONST_DOUBLE
+ && CONST_DOUBLE_HIGH (op) == 0
+ && (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
+ && SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
+}
+
/* Return true if OP is a register, or is a CONST_INT that can fit in a
signed 11 bit immediate field. This is an acceptable SImode operand for
the movcc instructions. */
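
Note: background for the 4096 predicates added in this hunk. SPARC's 13-bit
signed immediate spans [-4096, 4095], so +4096 just misses the add/sub immediate
range, but its negation fits; an add of 4096 can therefore be emitted as a
subtract of -4096 (and vice versa), which is presumably what arith_add_operand
and arith_double_add_operand enable in the .md patterns. The range test, in
effect:

    /* SPARC_SIMM13_P, in effect: */
    int simm13_p (int v) { return v >= -4096 && v <= 4095; }
    /* simm13_p (4095) == 1, simm13_p (-4096) == 1, simm13_p (4096) == 0 */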
@@ -856,7 +891,6 @@ arith11_operand (op, mode)
enum machine_mode mode;
{
return (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX
|| (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
}
@@ -870,7 +904,6 @@ arith10_operand (op, mode)
enum machine_mode mode;
{
return (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX
|| (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
}
@@ -887,7 +920,6 @@ arith_double_operand (op, mode)
enum machine_mode mode;
{
return (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX
|| (GET_CODE (op) == CONST_INT && SMALL_INT (op))
|| (! TARGET_ARCH64
&& GET_CODE (op) == CONST_DOUBLE
@@ -902,6 +934,30 @@ arith_double_operand (op, mode)
&& (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}
+/* Return true if OP is a constant 4096 for DImode on ARCH64 */
+
+int
+arith_double_4096_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (TARGET_ARCH64 &&
+ ((GET_CODE (op) == CONST_INT && INTVAL (op) == 4096) ||
+ (GET_CODE (op) == CONST_DOUBLE &&
+ CONST_DOUBLE_LOW (op) == 4096 &&
+ CONST_DOUBLE_HIGH (op) == 0)));
+}
+
+/* Return true if OP is suitable as second operand for add/sub in DImode */
+
+int
+arith_double_add_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return arith_double_operand (op, mode) || arith_double_4096_operand (op, mode);
+}
+
/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
can fit in an 11 bit immediate field. This is an acceptable DImode
operand for the movcc instructions. */
@@ -913,7 +969,6 @@ arith11_double_operand (op, mode)
enum machine_mode mode;
{
return (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX
|| (GET_CODE (op) == CONST_DOUBLE
&& (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
&& (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
@@ -937,7 +992,6 @@ arith10_double_operand (op, mode)
enum machine_mode mode;
{
return (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX
|| (GET_CODE (op) == CONST_DOUBLE
&& (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
&& (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
@@ -959,8 +1013,18 @@ small_int (op, mode)
rtx op;
enum machine_mode mode ATTRIBUTE_UNUSED;
{
+ return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
+}
+
+int
+small_int_or_double (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
- || GET_CODE (op) == CONSTANT_P_RTX);
+ || (GET_CODE (op) == CONST_DOUBLE
+ && CONST_DOUBLE_HIGH (op) == 0
+ && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))));
}
/* Recognize operand values for the umul instruction. That instruction sign
@@ -974,16 +1038,15 @@ uns_small_int (op, mode)
{
#if HOST_BITS_PER_WIDE_INT > 32
/* All allowed constants will fit a CONST_INT. */
- return ((GET_CODE (op) == CONST_INT
- && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
- || (INTVAL (op) >= 0xFFFFF000 && INTVAL (op) < 0x100000000L)))
- || GET_CODE (op) == CONSTANT_P_RTX);
+ return (GET_CODE (op) == CONST_INT
+ && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
+ || (INTVAL (op) >= 0xFFFFF000
+ && INTVAL (op) < 0x100000000)));
#else
- return (((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
- || (GET_CODE (op) == CONST_DOUBLE
- && CONST_DOUBLE_HIGH (op) == 0
- && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000))
- || GET_CODE (op) == CONSTANT_P_RTX);
+ return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
+ || (GET_CODE (op) == CONST_DOUBLE
+ && CONST_DOUBLE_HIGH (op) == 0
+ && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
#endif
}
@@ -1003,7 +1066,947 @@ clobbered_register (op, mode)
{
return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
}
+
+/* Return 1 if OP is const0_rtx, used for TARGET_LIVE_G0 insns. */
+
+int
+zero_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return op == const0_rtx;
+}
+
+/* Return 1 if OP is a valid operand for the source of a move insn. */
+
+int
+input_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ /* If both modes are non-void they must be the same. */
+ if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
+ return 0;
+
+ /* Only a tiny bit of handling for CONSTANT_P_RTX is necessary. */
+ if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == CONSTANT_P_RTX)
+ return 1;
+
+ /* Allow any one instruction integer constant, and all CONST_INT
+ variants when we are working in DImode and !arch64. */
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && ((GET_CODE (op) == CONST_INT
+ && ((SPARC_SETHI_P (INTVAL (op))
+ && (! TARGET_ARCH64
+ || (INTVAL (op) >= 0)
+ || mode == SImode))
+ || SPARC_SIMM13_P (INTVAL (op))
+ || (mode == DImode
+ && ! TARGET_ARCH64)))
+ || (TARGET_ARCH64
+ && GET_CODE (op) == CONST_DOUBLE
+ && ((CONST_DOUBLE_HIGH (op) == 0
+ && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
+ ||
+#if HOST_BITS_PER_WIDE_INT == 64
+ (CONST_DOUBLE_HIGH (op) == 0
+ && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
+#else
+ (SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
+ && (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
+ && CONST_DOUBLE_HIGH (op) == 0)
+ || (CONST_DOUBLE_HIGH (op) == -1)))
+#endif
+ ))))
+ return 1;
+
+ /* If !arch64 and this is a DImode const, allow it so that
+ the splits can be generated. */
+ if (! TARGET_ARCH64
+ && mode == DImode
+ && GET_CODE (op) == CONST_DOUBLE)
+ return 1;
+
+ if (register_operand (op, mode))
+ return 1;
+
+ /* If this is a SUBREG, look inside so that we handle
+ paradoxical ones. */
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ /* Check for valid MEM forms. */
+ if (GET_CODE (op) == MEM)
+ {
+ rtx inside = XEXP (op, 0);
+
+ if (GET_CODE (inside) == LO_SUM)
+ {
+ /* We can't allow these because all of the splits
+ (eventually as they trickle down into DFmode
+ splits) require offsettable memory references. */
+ if (! TARGET_V9
+ && GET_MODE (op) == TFmode)
+ return 0;
+
+ return (register_operand (XEXP (inside, 0), Pmode)
+ && CONSTANT_P (XEXP (inside, 1)));
+ }
+ return memory_address_p (mode, inside);
+ }
+
+ return 0;
+}
+
+/* We know it can't be done in one insn when we get here,
+ the movsi expander guarentees this. */
+void
+sparc_emit_set_const32 (op0, op1)
+ rtx op0;
+ rtx op1;
+{
+ enum machine_mode mode = GET_MODE (op0);
+ rtx temp;
+
+ if (GET_CODE (op1) == CONST_INT)
+ {
+ HOST_WIDE_INT value = INTVAL (op1);
+
+ if (SPARC_SETHI_P (value)
+ || SPARC_SIMM13_P (value))
+ abort ();
+ }
+
+ /* Full 2-insn decomposition is needed. */
+ if (reload_in_progress || reload_completed)
+ temp = op0;
+ else
+ temp = gen_reg_rtx (mode);
+
+ if (GET_CODE (op1) == CONST_INT)
+ {
+ /* Emit them as real moves instead of a HIGH/LO_SUM,
+ this way CSE can see everything and reuse intermediate
+ values if it wants. */
+ if (TARGET_ARCH64
+ && HOST_BITS_PER_WIDE_INT != 64
+ && (INTVAL (op1) & 0x80000000) != 0)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode,
+ temp,
+ gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx,
+ INTVAL (op1) & 0xfffffc00, 0)));
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode,
+ temp,
+ GEN_INT (INTVAL (op1) & 0xfffffc00)));
+ }
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_IOR (mode,
+ temp,
+ GEN_INT (INTVAL (op1) & 0x3ff))));
+ }
+ else
+ {
+ /* A symbol, emit in the traditional way. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ temp,
+ gen_rtx_HIGH (mode,
+ op1)));
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_LO_SUM (mode,
+ temp,
+ op1)));
+
+ }
+}
+
+
+/* Sparc-v9 code-model support. */
+void
+sparc_emit_set_symbolic_const64 (op0, op1, temp1)
+ rtx op0;
+ rtx op1;
+ rtx temp1;
+{
+ switch (sparc_cmodel)
+ {
+ case CM_MEDLOW:
+ /* The range spanned by all instructions in the object is less
+ than 2^31 bytes (2GB) and the distance from any instruction
+ to the location of the label _GLOBAL_OFFSET_TABLE_ is less
+ than 2^31 bytes (2GB).
+
+ The executable must be in the low 4TB of the virtual address
+ space.
+
+ sethi %hi(symbol), %temp
+ or %temp, %lo(symbol), %reg */
+ emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
+ emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
+ break;
+
+ case CM_MEDMID:
+ /* The range spanned by all instructions in the object is less
+ than 2^31 bytes (2GB) and the distance from any instruction
+ to the location of the label _GLOBAL_OFFSET_TABLE_ is less
+ than 2^31 bytes (2GB).
+
+ The executable must be in the low 16TB of the virtual address
+ space.
+
+ sethi %h44(symbol), %temp1
+ or %temp1, %m44(symbol), %temp2
+ sllx %temp2, 12, %temp3
+ or %temp3, %l44(symbol), %reg */
+ emit_insn (gen_seth44 (op0, op1));
+ emit_insn (gen_setm44 (op0, op0, op1));
+ emit_insn (gen_rtx_SET (VOIDmode, temp1,
+ gen_rtx_ASHIFT (DImode, op0, GEN_INT (12))));
+ emit_insn (gen_setl44 (op0, temp1, op1));
+ break;
+
+ case CM_MEDANY:
+ /* The range spanned by all instructions in the object is less
+ than 2^31 bytes (2GB) and the distance from any instruction
+ to the location of the label _GLOBAL_OFFSET_TABLE_ is less
+ than 2^31 bytes (2GB).
+
+ The executable can be placed anywhere in the virtual address
+ space.
+
+ sethi %hh(symbol), %temp1
+ sethi %lm(symbol), %temp2
+ or %temp1, %hm(symbol), %temp3
+ or %temp2, %lo(symbol), %temp4
+ sllx %temp3, 32, %temp5
+ or %temp4, %temp5, %reg */
+
+ /* Getting this right wrt. reloading is really tricky.
+ We _MUST_ have a seperate temporary at this point,
+ if we don't barf immediately instead of generating
+ incorrect code. */
+ if (temp1 == op0)
+ abort ();
+
+ emit_insn (gen_sethh (op0, op1));
+ emit_insn (gen_setlm (temp1, op1));
+ emit_insn (gen_sethm (op0, op0, op1));
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_PLUS (DImode, op0, temp1)));
+ emit_insn (gen_setlo (op0, op0, op1));
+ break;
+
+ case CM_EMBMEDANY:
+ /* Old old old backwards compatibility kruft here.
+ Essentially it is MEDLOW with a fixed 64-bit
+ virtual base added to all data segment addresses.
+ Text-segment stuff is computed like MEDANY, we can't
+ reuse the code above because the relocation knobs
+ look different.
+
+ Data segment: sethi %hi(symbol), %temp1
+ or %temp1, %lo(symbol), %temp2
+ add %temp2, EMBMEDANY_BASE_REG, %reg
+
+ Text segment: sethi %uhi(symbol), %temp1
+ sethi %hi(symbol), %temp2
+ or %temp1, %ulo(symbol), %temp3
+ or %temp2, %lo(symbol), %temp4
+ sllx %temp3, 32, %temp5
+ or %temp4, %temp5, %reg */
+ if (data_segment_operand (op1, GET_MODE (op1)))
+ {
+ emit_insn (gen_embmedany_sethi (temp1, op1));
+ emit_insn (gen_embmedany_brsum (op0, temp1));
+ emit_insn (gen_embmedany_losum (op0, op0, op1));
+ }
+ else
+ {
+ /* Getting this right wrt. reloading is really tricky.
+ We _MUST_ have a seperate temporary at this point,
+ so we barf immediately instead of generating
+ incorrect code. */
+ if (temp1 == op0)
+ abort ();
+
+ emit_insn (gen_embmedany_textuhi (op0, op1));
+ emit_insn (gen_embmedany_texthi (temp1, op1));
+ emit_insn (gen_embmedany_textulo (op0, op0, op1));
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_PLUS (DImode, op0, temp1)));
+ emit_insn (gen_embmedany_textlo (op0, op0, op1));
+ }
+ break;
+
+ default:
+ abort();
+ }
+}
+
+/* These avoid problems when cross compiling. If we do not
+ go through all this hair then the optimizer will see
+ invalid REG_EQUAL notes or in some cases none at all. */
+static void sparc_emit_set_safe_HIGH64 PROTO ((rtx, HOST_WIDE_INT));
+static rtx gen_safe_SET64 PROTO ((rtx, HOST_WIDE_INT));
+static rtx gen_safe_OR64 PROTO ((rtx, HOST_WIDE_INT));
+static rtx gen_safe_XOR64 PROTO ((rtx, HOST_WIDE_INT));
+
+#if HOST_BITS_PER_WIDE_INT == 64
+#define GEN_HIGHINT64(__x) GEN_INT ((__x) & 0xfffffc00)
+#define GEN_INT64(__x) GEN_INT (__x)
+#else
+#define GEN_HIGHINT64(__x) \
+ gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
+ (__x) & 0xfffffc00, 0)
+#define GEN_INT64(__x) \
+ gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
+ (__x) & 0xffffffff, \
+ ((__x) & 0x80000000 \
+ ? 0xffffffff : 0))
+#endif
+
+/* The optimizer is not to assume anything about exactly
+ which bits are set for a HIGH, they are unspecified.
+ Unfortunately this leads to many missed optimizations
+ during CSE. We mask out the non-HIGH bits, and matches
+ a plain movdi, to alleviate this problem. */
+static void
+sparc_emit_set_safe_HIGH64 (dest, val)
+ rtx dest;
+ HOST_WIDE_INT val;
+{
+ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_HIGHINT64 (val)));
+}
+
+static rtx
+gen_safe_SET64 (dest, val)
+ rtx dest;
+ HOST_WIDE_INT val;
+{
+ return gen_rtx_SET (VOIDmode, dest, GEN_INT64 (val));
+}
+
+static rtx
+gen_safe_OR64 (src, val)
+ rtx src;
+ HOST_WIDE_INT val;
+{
+ return gen_rtx_IOR (DImode, src, GEN_INT64 (val));
+}
+
+static rtx
+gen_safe_XOR64 (src, val)
+ rtx src;
+ HOST_WIDE_INT val;
+{
+ return gen_rtx_XOR (DImode, src, GEN_INT64 (val));
+}
+
+/* Worker routines for 64-bit constant formation on arch64.
+ One of the key things to be doing in these emissions is
+ to create as many temp REGs as possible. This makes it
+ possible for half-built constants to be used later when
+ such values are similar to something required later on.
+ Without doing this, the optimizer cannot see such
+ opportunities. */
+
+static void sparc_emit_set_const64_quick1
+ PROTO((rtx, rtx, unsigned HOST_WIDE_INT, int));
+
+static void
+sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
+ rtx op0;
+ rtx temp;
+ unsigned HOST_WIDE_INT low_bits;
+ int is_neg;
+{
+ unsigned HOST_WIDE_INT high_bits;
+
+ if (is_neg)
+ high_bits = (~low_bits) & 0xffffffff;
+ else
+ high_bits = low_bits;
+
+ sparc_emit_set_safe_HIGH64 (temp, high_bits);
+ if (!is_neg)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_safe_OR64 (temp, (high_bits & 0x3ff))));
+ }
+ else
+ {
+ /* If we are XOR'ing with -1, then we should emit a one's complement
+ instead. This way the combiner will notice logical operations
+ such as ANDN later on and substitute. */
+ if ((low_bits & 0x3ff) == 0x3ff)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_NOT (DImode, temp)));
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_safe_XOR64 (temp,
+ (-0x400 | (low_bits & 0x3ff)))));
+ }
+ }
+}
+
+static void sparc_emit_set_const64_quick2
+ PROTO((rtx, rtx, unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT, int));
+
+static void
+sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_immediate, shift_count)
+ rtx op0;
+ rtx temp;
+ unsigned HOST_WIDE_INT high_bits;
+ unsigned HOST_WIDE_INT low_immediate;
+ int shift_count;
+{
+ rtx temp2 = op0;
+
+ if ((high_bits & 0xfffffc00) != 0)
+ {
+ sparc_emit_set_safe_HIGH64 (temp, high_bits);
+ if ((high_bits & ~0xfffffc00) != 0)
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_safe_OR64 (temp, (high_bits & 0x3ff))));
+ else
+ temp2 = temp;
+ }
+ else
+ {
+ emit_insn (gen_safe_SET64 (temp, high_bits));
+ temp2 = temp;
+ }
+
+ /* Now shift it up into place. */
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_ASHIFT (DImode, temp2,
+ GEN_INT (shift_count))));
+
+ /* If there is a low immediate part piece, finish up by
+ putting that in as well. */
+ if (low_immediate != 0)
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_safe_OR64 (op0, low_immediate)));
+}
+
+static void sparc_emit_set_const64_longway
+ PROTO((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
+
+/* Full 64-bit constant decomposition. Even though this is the
+ 'worst' case, we still optimize a few things away. */
+static void
+sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits)
+ rtx op0;
+ rtx temp;
+ unsigned HOST_WIDE_INT high_bits;
+ unsigned HOST_WIDE_INT low_bits;
+{
+ rtx sub_temp;
+
+ if (reload_in_progress || reload_completed)
+ sub_temp = op0;
+ else
+ sub_temp = gen_reg_rtx (DImode);
+
+ if ((high_bits & 0xfffffc00) != 0)
+ {
+ sparc_emit_set_safe_HIGH64 (temp, high_bits);
+ if ((high_bits & ~0xfffffc00) != 0)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ sub_temp,
+ gen_safe_OR64 (temp, (high_bits & 0x3ff))));
+ else
+ sub_temp = temp;
+ }
+ else
+ {
+ emit_insn (gen_safe_SET64 (temp, high_bits));
+ sub_temp = temp;
+ }
+
+ if (!reload_in_progress && !reload_completed)
+ {
+ rtx temp2 = gen_reg_rtx (DImode);
+ rtx temp3 = gen_reg_rtx (DImode);
+ rtx temp4 = gen_reg_rtx (DImode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, temp4,
+ gen_rtx_ASHIFT (DImode, sub_temp,
+ GEN_INT (32))));
+
+ sparc_emit_set_safe_HIGH64 (temp2, low_bits);
+ if ((low_bits & ~0xfffffc00) != 0)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, temp3,
+ gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_PLUS (DImode, temp4, temp3)));
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_PLUS (DImode, temp4, temp2)));
+ }
+ }
+ else
+ {
+ rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
+ rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
+ rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
+ int to_shift = 12;
+
+      /* We are in the middle of reload, so this is really
+	 painful.  However, we still make an attempt to
+	 avoid emitting truly stupid code.  */
+ if (low1 != const0_rtx)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_ASHIFT (DImode, sub_temp,
+ GEN_INT (to_shift))));
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_IOR (DImode, op0, low1)));
+ sub_temp = op0;
+ to_shift = 12;
+ }
+ else
+ {
+ to_shift += 12;
+ }
+ if (low2 != const0_rtx)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_ASHIFT (DImode, sub_temp,
+ GEN_INT (to_shift))));
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_IOR (DImode, op0, low2)));
+ sub_temp = op0;
+ to_shift = 8;
+ }
+ else
+ {
+ to_shift += 8;
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_ASHIFT (DImode, sub_temp,
+ GEN_INT (to_shift))));
+ if (low3 != const0_rtx)
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_IOR (DImode, op0, low3)));
+ /* phew... */
+ }
+}
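
On the reload branch just above, no fresh pseudos are available, so the low word enters as 12-, 12- and 8-bit chunks shifted onto the already-built high word; the real code also skips all-zero chunks by accumulating the shift count. A host-side sketch of the unoptimized equivalent, hypothetical names:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t
    longway_reload_value (uint32_t high_bits, uint32_t low_bits)
    {
      uint64_t v = high_bits;           /* sethi/or, as in quick2 */
      uint32_t low1 = (low_bits >> 20) & 0xfff;
      uint32_t low2 = (low_bits >> 8) & 0xfff;
      uint32_t low3 = low_bits & 0xff;

      v = (v << 12) | low1;             /* sllx 12; or            */
      v = (v << 12) | low2;             /* sllx 12; or            */
      v = (v << 8) | low3;              /* sllx 8; or; 12+12+8=32 */
      return v;
    }

    int
    main (void)
    {
      assert (longway_reload_value (0xdeadbeef, 0xcafebabe)
              == 0xdeadbeefcafebabeULL);
      return 0;
    }
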
+
+/* Analyze a 64-bit constant for certain properties. */
+static void analyze_64bit_constant
+ PROTO((unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT,
+ int *, int *, int *));
+
+static void
+analyze_64bit_constant (high_bits, low_bits, hbsp, lbsp, abbasp)
+ unsigned HOST_WIDE_INT high_bits, low_bits;
+ int *hbsp, *lbsp, *abbasp;
+{
+ int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
+ int i;
+
+ lowest_bit_set = highest_bit_set = -1;
+ i = 0;
+ do
+ {
+ if ((lowest_bit_set == -1)
+ && ((low_bits >> i) & 1))
+ lowest_bit_set = i;
+ if ((highest_bit_set == -1)
+ && ((high_bits >> (32 - i - 1)) & 1))
+ highest_bit_set = (64 - i - 1);
+ }
+ while (++i < 32
+ && ((highest_bit_set == -1)
+ || (lowest_bit_set == -1)));
+ if (i == 32)
+ {
+ i = 0;
+ do
+ {
+ if ((lowest_bit_set == -1)
+ && ((high_bits >> i) & 1))
+ lowest_bit_set = i + 32;
+ if ((highest_bit_set == -1)
+ && ((low_bits >> (32 - i - 1)) & 1))
+ highest_bit_set = 32 - i - 1;
+ }
+ while (++i < 32
+ && ((highest_bit_set == -1)
+ || (lowest_bit_set == -1)));
+ }
+ /* If there are no bits set this should have gone out
+ as one instruction! */
+ if (lowest_bit_set == -1
+ || highest_bit_set == -1)
+ abort ();
+ all_bits_between_are_set = 1;
+ for (i = lowest_bit_set; i <= highest_bit_set; i++)
+ {
+ if (i < 32)
+ {
+ if ((low_bits & (1 << i)) != 0)
+ continue;
+ }
+ else
+ {
+ if ((high_bits & (1 << (i - 32))) != 0)
+ continue;
+ }
+ all_bits_between_are_set = 0;
+ break;
+ }
+ *hbsp = highest_bit_set;
+ *lbsp = lowest_bit_set;
+ *abbasp = all_bits_between_are_set;
+}
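
The analysis is easier to follow on one 64-bit value; the function above works on two 32-bit halves only because HOST_WIDE_INT may be 32 bits wide. A sketch with hypothetical names:

    #include <assert.h>
    #include <stdint.h>

    static void
    analyze64 (uint64_t v, int *lbsp, int *hbsp, int *abbasp)
    {
      int i;

      *lbsp = *hbsp = -1;
      for (i = 0; i < 64; i++)
        if ((v >> i) & 1)
          {
            if (*lbsp < 0)
              *lbsp = i;
            *hbsp = i;
          }
      /* All bits between are set iff v >> lowest is a solid run of
         (highest - lowest + 1) one bits.  */
      *abbasp = (*lbsp >= 0
                 && (v >> *lbsp) == (~0ULL >> (63 - *hbsp + *lbsp)));
    }

    int
    main (void)
    {
      int lo, hi, contig;

      analyze64 (0x0000fffff0000000ULL, &lo, &hi, &contig);
      assert (lo == 28 && hi == 47 && contig == 1);
      analyze64 (0x0000fffff0000001ULL, &lo, &hi, &contig);
      assert (lo == 0 && hi == 47 && contig == 0);
      return 0;
    }
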
+
+static int const64_is_2insns
+ PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
+
+static int
+const64_is_2insns (high_bits, low_bits)
+ unsigned HOST_WIDE_INT high_bits, low_bits;
+{
+ int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
+
+ if (high_bits == 0
+ || high_bits == 0xffffffff)
+ return 1;
+
+ analyze_64bit_constant (high_bits, low_bits,
+ &highest_bit_set, &lowest_bit_set,
+ &all_bits_between_are_set);
+
+ if ((highest_bit_set == 63
+ || lowest_bit_set == 0)
+ && all_bits_between_are_set != 0)
+ return 1;
+
+ if ((highest_bit_set - lowest_bit_set) < 21)
+ return 1;
+
+ return 0;
+}
+
+static unsigned HOST_WIDE_INT create_simple_focus_bits
+ PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
+ int, int));
+
+static unsigned HOST_WIDE_INT
+create_simple_focus_bits (high_bits, low_bits, lowest_bit_set, shift)
+ unsigned HOST_WIDE_INT high_bits, low_bits;
+ int lowest_bit_set, shift;
+{
+ HOST_WIDE_INT hi, lo;
+
+ if (lowest_bit_set < 32)
+ {
+ lo = (low_bits >> lowest_bit_set) << shift;
+ hi = ((high_bits << (32 - lowest_bit_set)) << shift);
+ }
+ else
+ {
+ lo = 0;
+ hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
+ }
+ if (hi & lo)
+ abort ();
+ return (hi | lo);
+}
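
In other words, the interesting bit group is shifted down to bit SHIFT (10 when it will feed a sethi, 0 for a simm13); the separate hi/lo arithmetic only handles groups that straddle bit 32, and the abort catches overlapping halves. On a single 64-bit value this is just:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t
    focus_bits (uint64_t v, int lowest_bit_set, int shift)
    {
      return (v >> lowest_bit_set) << shift;
    }

    int
    main (void)
    {
      /* 20 contiguous bits at 28..47 become a sethi-able field.  */
      assert (focus_bits (0x0000fffff0000000ULL, 28, 10)
              == (0xfffffULL << 10));
      return 0;
    }
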
+
+/* Here we are sure to be arch64 and this is an integer constant
+ being loaded into a register. Emit the most efficient
+ insn sequence possible. Detection of all the 1-insn cases
+ has been done already. */
+void
+sparc_emit_set_const64 (op0, op1)
+ rtx op0;
+ rtx op1;
+{
+ unsigned HOST_WIDE_INT high_bits, low_bits;
+ int lowest_bit_set, highest_bit_set;
+ int all_bits_between_are_set;
+ rtx temp;
+
+ /* Sanity check that we know what we are working with. */
+ if (! TARGET_ARCH64
+ || GET_CODE (op0) != REG
+ || (REGNO (op0) >= SPARC_FIRST_FP_REG
+ && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
+ abort ();
+
+ if (reload_in_progress || reload_completed)
+ temp = op0;
+ else
+ temp = gen_reg_rtx (DImode);
+
+ if (GET_CODE (op1) != CONST_DOUBLE
+ && GET_CODE (op1) != CONST_INT)
+ {
+ sparc_emit_set_symbolic_const64 (op0, op1, temp);
+ return;
+ }
+
+ if (GET_CODE (op1) == CONST_DOUBLE)
+ {
+#if HOST_BITS_PER_WIDE_INT == 64
+ high_bits = (CONST_DOUBLE_LOW (op1) >> 32) & 0xffffffff;
+ low_bits = CONST_DOUBLE_LOW (op1) & 0xffffffff;
+#else
+ high_bits = CONST_DOUBLE_HIGH (op1);
+ low_bits = CONST_DOUBLE_LOW (op1);
+#endif
+ }
+ else
+ {
+#if HOST_BITS_PER_WIDE_INT == 64
+ high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
+ low_bits = (INTVAL (op1) & 0xffffffff);
+#else
+ high_bits = ((INTVAL (op1) < 0) ?
+ 0xffffffff :
+ 0x00000000);
+ low_bits = INTVAL (op1);
+#endif
+ }
+
+ /* low_bits bits 0 --> 31
+ high_bits bits 32 --> 63 */
+
+ analyze_64bit_constant (high_bits, low_bits,
+ &highest_bit_set, &lowest_bit_set,
+ &all_bits_between_are_set);
+
+ /* First try for a 2-insn sequence. */
+
+ /* These situations are preferred because the optimizer can
+ * do more things with them:
+ * 1) mov -1, %reg
+ * sllx %reg, shift, %reg
+ * 2) mov -1, %reg
+ * srlx %reg, shift, %reg
+ * 3) mov some_small_const, %reg
+ * sllx %reg, shift, %reg
+ */
+ if (((highest_bit_set == 63
+ || lowest_bit_set == 0)
+ && all_bits_between_are_set != 0)
+ || ((highest_bit_set - lowest_bit_set) < 12))
+ {
+ HOST_WIDE_INT the_const = -1;
+ int shift = lowest_bit_set;
+
+ if ((highest_bit_set != 63
+ && lowest_bit_set != 0)
+ || all_bits_between_are_set == 0)
+ {
+ the_const =
+ create_simple_focus_bits (high_bits, low_bits,
+ lowest_bit_set, 0);
+ }
+ else if (lowest_bit_set == 0)
+ shift = -(63 - highest_bit_set);
+
+ if (! SPARC_SIMM13_P (the_const))
+ abort ();
+
+ emit_insn (gen_safe_SET64 (temp, the_const));
+ if (shift > 0)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_ASHIFT (DImode,
+ temp,
+ GEN_INT (shift))));
+ else if (shift < 0)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_LSHIFTRT (DImode,
+ temp,
+ GEN_INT (-shift))));
+ else
+ abort ();
+ return;
+ }
+
+  /* Now a range of 22 or fewer bits set somewhere.
+ * 1) sethi %hi(focus_bits), %reg
+ * sllx %reg, shift, %reg
+ * 2) sethi %hi(focus_bits), %reg
+ * srlx %reg, shift, %reg
+ */
+ if ((highest_bit_set - lowest_bit_set) < 21)
+ {
+ unsigned HOST_WIDE_INT focus_bits =
+ create_simple_focus_bits (high_bits, low_bits,
+ lowest_bit_set, 10);
+
+ if (! SPARC_SETHI_P (focus_bits))
+ abort ();
+
+ sparc_emit_set_safe_HIGH64 (temp, focus_bits);
+
+ /* If lowest_bit_set == 10 then a sethi alone could have done it. */
+ if (lowest_bit_set < 10)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_LSHIFTRT (DImode, temp,
+ GEN_INT (10 - lowest_bit_set))));
+ else if (lowest_bit_set > 10)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_ASHIFT (DImode, temp,
+ GEN_INT (lowest_bit_set - 10))));
+ else
+ abort ();
+ return;
+ }
+
+ /* 1) sethi %hi(low_bits), %reg
+ * or %reg, %lo(low_bits), %reg
+ * 2) sethi %hi(~low_bits), %reg
+ * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
+ */
+ if (high_bits == 0
+ || high_bits == 0xffffffff)
+ {
+ sparc_emit_set_const64_quick1 (op0, temp, low_bits,
+ (high_bits == 0xffffffff));
+ return;
+ }
+
+ /* Now, try 3-insn sequences. */
+
+ /* 1) sethi %hi(high_bits), %reg
+ * or %reg, %lo(high_bits), %reg
+ * sllx %reg, 32, %reg
+ */
+ if (low_bits == 0)
+ {
+ sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
+ return;
+ }
+
+ /* We may be able to do something quick
+ when the constant is negated, so try that. */
+ if (const64_is_2insns ((~high_bits) & 0xffffffff,
+ (~low_bits) & 0xfffffc00))
+ {
+ /* NOTE: The trailing bits get XOR'd so we need the
+ non-negated bits, not the negated ones. */
+ unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
+
+ if ((((~high_bits) & 0xffffffff) == 0
+ && ((~low_bits) & 0x80000000) == 0)
+ || (((~high_bits) & 0xffffffff) == 0xffffffff
+ && ((~low_bits) & 0x80000000) != 0))
+ {
+ int fast_int = (~low_bits & 0xffffffff);
+
+ if ((SPARC_SETHI_P (fast_int)
+ && (~high_bits & 0xffffffff) == 0)
+ || SPARC_SIMM13_P (fast_int))
+ emit_insn (gen_safe_SET64 (temp, fast_int));
+ else
+ sparc_emit_set_const64 (temp, GEN_INT64 (fast_int));
+ }
+ else
+ {
+ rtx negated_const;
+#if HOST_BITS_PER_WIDE_INT == 64
+ negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
+ (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
+#else
+ negated_const = gen_rtx_CONST_DOUBLE (DImode, const0_rtx,
+ (~low_bits) & 0xfffffc00,
+ (~high_bits) & 0xffffffff);
+#endif
+ sparc_emit_set_const64 (temp, negated_const);
+ }
+
+ /* If we are XOR'ing with -1, then we should emit a one's complement
+ instead. This way the combiner will notice logical operations
+ such as ANDN later on and substitute. */
+ if (trailing_bits == 0x3ff)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_NOT (DImode, temp)));
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_safe_XOR64 (temp,
+ (-0x400 | trailing_bits))));
+ }
+ return;
+ }
+
+ /* 1) sethi %hi(xxx), %reg
+ * or %reg, %lo(xxx), %reg
+ * sllx %reg, yyy, %reg
+ *
+ * ??? This is just a generalized version of the low_bits==0
+ * thing above, FIXME...
+ */
+ if ((highest_bit_set - lowest_bit_set) < 32)
+ {
+ unsigned HOST_WIDE_INT focus_bits =
+ create_simple_focus_bits (high_bits, low_bits,
+ lowest_bit_set, 0);
+
+ /* We can't get here in this state. */
+ if (highest_bit_set < 32
+ || lowest_bit_set >= 32)
+ abort ();
+
+ /* So what we know is that the set bits straddle the
+ middle of the 64-bit word. */
+ sparc_emit_set_const64_quick2 (op0, temp,
+ focus_bits, 0,
+ lowest_bit_set);
+ return;
+ }
+
+ /* 1) sethi %hi(high_bits), %reg
+ * or %reg, %lo(high_bits), %reg
+ * sllx %reg, 32, %reg
+ * or %reg, low_bits, %reg
+ */
+ if (SPARC_SIMM13_P(low_bits)
+ && ((int)low_bits > 0))
+ {
+ sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
+ return;
+ }
+
+ /* The easiest way when all else fails, is full decomposition. */
+#if 0
+ printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
+ high_bits, low_bits, ~high_bits, ~low_bits);
+#endif
+ sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
+}
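
A rough host-side classifier, hypothetical and incomplete, can mirror the order of the tests above; it names the strategy chosen, does not emit RTL, and elides the negated, straddling and simm13-low cases near the end.

    #include <stdio.h>
    #include <stdint.h>

    static const char *
    const64_strategy (uint64_t v)
    {
      uint32_t high = v >> 32, low = (uint32_t) v;
      int i, lo = -1, hi = -1, contig;

      for (i = 0; i < 64; i++)
        if ((v >> i) & 1)
          {
            if (lo < 0)
              lo = i;
            hi = i;
          }
      contig = lo >= 0 && (v >> lo) == (~0ULL >> (63 - hi + lo));

      if (((hi == 63 || lo == 0) && contig) || hi - lo < 12)
        return "mov simm13; sllx/srlx";
      if (hi - lo < 21)
        return "sethi; sllx/srlx";
      if (high == 0 || high == 0xffffffff)
        return "quick1: sethi; or/xor";
      if (low == 0)
        return "quick2: sethi; or; sllx 32";
      return "negated/straddling/simm13-low, else longway";
    }

    int
    main (void)
    {
      puts (const64_strategy (0xfff0000000000000ULL)); /* mov -1; sllx */
      puts (const64_strategy (0x0000fffff0000000ULL)); /* sethi; sllx  */
      puts (const64_strategy (0xffffffff12345678ULL)); /* quick1       */
      return 0;
    }
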
+
/* X and Y are two things to compare using CODE. Emit the compare insn and
return the rtx for the cc reg in the proper mode. */
@@ -1061,15 +2064,8 @@ gen_compare_reg (code, x, y)
else
cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
- if (TARGET_V8PLUS && mode == CCXmode)
- {
- emit_insn (gen_cmpdi_v8plus (x, y));
- }
- else
- {
- emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
+ emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
gen_rtx_COMPARE (mode, x, y)));
- }
return cc_reg;
}
@@ -1240,7 +2236,7 @@ eligible_for_epilogue_delay (trial, slot)
/* In the case of a true leaf function, anything can go into the delay slot.
A delay slot only exists however if the frame size is zero, otherwise
we will put an insn to adjust the stack after the return. */
- if (leaf_function)
+ if (current_function_uses_only_leaf_regs)
{
if (leaf_return_peephole_ok ())
return ((get_attr_in_uncond_branch_delay (trial)
@@ -1267,9 +2263,14 @@ eligible_for_epilogue_delay (trial, slot)
src = SET_SRC (pat);
- /* This matches "*return_[qhs]i". */
+ /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64. */
if (arith_operand (src, GET_MODE (src)))
- return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
+ {
+ if (TARGET_ARCH64)
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+ else
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
+ }
/* This matches "*return_di". */
else if (arith_double_operand (src, GET_MODE (src)))
@@ -1449,8 +2450,7 @@ legitimize_pic_address (orig, mode, reg)
enum machine_mode mode ATTRIBUTE_UNUSED;
rtx reg;
{
- if (GET_CODE (orig) == SYMBOL_REF
- || GET_CODE (orig) == LABEL_REF)
+ if (GET_CODE (orig) == SYMBOL_REF)
{
rtx pic_ref, address;
rtx insn;
@@ -1475,9 +2475,16 @@ legitimize_pic_address (orig, mode, reg)
won't get confused into thinking that these two instructions
are loading in the true address of the symbol. If in the
future a PIC rtx exists, that should be used instead. */
- emit_insn (gen_pic_sethi_si (temp_reg, orig));
- emit_insn (gen_pic_lo_sum_si (temp_reg, temp_reg, orig));
-
+ if (Pmode == SImode)
+ {
+ emit_insn (gen_movsi_high_pic (temp_reg, orig));
+ emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
+ }
+ else
+ {
+ emit_insn (gen_movdi_high_pic (temp_reg, orig));
+ emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
+ }
address = temp_reg;
}
else
@@ -1532,18 +2539,15 @@ legitimize_pic_address (orig, mode, reg)
}
return gen_rtx_PLUS (Pmode, base, offset);
}
+ else if (GET_CODE (orig) == LABEL_REF)
+ /* ??? Why do we do this? */
+ /* Now movsi_pic_label_ref uses it, but we ought to be checking that
+ the register is live instead, in case it is eliminated. */
+ current_function_uses_pic_offset_table = 1;
return orig;
}
-/* Set up PIC-specific rtl. This should not cause any insns
- to be emitted. */
-
-void
-initialize_pic ()
-{
-}
-
/* Return the RTX for insns to set the PIC register. */
static rtx
@@ -1578,10 +2582,14 @@ finalize_pic ()
  /* If we haven't emitted the special get_pc helper function, do so now. */
if (get_pc_symbol_name[0] == 0)
{
- ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
+ int align;
+ ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
text_section ();
- ASM_OUTPUT_ALIGN (asm_out_file, 3);
+
+ align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
+ if (align > 0)
+ ASM_OUTPUT_ALIGN (asm_out_file, align);
ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
}
@@ -1594,10 +2602,13 @@ finalize_pic ()
emit_insn_after (pic_setup_code (), get_insns ());
- /* Insert the code in each nonlocal goto receiver. */
+ /* Insert the code in each nonlocal goto receiver.
+ If you make changes here or to the nonlocal_goto_receiver
+ pattern, make sure the unspec_volatile numbers still
+ match. */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
- && XINT (PATTERN (insn), 1) == 4)
+ && XINT (PATTERN (insn), 1) == 5)
emit_insn_after (pic_setup_code (), insn);
flag_pic = orig_flag_pic;
@@ -1609,275 +2620,36 @@ finalize_pic ()
emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
}
-/* Emit insns to move operands[1] into operands[0].
-
- Return 1 if we have written out everything that needs to be done to
- do the move. Otherwise, return 0 and the caller will emit the move
- normally. */
+/* Return 1 if RTX is a MEM which is known to be aligned to at
+   least a DESIRED byte boundary.  */
int
-emit_move_sequence (operands, mode)
- rtx *operands;
- enum machine_mode mode;
-{
- register rtx operand0 = operands[0];
- register rtx operand1 = operands[1];
-
- if (CONSTANT_P (operand1) && flag_pic
- && pic_address_needs_scratch (operand1))
- operands[1] = operand1 = legitimize_pic_address (operand1, mode, 0);
-
- /* Handle most common case first: storing into a register. */
- if (register_operand (operand0, mode))
- {
- /* Integer constant to FP register. */
- if (GET_CODE (operand0) == REG
- && REGNO (operand0) >= 32
- && REGNO (operand0) < FIRST_PSEUDO_REGISTER
- && CONSTANT_P (operand1))
- {
- operand1 = validize_mem (force_const_mem (GET_MODE (operand0), operand1));
- }
-
- if (register_operand (operand1, mode)
- || (GET_CODE (operand1) == CONST_INT && SMALL_INT (operand1))
- || (GET_CODE (operand1) == CONST_DOUBLE
- && arith_double_operand (operand1, DImode))
- || (GET_CODE (operand1) == HIGH && GET_MODE (operand1) != DImode)
- /* Only `general_operands' can come here, so MEM is ok. */
- || GET_CODE (operand1) == MEM)
- {
- /* Run this case quickly. */
- emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
- return 1;
- }
- }
- else if (GET_CODE (operand0) == MEM)
- {
- if (register_operand (operand1, mode)
- || (operand1 == const0_rtx && ! TARGET_LIVE_G0))
- {
- /* Run this case quickly. */
- emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
- return 1;
- }
- if (! reload_in_progress)
- {
- operands[0] = validize_mem (operand0);
- operands[1] = operand1 = force_reg (mode, operand1);
- }
- }
-
- /* DImode HIGH values in sparc64 need a clobber added. */
- if (TARGET_ARCH64
- && GET_CODE (operand1) == HIGH && GET_MODE (operand1) == DImode)
- {
- emit_insn (gen_sethi_di_sp64 (operand0, XEXP (operand1, 0)));
- return 1;
- }
- /* Simplify the source if we need to. */
- else if (GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
- {
- if (flag_pic && symbolic_operand (operand1, mode))
- {
- rtx temp_reg = reload_in_progress ? operand0 : 0;
-
- operands[1] = legitimize_pic_address (operand1, mode, temp_reg);
- }
- else if (GET_CODE (operand1) == CONST_INT
- ? (! SMALL_INT (operand1)
- && INTVAL (operand1) != -4096
- && ! SPARC_SETHI_P (INTVAL (operand1)))
- : GET_CODE (operand1) == CONST_DOUBLE
- ? ! arith_double_operand (operand1, DImode)
- : 1)
- {
- /* For DImode values, temp must be operand0 because of the way
- HI and LO_SUM work. The LO_SUM operator only copies half of
- the LSW from the dest of the HI operator. If the LO_SUM dest is
- not the same as the HI dest, then the MSW of the LO_SUM dest will
- never be set.
-
- ??? The real problem here is that the ...(HI:DImode pattern emits
- multiple instructions, and the ...(LO_SUM:DImode pattern emits
- one instruction. This fails, because the compiler assumes that
- LO_SUM copies all bits of the first operand to its dest. Better
- would be to have the HI pattern emit one instruction and the
- LO_SUM pattern multiple instructions. Even better would be
- to use four rtl insns. */
- rtx temp = ((reload_in_progress || mode == DImode)
- ? operand0 : gen_reg_rtx (mode));
-
- if (mode == SImode)
- {
- if (GET_CODE (operand1) == CONST_INT)
- operand1 = GEN_INT (INTVAL (operand1) & 0xffffffff);
- else if (GET_CODE (operand1) == CONST_DOUBLE)
- operand1 = GEN_INT (CONST_DOUBLE_LOW (operand1) & 0xffffffff);
- }
-
- if (TARGET_ARCH64 && mode == DImode)
- emit_insn (gen_sethi_di_sp64 (temp, operand1));
- else
- emit_insn (gen_rtx_SET (VOIDmode, temp,
- gen_rtx_HIGH (mode, operand1)));
-
- operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
- }
- }
-
- /* Now have insn-emit do whatever it normally does. */
- return 0;
-}
-
-/* Return the best assembler insn template
- for moving operands[1] into operands[0] as a 4 byte quantity.
-
- This isn't intended to be very smart. It is up to the caller to
- choose the best way to do things.
-
- Note that OPERANDS may be modified to suit the returned string. */
-
-char *
-singlemove_string (operands)
- rtx *operands;
+mem_min_alignment (mem, desired)
+ rtx mem;
+ int desired;
{
- if (GET_CODE (operands[0]) == MEM)
- {
- if (GET_CODE (operands[1]) != MEM)
- return "st %r1,%0";
- else
- abort ();
- }
- else if (GET_CODE (operands[1]) == MEM)
- return "ld %1,%0";
- else if (GET_CODE (operands[1]) == CONST_DOUBLE)
- {
- REAL_VALUE_TYPE r;
- long i;
-
- /* Must be SFmode, otherwise this doesn't make sense. */
- if (GET_MODE (operands[1]) != SFmode)
- abort ();
-
- REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
- REAL_VALUE_TO_TARGET_SINGLE (r, i);
- operands[1] = GEN_INT (i);
-
- if (CONST_OK_FOR_LETTER_P (i, 'I'))
- return "mov %1,%0";
- else if ((i & 0x000003FF) != 0)
- return "sethi %%hi(%a1),%0\n\tor %0,%%lo(%a1),%0";
- else
- return "sethi %%hi(%a1),%0";
- }
- else if (GET_CODE (operands[1]) == CONST_INT)
- {
- /* Only consider the low 32 bits of the constant. */
- int i = INTVAL (operands[1]) & 0xffffffff;
-
- if (SPARC_SIMM13_P (i))
- return "mov %1,%0";
-
- if (i == 4096)
- return "sub %%g0,-4096,%0";
-
- /* If all low order 10 bits are clear, then we only need a single
- sethi insn to load the constant. */
- /* FIXME: Use SETHI_P. */
- if ((i & 0x000003FF) != 0)
- return "sethi %%hi(%a1),%0\n\tor %0,%%lo(%a1),%0";
- else
- return "sethi %%hi(%a1),%0";
- }
- /* Operand 1 must be a register, or a 'I' type CONST_INT. */
- return "mov %1,%0";
-}
-
-/* Return the best assembler insn template
- for moving operands[1] into operands[0] as an 8 byte quantity.
-
- This isn't intended to be very smart. It is up to the caller to
- choose the best way to do things.
-
- Note that OPERANDS may be modified to suit the returned string. */
-
-char *
-doublemove_string (operands)
- rtx *operands;
-{
- rtx op0 = operands[0], op1 = operands[1];
-
- if (GET_CODE (op0) == MEM)
- {
- if (GET_CODE (op1) == REG)
- {
- if (FP_REG_P (op1))
- return "std %1,%0";
- return TARGET_ARCH64 ? "stx %1,%0" : "std %1,%0";
- }
- if (TARGET_ARCH64
- && (op1 == const0_rtx
- || (GET_MODE (op1) != VOIDmode
- && op1 == CONST0_RTX (GET_MODE (op1)))))
- return "stx %r1,%0";
- abort ();
- }
- else if (GET_CODE (op1) == MEM)
- {
- if (GET_CODE (op0) != REG)
- abort ();
- if (FP_REG_P (op0))
- return "ldd %1,%0";
- return TARGET_ARCH64 ? "ldx %1,%0" : "ldd %1,%0";
- }
- else if (GET_CODE (operands[1]) == CONST_DOUBLE)
- {
- /* ??? Unfinished, and maybe not needed. */
- abort ();
- }
- else if (GET_CODE (operands[1]) == CONST_INT
- && ! CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'I'))
- {
- /* ??? Unfinished, and maybe not needed. */
- abort ();
- }
- /* Operand 1 must be a register, or a 'I' type CONST_INT. */
- return "mov %1,%0";
-}
-
-/* Return non-zero if it is OK to assume that the given memory operand is
-   aligned at least to an 8-byte boundary.  This should only be called
- for memory accesses whose size is 8 bytes or larger. */
-
-int
-mem_aligned_8 (mem)
- register rtx mem;
-{
- register rtx addr;
- register rtx base;
- register rtx offset;
+ rtx addr, base, offset;
+ /* If it's not a MEM we can't accept it. */
if (GET_CODE (mem) != MEM)
- return 0; /* It's gotta be a MEM! */
+ return 0;
addr = XEXP (mem, 0);
-
- /* Now that all misaligned double parms are copied on function entry,
- we can assume any 64-bit object is 64-bit aligned except those which
- are at unaligned offsets from the stack or frame pointer. If the
- TARGET_UNALIGNED_DOUBLES switch is given, we do not make this
- assumption. */
-
- /* See what register we use in the address. */
- base = offset = 0;
+ base = offset = NULL_RTX;
if (GET_CODE (addr) == PLUS)
{
- if (GET_CODE (XEXP (addr, 0)) == REG
- && GET_CODE (XEXP (addr, 1)) == CONST_INT)
+ if (GET_CODE (XEXP (addr, 0)) == REG)
{
base = XEXP (addr, 0);
- offset = XEXP (addr, 1);
+
+ /* What we are saying here is that if the base
+ REG is aligned properly, the compiler will make
+ sure any REG based index upon it will be so
+ as well. */
+ if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
+ offset = XEXP (addr, 1);
+ else
+ offset = const0_rtx;
}
}
else if (GET_CODE (addr) == REG)
@@ -1886,862 +2658,42 @@ mem_aligned_8 (mem)
offset = const0_rtx;
}
- /* If it's the stack or frame pointer, check offset alignment.
- We can have improper alignment in the function entry code. */
- if (base
- && (REGNO (base) == FRAME_POINTER_REGNUM
- || REGNO (base) == STACK_POINTER_REGNUM))
- {
- if (((INTVAL (offset) - SPARC_STACK_BIAS) & 0x7) == 0)
- return 1;
- }
- /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
- is true, in which case we can only assume that an access is aligned if
- it is to a constant address, or the address involves a LO_SUM.
-
- We used to assume an address was aligned if MEM_IN_STRUCT_P was true.
- That assumption was deleted so that gcc generated code can be used with
- memory allocators that only guarantee 4 byte alignment. */
- else if (! TARGET_UNALIGNED_DOUBLES || CONSTANT_P (addr)
- || GET_CODE (addr) == LO_SUM)
- return 1;
-
- /* An obviously unaligned address. */
- return 0;
-}
-
-enum optype { REGOP, OFFSOP, MEMOP, PUSHOP, POPOP, CNSTOP, RNDOP };
-
-/* Output assembler code to perform a doubleword move insn
- with operands OPERANDS. This is very similar to the following
- output_move_quad function. */
-
-char *
-output_move_double (operands)
- rtx *operands;
-{
- register rtx op0 = operands[0];
- register rtx op1 = operands[1];
- register enum optype optype0;
- register enum optype optype1;
- rtx latehalf[2];
- rtx addreg0 = 0;
- rtx addreg1 = 0;
- int highest_first = 0;
- int no_addreg1_decrement = 0;
-
- /* First classify both operands. */
-
- if (REG_P (op0))
- optype0 = REGOP;
- else if (offsettable_memref_p (op0))
- optype0 = OFFSOP;
- else if (GET_CODE (op0) == MEM)
- optype0 = MEMOP;
- else
- optype0 = RNDOP;
-
- if (REG_P (op1))
- optype1 = REGOP;
- else if (CONSTANT_P (op1))
- optype1 = CNSTOP;
- else if (offsettable_memref_p (op1))
- optype1 = OFFSOP;
- else if (GET_CODE (op1) == MEM)
- optype1 = MEMOP;
- else
- optype1 = RNDOP;
-
- /* Check for the cases that the operand constraints are not
- supposed to allow to happen. Abort if we get one,
- because generating code for these cases is painful. */
-
- if (optype0 == RNDOP || optype1 == RNDOP
- || (optype0 == MEM && optype1 == MEM))
- abort ();
-
- /* If an operand is an unoffsettable memory ref, find a register
- we can increment temporarily to make it refer to the second word. */
-
- if (optype0 == MEMOP)
- addreg0 = find_addr_reg (XEXP (op0, 0));
-
- if (optype1 == MEMOP)
- addreg1 = find_addr_reg (XEXP (op1, 0));
-
- /* Ok, we can do one word at a time.
- Set up in LATEHALF the operands to use for the
- high-numbered (least significant) word and in some cases alter the
- operands in OPERANDS to be suitable for the low-numbered word. */
-
- if (optype0 == REGOP)
- latehalf[0] = gen_rtx_REG (SImode, REGNO (op0) + 1);
- else if (optype0 == OFFSOP)
- latehalf[0] = adj_offsettable_operand (op0, 4);
- else
- latehalf[0] = op0;
-
- if (optype1 == REGOP)
- latehalf[1] = gen_rtx_REG (SImode, REGNO (op1) + 1);
- else if (optype1 == OFFSOP)
- latehalf[1] = adj_offsettable_operand (op1, 4);
- else if (optype1 == CNSTOP)
- {
- if (TARGET_ARCH64)
- {
- if (arith_double_operand (op1, DImode))
- {
- operands[1] = GEN_INT (CONST_DOUBLE_LOW (op1));
- return "mov %1,%0";
- }
- else
- {
- /* The only way to handle CONST_DOUBLEs or other 64 bit
- constants here is to use a temporary, such as is done
- for the V9 DImode sethi insn pattern. This is not
- a practical solution, so abort if we reach here.
- The md file should always force such constants to
- memory. */
- abort ();
- }
- }
- else
- split_double (op1, &operands[1], &latehalf[1]);
- }
- else
- latehalf[1] = op1;
-
- /* Easy case: try moving both words at once. Check for moving between
- an even/odd register pair and a memory location. */
- if ((optype0 == REGOP && optype1 != REGOP && optype1 != CNSTOP
- && (TARGET_ARCH64 || (REGNO (op0) & 1) == 0))
- || (optype0 != REGOP && optype0 != CNSTOP && optype1 == REGOP
- && (TARGET_ARCH64 || (REGNO (op1) & 1) == 0)))
- {
- register rtx mem,reg;
-
- if (optype0 == REGOP)
- mem = op1, reg = op0;
- else
- mem = op0, reg = op1;
-
- /* In v9, ldd can be used for word aligned addresses, so technically
- some of this logic is unneeded. We still avoid ldd if the address
- is obviously unaligned though.
-
- Integer ldd/std are deprecated in V9 and are slow on UltraSPARC.
- Use them only if the access is volatile or not offsettable. */
-
- if ((mem_aligned_8 (mem)
- && (REGNO (reg) >= 32
- || MEM_VOLATILE_P (mem)
- || ! ((optype0 == OFFSOP || optype1 == OFFSOP)
- && (sparc_cpu == PROCESSOR_ULTRASPARC
- || sparc_cpu == PROCESSOR_V9))))
- /* If this is a floating point register higher than %f31,
- then we *must* use an aligned load, since `ld' will not accept
- the register number. */
- || (TARGET_V9 && REGNO (reg) >= 64)
- /* Even if two instructions would otherwise be better than ldd/std,
- if this insn was put in a delay slot because reorg thought it
- was only one machine instruction, make sure it is only one
- instruction. */
- || dbr_sequence_length () != 0)
- {
- if (FP_REG_P (reg) || ! TARGET_ARCH64)
- return (mem == op1 ? "ldd %1,%0" : "std %1,%0");
- else
- return (mem == op1 ? "ldx %1,%0" : "stx %1,%0");
- }
- }
-
- if (TARGET_ARCH64)
- {
- if (optype0 == REGOP && optype1 == REGOP)
- {
- if (FP_REG_P (op0))
- return "fmovd %1,%0";
- else
- return "mov %1,%0";
- }
- }
-
- /* If the first move would clobber the source of the second one,
- do them in the other order. */
-
- /* Overlapping registers. */
- if (optype0 == REGOP && optype1 == REGOP
- && REGNO (op0) == REGNO (latehalf[1]))
- {
- /* Do that word. */
- output_asm_insn (singlemove_string (latehalf), latehalf);
- /* Do low-numbered word. */
- return singlemove_string (operands);
- }
- /* Loading into a register which overlaps a register used in the address. */
- else if (optype0 == REGOP && optype1 != REGOP
- && reg_overlap_mentioned_p (op0, op1))
- {
- /* If both halves of dest are used in the src memory address,
- add the two regs and put them in the low reg (op0).
- Then it works to load latehalf first. */
- if (reg_mentioned_p (op0, XEXP (op1, 0))
- && reg_mentioned_p (latehalf[0], XEXP (op1, 0)))
- {
- rtx xops[2];
- xops[0] = latehalf[0];
- xops[1] = op0;
- output_asm_insn ("add %1,%0,%1", xops);
- operands[1] = gen_rtx_MEM (DImode, op0);
- latehalf[1] = adj_offsettable_operand (operands[1], 4);
- addreg1 = 0;
- highest_first = 1;
- }
- /* Only one register in the dest is used in the src memory address,
- and this is the first register of the dest, so we want to do
- the late half first here also. */
- else if (! reg_mentioned_p (latehalf[0], XEXP (op1, 0)))
- highest_first = 1;
- /* Only one register in the dest is used in the src memory address,
- and this is the second register of the dest, so we want to do
- the late half last. If addreg1 is set, and addreg1 is the same
- register as latehalf, then we must suppress the trailing decrement,
- because it would clobber the value just loaded. */
- else if (addreg1 && reg_mentioned_p (addreg1, latehalf[0]))
- no_addreg1_decrement = 1;
- }
-
- /* Normal case: do the two words, low-numbered first.
- Overlap case (highest_first set): do high-numbered word first. */
-
- if (! highest_first)
- output_asm_insn (singlemove_string (operands), operands);
-
- /* Make any unoffsettable addresses point at high-numbered word. */
- if (addreg0)
- output_asm_insn ("add %0,0x4,%0", &addreg0);
- if (addreg1)
- output_asm_insn ("add %0,0x4,%0", &addreg1);
-
- /* Do that word. */
- output_asm_insn (singlemove_string (latehalf), latehalf);
-
- /* Undo the adds we just did. */
- if (addreg0)
- output_asm_insn ("add %0,-0x4,%0", &addreg0);
- if (addreg1 && ! no_addreg1_decrement)
- output_asm_insn ("add %0,-0x4,%0", &addreg1);
-
- if (highest_first)
- output_asm_insn (singlemove_string (operands), operands);
-
- return "";
-}
-
-/* Output assembler code to perform a quadword move insn
- with operands OPERANDS. This is very similar to the preceding
- output_move_double function. */
-
-char *
-output_move_quad (operands)
- rtx *operands;
-{
- register rtx op0 = operands[0];
- register rtx op1 = operands[1];
- register enum optype optype0;
- register enum optype optype1;
- rtx wordpart[4][2];
- rtx load_late[4];
- int load_late_half[2];
- rtx addreg0 = 0;
- rtx addreg1 = 0;
-
- load_late_half[0] = 0; load_late_half[1] = 0;
- load_late[0] = 0; load_late[1] = 0; load_late[2] = 0;
- load_late[3] = 0;
-
- wordpart[0][0] = NULL; wordpart[1][0] = NULL; wordpart[2][0] = NULL;
- wordpart[3][0] = NULL;
-
- /* First classify both operands. */
-
- if (REG_P (op0))
- optype0 = REGOP;
- else if (offsettable_memref_p (op0))
- optype0 = OFFSOP;
- else if (GET_CODE (op0) == MEM)
- optype0 = MEMOP;
- else
- optype0 = RNDOP;
-
- if (REG_P (op1))
- optype1 = REGOP;
- else if (CONSTANT_P (op1))
- optype1 = CNSTOP;
- else if (offsettable_memref_p (op1))
- optype1 = OFFSOP;
- else if (GET_CODE (op1) == MEM)
- optype1 = MEMOP;
- else
- optype1 = RNDOP;
-
- /* Check for the cases that the operand constraints are not
- supposed to allow to happen. Abort if we get one,
- because generating code for these cases is painful. */
-
- if (optype0 == RNDOP || optype1 == RNDOP
- || (optype0 == MEM && optype1 == MEM))
- abort ();
-
- if (optype0 == REGOP)
- {
- wordpart[0][0] = gen_rtx_REG (word_mode, REGNO (op0) + 0);
- if (TARGET_ARCH64 && FP_REG_P (op0)
- && REGNO (op0) < SPARC_FIRST_V9_FP_REG)
- wordpart[1][0] = gen_rtx_REG (word_mode, REGNO (op0) + 2);
- else
- wordpart[1][0] = gen_rtx_REG (word_mode, REGNO (op0) + 1);
-
- if (TARGET_ARCH32)
- {
- wordpart[2][0] = gen_rtx_REG (word_mode, REGNO (op0) + 2);
- wordpart[3][0] = gen_rtx_REG (word_mode, REGNO (op0) + 3);
- }
-
- /* Loading into a register which overlaps a register used in the
- address. */
- if (optype1 != REGOP && reg_overlap_mentioned_p (op0, op1))
- {
- int i;
- int count;
-
- count = 0;
-
- for (i = 0; i < 4 && wordpart[i][0] != NULL; i++)
- {
- if (reg_mentioned_p (wordpart[i][0], op1))
- {
- load_late[i] = wordpart[i][0];
- load_late_half[TARGET_ARCH64 ? i : i/2] = 1;
- count++;
- }
- }
- if (count > 2)
- {
- /* Not sure what to do here. Multiple adds? Can't happen. */
- abort ();
- }
- else if (count == 2)
- {
- /* We have a two-address source operand, and both registers
- overlap with the dest quad. Add them together and
- store the result into the last register of the quad being
- loaded, then generate an appropriate MEM insn. */
- rtx temp[3];
- int place = 0;
-
- for (i = 0; i < 4; i++)
- {
- if (load_late[i])
- {
- temp[place++] = load_late[i];
- load_late[i] = 0;
- }
- }
- temp[2] = wordpart[3][0];
- output_asm_insn ("add %0, %1, %2", temp);
- load_late_half[0] = 0;
- load_late_half[1] = 1;
- op1 = gen_rtx_MEM (TFmode, wordpart[3][0]);
- operands[1] = op1;
- optype1 = OFFSOP;
- }
- }
- }
-
- /* If an operand is an unoffsettable memory ref, find a register
- we can increment temporarily to make it refer to the later words. */
-
- if (optype0 == MEMOP)
- addreg0 = find_addr_reg (XEXP (op0, 0));
-
- if (optype1 == MEMOP)
- addreg1 = find_addr_reg (XEXP (op1, 0));
-
- /* Ok, we can do one word at a time.
- Set up in wordpart the operands to use for each word of the arguments. */
-
- if (optype0 == OFFSOP)
- {
- wordpart[0][0] = adj_offsettable_operand (op0, 0);
- if (TARGET_ARCH32)
- {
- wordpart[1][0] = adj_offsettable_operand (op0, 4);
- wordpart[2][0] = adj_offsettable_operand (op0, 8);
- wordpart[3][0] = adj_offsettable_operand (op0, 12);
- }
- else
- wordpart[1][0] = adj_offsettable_operand (op0, 8);
- }
- else if (optype0 != REGOP)
- {
- wordpart[0][0] = op0;
- wordpart[1][0] = op0;
- wordpart[2][0] = op0;
- wordpart[3][0] = op0;
- }
-
- if (optype1 == REGOP)
+ if (base != NULL_RTX)
{
- wordpart[0][1] = gen_rtx_REG (word_mode, REGNO (op1) + 0);
- if (TARGET_ARCH64 && FP_REG_P (op1)
- && REGNO (op1) < SPARC_FIRST_V9_FP_REG)
- wordpart[1][1] = gen_rtx_REG (word_mode, REGNO (op1) + 2);
- else
- wordpart[1][1] = gen_rtx_REG (word_mode, REGNO (op1) + 1);
+ int regno = REGNO (base);
- if (TARGET_ARCH32)
- {
- wordpart[2][1] = gen_rtx_REG (word_mode, REGNO (op1) + 2);
- wordpart[3][1] = gen_rtx_REG (word_mode, REGNO (op1) + 3);
- }
- }
- else if (optype1 == OFFSOP)
- {
- wordpart[0][1] = adj_offsettable_operand (op1, 0);
- if (TARGET_ARCH32)
+ if (regno != FRAME_POINTER_REGNUM
+ && regno != STACK_POINTER_REGNUM)
{
- wordpart[1][1] = adj_offsettable_operand (op1, 4);
- wordpart[2][1] = adj_offsettable_operand (op1, 8);
- wordpart[3][1] = adj_offsettable_operand (op1, 12);
+ /* Check if the compiler has recorded some information
+ about the alignment of the base REG. If reload has
+ completed, we already matched with proper alignments. */
+ if (((regno_pointer_align != NULL
+ && REGNO_POINTER_ALIGN (regno) >= desired)
+ || reload_completed)
+ && ((INTVAL (offset) & (desired - 1)) == 0))
+ return 1;
}
else
- wordpart[1][1] = adj_offsettable_operand (op1, 8);
- }
- else if (optype1 == CNSTOP)
- {
- REAL_VALUE_TYPE r;
- long l[4];
-
- /* This only works for TFmode floating point constants. */
- if (GET_CODE (op1) != CONST_DOUBLE || GET_MODE (op1) != TFmode)
- abort ();
-
- REAL_VALUE_FROM_CONST_DOUBLE (r, op1);
- REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
-
- wordpart[0][1] = GEN_INT (l[0]);
- wordpart[1][1] = GEN_INT (l[1]);
- wordpart[2][1] = GEN_INT (l[2]);
- wordpart[3][1] = GEN_INT (l[3]);
- }
- else
- {
- wordpart[0][1] = op1;
- wordpart[1][1] = op1;
- wordpart[2][1] = op1;
- wordpart[3][1] = op1;
- }
-
- /* Easy case: try moving the quad as two pairs. Check for moving between
- an even/odd register pair and a memory location.
- Also handle new v9 fp regs here. */
- /* ??? Should also handle the case of non-offsettable addresses here.
- We can at least do the first pair as a ldd/std, and then do the third
- and fourth words individually. */
- if ((optype0 == REGOP && optype1 == OFFSOP && (REGNO (op0) & 1) == 0)
- || (optype0 == OFFSOP && optype1 == REGOP && (REGNO (op1) & 1) == 0))
- {
- rtx mem, reg;
- int use_ldx;
-
- if (optype0 == REGOP)
- mem = op1, reg = op0;
- else
- mem = op0, reg = op1;
-
- if (mem_aligned_8 (mem)
- /* If this is a floating point register higher than %f31,
- then we *must* use an aligned load, since `ld' will not accept
- the register number. */
- || (TARGET_V9 && REGNO (reg) >= SPARC_FIRST_V9_FP_REG))
- {
- static char * const mov_by_64[2][2][2] = {
- { { "std %S1,%2;std %1,%0", "stx %R1,%2;stx %1,%0" },
- { "ldd %2,%S0;ldd %1,%0", "ldx %2,%R0;ldx %1,%0" } },
- { { "std %1,%0;std %S1,%2", "stx %1,%0;stx %R1,%2" },
- { "ldd %1,%0;ldd %2,%S0", "ldx %1,%0;ldx %2,%R0" } }
- };
-
- if (TARGET_V9 && FP_REG_P (reg) && TARGET_HARD_QUAD)
- {
- /* Only abort if the register # requires that we use ldq. */
- if ((REGNO (reg) & 3) == 0)
- {
- /* ??? Can `mem' have an inappropriate alignment here? */
- return (mem == op1 ? "ldq %1,%0" : "stq %1,%0");
- }
- else
- {
- if (REGNO (reg) >= SPARC_FIRST_V9_FP_REG)
- abort();
- }
- }
- operands[2] = adj_offsettable_operand (mem, 8);
-
- /* Do the loads in the right order; can't overwrite our address
- register. */
- use_ldx = TARGET_ARCH64 && !FP_REG_P (reg);
- return mov_by_64[!load_late_half[0]][mem == op1][use_ldx];
- }
- }
-
- /* If the first move would clobber the source of the second one,
- do them in the other order. */
-
- /* Overlapping registers? */
- if (TARGET_ARCH32)
- {
- if (optype0 == REGOP && optype1 == REGOP
-	  && (REGNO (op0) == REGNO (wordpart[3][1])
-	      || REGNO (op0) == REGNO (wordpart[2][1])
-	      || REGNO (op0) == REGNO (wordpart[1][1])))
{
- /* Do fourth word. */
- output_asm_insn (singlemove_string (wordpart[3]), wordpart[3]);
- /* Do the third word. */
- output_asm_insn (singlemove_string (wordpart[2]), wordpart[2]);
- /* Do the second word. */
- output_asm_insn (singlemove_string (wordpart[1]), wordpart[1]);
- /* Do lowest-numbered word. */
- output_asm_insn (singlemove_string (wordpart[0]), wordpart[0]);
- return "";
- }
- }
- else /* TARGET_ARCH64 */
- {
- if (optype0 == REGOP && optype1 == REGOP
- && REGNO (op0) == REGNO (wordpart[1][1]))
- {
- output_asm_insn ("mov %1,%0", wordpart[1]);
- output_asm_insn ("mov %1,%0", wordpart[0]);
- return "";
- }
- }
-
- /* Normal case: move the words in lowest to highest address order.
-   There may be an overlapping register; in that case, skip and go
- back. */
-
- if (TARGET_ARCH32)
- {
- int i;
- int offset = 0xc;
- rtx temp[2];
-
- for (i = 0; i < 4; i++)
- {
- if (! load_late[i])
- output_asm_insn (singlemove_string (wordpart[i]), wordpart[i]);
-
- if (i != 3)
- {
- /* Make any unoffsettable addresses point at the next word. */
- if (addreg0)
- output_asm_insn ("add %0,0x4,%0", &addreg0);
- if (addreg1)
- output_asm_insn ("add %0,0x4,%0", &addreg1);
- }
- }
- for (i = 0; i < 4; i++)
- {
- if (load_late[i])
- {
- int fix = offset - i * 4;
-
- /* Back up to the appropriate place. */
- temp[1] = GEN_INT (-fix);
- if (addreg0)
- {
- temp[0] = addreg0;
- output_asm_insn ("add %0,%1,%0", temp);
- }
- if (addreg1)
- {
- temp[0] = addreg1;
- output_asm_insn ("add %0,%1,%0", temp);
- }
- output_asm_insn (singlemove_string (wordpart[i]),
- wordpart[i]);
- /* Don't modify the register that's the destination of the
- move. */
- temp[0] = GEN_INT (-(offset - fix));
- if (addreg0 && REGNO (addreg0) != REGNO (wordpart[i][0]))
- {
- temp[1] = addreg0;
-		    output_asm_insn ("add %0,%1,%0", temp);
- }
- if (addreg1 && REGNO (addreg1) != REGNO (wordpart[i][0]))
- {
- temp[1] = addreg1;
-		    output_asm_insn ("add %0,%1,%0", temp);
- }
- offset = 0;
- break;
- }
- }
- if (offset)
- {
- temp[1] = GEN_INT (-offset);
- /* Undo the adds we just did. */
- if (addreg0)
- {
- temp[0] = addreg0;
- output_asm_insn ("add %0,%1,%0", temp);
- }
- if (addreg1)
- {
- temp[0] = addreg1;
- output_asm_insn ("add %0,%1,%0", temp);
- }
+ if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
+ return 1;
}
}
- else /* TARGET_ARCH64 */
+ else if (! TARGET_UNALIGNED_DOUBLES
+ || CONSTANT_P (addr)
+ || GET_CODE (addr) == LO_SUM)
{
- if (load_late_half[0])
- {
- /* Load the second half first. */
- if (addreg0)
- output_asm_insn ("add %0,0x8,%0", &addreg0);
- if (addreg1)
- output_asm_insn ("add %0,0x8,%0", &addreg1);
-
- output_asm_insn (doublemove_string (wordpart[1]), wordpart[1]);
-
- /* Undo the adds we just did. */
- if (addreg0)
- output_asm_insn ("add %0,-0x8,%0", &addreg0);
- if (addreg1)
- output_asm_insn ("add %0,-0x8,%0", &addreg1);
-
- output_asm_insn (doublemove_string (wordpart[0]), wordpart[0]);
- }
- else
- {
- output_asm_insn (doublemove_string (wordpart[0]), wordpart[0]);
-
- if (addreg0)
- output_asm_insn ("add %0,0x8,%0", &addreg0);
- if (addreg1)
- output_asm_insn ("add %0,0x8,%0", &addreg1);
-
- /* Do the second word. */
- output_asm_insn (doublemove_string (wordpart[1]), wordpart[1]);
-
- /* Undo the adds we just did. But don't modify the dest of
- the move. */
- if (addreg0 && REGNO (addreg0) != REGNO (wordpart[1][0]))
- output_asm_insn ("add %0,-0x8,%0", &addreg0);
- if (addreg1 && REGNO (addreg1) != REGNO (wordpart[1][0]))
- output_asm_insn ("add %0,-0x8,%0", &addreg1);
- }
+ /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
+ is true, in which case we can only assume that an access is aligned if
+ it is to a constant address, or the address involves a LO_SUM. */
+ return 1;
}
- return "";
-}
-
-/* Output assembler code to perform a doubleword move insn with operands
- OPERANDS, one of which must be a floating point register. */
-
-char *
-output_fp_move_double (operands)
- rtx *operands;
-{
- if (FP_REG_P (operands[0]))
- {
- if (FP_REG_P (operands[1]))
- {
- if (TARGET_V9)
- return "fmovd %1,%0";
- else
- return "fmovs %1,%0\n\tfmovs %R1,%R0";
- }
- else if (GET_CODE (operands[1]) == REG)
- abort ();
- else
- return output_move_double (operands);
- }
- else if (FP_REG_P (operands[1]))
- {
- if (GET_CODE (operands[0]) == REG)
- abort ();
- else
- return output_move_double (operands);
- }
- else abort ();
-}
-
-/* When doing a quad-register move, determine the direction in which
- the move needs to be performed. SRC and DST are the source and
- destination registers.
-
- A value of -1 indicates that the move needs to be done from the
- highest register to the lowest. */
-
-static int
-move_quad_direction (src, dst)
- rtx src, dst;
-{
- if ((REGNO (dst) > REGNO (src))
- && (REGNO (dst) < (REGNO (src) + 4)))
- return -1;
- else
- return 1;
-}
-
-/* Output assembler code to perform a quadword move insn with operands
- OPERANDS, one of which must be a floating point register. */
-
-char *
-output_fp_move_quad (operands)
- rtx *operands;
-{
- register rtx op0 = operands[0];
- register rtx op1 = operands[1];
-
- if (FP_REG_P (op0))
- {
- if (FP_REG_P (op1))
- {
- if (TARGET_V9 && TARGET_HARD_QUAD)
- return "fmovq %1,%0";
- else if (TARGET_V9)
- {
- int dir = move_quad_direction (op1, op0);
- if (dir > 0)
- return "fmovd %1,%0\n\tfmovd %S1,%S0";
- else
- return "fmovd %S1,%S0\n\tfmovd %1,%0";
- }
- else
- {
- int dir = move_quad_direction (op0, op1);
- if (dir > 0)
- return "fmovs %1,%0\n\tfmovs %R1,%R0\n\tfmovs %S1,%S0\n\tfmovs %T1,%T0";
- else
- return "fmovs %T1,%T0\n\tfmovs %S1,%S0\n\tfmovs %R1,%R0\n\tfmovs %1,%0";
- }
- }
- else if (GET_CODE (op1) == REG)
- abort ();
- else
- return output_move_quad (operands);
- }
- else if (FP_REG_P (op1))
- {
- if (GET_CODE (op0) == REG)
- abort ();
- else
- return output_move_quad (operands);
- }
- else
- abort ();
-}
-
-/* Return a REG that occurs in ADDR with coefficient 1.
- ADDR can be effectively incremented by incrementing REG. */
-
-static rtx
-find_addr_reg (addr)
- rtx addr;
-{
- while (GET_CODE (addr) == PLUS)
- {
-      /* We absolutely cannot fudge the frame pointer here, because the
- frame pointer must always be 8 byte aligned. It also confuses
- debuggers. */
- if (GET_CODE (XEXP (addr, 0)) == REG
- && REGNO (XEXP (addr, 0)) != FRAME_POINTER_REGNUM)
- addr = XEXP (addr, 0);
- else if (GET_CODE (XEXP (addr, 1)) == REG
- && REGNO (XEXP (addr, 1)) != FRAME_POINTER_REGNUM)
- addr = XEXP (addr, 1);
- else if (CONSTANT_P (XEXP (addr, 0)))
- addr = XEXP (addr, 1);
- else if (CONSTANT_P (XEXP (addr, 1)))
- addr = XEXP (addr, 0);
- else
- abort ();
- }
- if (GET_CODE (addr) == REG)
- return addr;
- abort ();
+ /* An obviously unaligned address. */
+ return 0;
}
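
The offset tests above reduce to power-of-two masking, with the register bias subtracted for %sp/%fp-relative addresses. A minimal sketch, assuming SPARC_STACK_BIAS is 2047 as on 64-bit targets and eliding the REGNO_POINTER_ALIGN check applied to other base registers:

    #include <assert.h>

    #define STACK_BIAS 2047   /* assumed SPARC_STACK_BIAS, 64-bit */

    static int
    offset_aligned_p (long offset, int desired, int sp_or_fp_relative)
    {
      if (sp_or_fp_relative)
        return ((offset - STACK_BIAS) & (desired - 1)) == 0;
      return (offset & (desired - 1)) == 0;
    }

    int
    main (void)
    {
      assert (offset_aligned_p (16, 8, 0));
      assert (offset_aligned_p (STACK_BIAS + 8, 8, 1));
      assert (!offset_aligned_p (STACK_BIAS + 4, 8, 1));
      return 0;
    }
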
-
-/* Output reasonable peephole for set-on-condition-code insns.
- Note that these insns assume a particular way of defining
- labels. Therefore, *both* sparc.h and this function must
- be changed if a new syntax is needed. */
-char *
-output_scc_insn (operands, insn)
- rtx operands[];
- rtx insn;
-{
- static char string[100];
- rtx label = 0, next = insn;
- int need_label = 0;
-
- /* This code used to be called with final_sequence nonzero (for fpcc
- delay slots), but that is no longer allowed. */
- if (final_sequence)
- abort ();
-
-  /* On UltraSPARC a conditional move blocks until 3 cycles after prior loads
- complete. It might be beneficial here to use branches if any recent
- instructions were loads. */
- if (TARGET_V9 && REGNO (operands[1]) == SPARC_ICC_REG)
- return "mov 0,%0\n\tmov%C2 %x1,1,%0";
-
- /* Try doing a jump optimization which jump.c can't do for us
- because we did not expose that setcc works by using branches.
-
- If this scc insn is followed by an unconditional branch, then have
- the jump insn emitted here jump to that location, instead of to
- the end of the scc sequence as usual. */
-
- do
- {
- if (GET_CODE (next) == CODE_LABEL)
- label = next;
- next = NEXT_INSN (next);
- }
- while (next && (GET_CODE (next) == NOTE || GET_CODE (next) == CODE_LABEL));
-
- if (next && GET_CODE (next) == JUMP_INSN && simplejump_p (next))
- label = JUMP_LABEL (next);
-
- /* If not optimizing, jump label fields are not set. To be safe, always
-     check here whether label is still zero.  */
- if (label == 0)
- {
- label = gen_label_rtx ();
- need_label = 1;
- }
-
- LABEL_NUSES (label) += 1;
-
- /* operands[3] is an unused slot. */
- operands[3] = label;
-
- strcpy (string, output_cbranch (operands[2], 3, 0, 1, 0, 0));
- strcat (string, "\n\tmov 1,%0\n\tmov 0,%0");
-
- if (need_label)
- strcat (string, "\n%l3:");
-
- return string;
-}
/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
@@ -2940,7 +2892,7 @@ static int
save_regs (file, low, high, base, offset, n_regs, real_offset)
FILE *file;
int low, high;
- char *base;
+ const char *base;
int offset;
int n_regs;
int real_offset;
@@ -2953,7 +2905,7 @@ save_regs (file, low, high, base, offset, n_regs, real_offset)
{
if (regs_ever_live[i] && ! call_used_regs[i])
{
- fprintf (file, "\tstx %s,[%s+%d]\n",
+ fprintf (file, "\tstx\t%s, [%s+%d]\n",
reg_names[i], base, offset + 4 * n_regs);
if (dwarf2out_do_frame ())
dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
@@ -2969,7 +2921,7 @@ save_regs (file, low, high, base, offset, n_regs, real_offset)
{
if (regs_ever_live[i+1] && ! call_used_regs[i+1])
{
- fprintf (file, "\tstd %s,[%s+%d]\n",
+ fprintf (file, "\tstd\t%s, [%s+%d]\n",
reg_names[i], base, offset + 4 * n_regs);
if (dwarf2out_do_frame ())
{
@@ -2981,7 +2933,7 @@ save_regs (file, low, high, base, offset, n_regs, real_offset)
}
else
{
- fprintf (file, "\tst %s,[%s+%d]\n",
+ fprintf (file, "\tst\t%s, [%s+%d]\n",
reg_names[i], base, offset + 4 * n_regs);
if (dwarf2out_do_frame ())
dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
@@ -2992,7 +2944,7 @@ save_regs (file, low, high, base, offset, n_regs, real_offset)
{
if (regs_ever_live[i+1] && ! call_used_regs[i+1])
{
- fprintf (file, "\tst %s,[%s+%d]\n",
+ fprintf (file, "\tst\t%s, [%s+%d]\n",
reg_names[i+1], base, offset + 4 * n_regs + 4);
if (dwarf2out_do_frame ())
dwarf2out_reg_save ("", i + 1, real_offset + 4 * n_regs + 4);
@@ -3013,7 +2965,7 @@ static int
restore_regs (file, low, high, base, offset, n_regs)
FILE *file;
int low, high;
- char *base;
+ const char *base;
int offset;
int n_regs;
{
@@ -3024,7 +2976,7 @@ restore_regs (file, low, high, base, offset, n_regs)
for (i = low; i < high; i++)
{
if (regs_ever_live[i] && ! call_used_regs[i])
- fprintf (file, "\tldx [%s+%d], %s\n",
+ fprintf (file, "\tldx\t[%s+%d], %s\n",
base, offset + 4 * n_regs, reg_names[i]),
n_regs += 2;
}
@@ -3035,15 +2987,15 @@ restore_regs (file, low, high, base, offset, n_regs)
{
if (regs_ever_live[i] && ! call_used_regs[i])
if (regs_ever_live[i+1] && ! call_used_regs[i+1])
- fprintf (file, "\tldd [%s+%d], %s\n",
+ fprintf (file, "\tldd\t[%s+%d], %s\n",
base, offset + 4 * n_regs, reg_names[i]),
n_regs += 2;
else
- fprintf (file, "\tld [%s+%d],%s\n",
+ fprintf (file, "\tld\t[%s+%d],%s\n",
base, offset + 4 * n_regs, reg_names[i]),
n_regs += 2;
else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
- fprintf (file, "\tld [%s+%d],%s\n",
+ fprintf (file, "\tld\t[%s+%d],%s\n",
base, offset + 4 * n_regs + 4, reg_names[i+1]),
n_regs += 2;
}
@@ -3128,13 +3080,13 @@ static void
build_big_number (file, num, reg)
FILE *file;
int num;
- char *reg;
+ const char *reg;
{
if (num >= 0 || ! TARGET_ARCH64)
{
- fprintf (file, "\tsethi %%hi(%d),%s\n", num, reg);
+ fprintf (file, "\tsethi\t%%hi(%d), %s\n", num, reg);
if ((num & 0x3ff) != 0)
- fprintf (file, "\tor %s,%%lo(%d),%s\n", reg, num, reg);
+ fprintf (file, "\tor\t%s, %%lo(%d), %s\n", reg, num, reg);
}
else /* num < 0 && TARGET_ARCH64 */
{
@@ -3147,7 +3099,7 @@ build_big_number (file, num, reg)
int inv = ~asize;
int low = -0x400 + (asize & 0x3FF);
- fprintf (file, "\tsethi %%hi(%d),%s\n\txor %s,%d,%s\n",
+ fprintf (file, "\tsethi\t%%hi(%d), %s\n\txor\t%s, %d, %s\n",
inv, reg, reg, low, reg);
}
}
@@ -3183,16 +3135,16 @@ output_function_prologue (file, size, leaf_function)
else if (! leaf_function && ! TARGET_BROKEN_SAVERESTORE)
{
if (actual_fsize <= 4096)
- fprintf (file, "\tsave %%sp,-%d,%%sp\n", actual_fsize);
+ fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
else if (actual_fsize <= 8192)
{
- fprintf (file, "\tsave %%sp,-4096,%%sp\n");
- fprintf (file, "\tadd %%sp,-%d,%%sp\n", actual_fsize - 4096);
+ fprintf (file, "\tsave\t%%sp, -4096, %%sp\n");
+ fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
}
else
{
build_big_number (file, -actual_fsize, "%g1");
- fprintf (file, "\tsave %%sp,%%g1,%%sp\n");
+ fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
}
}
else if (! leaf_function && TARGET_BROKEN_SAVERESTORE)
@@ -3203,31 +3155,31 @@ output_function_prologue (file, size, leaf_function)
fprintf (file, "\tsave\n");
if (actual_fsize <= 4096)
- fprintf (file, "\tadd %%fp,-%d,%%sp\n", actual_fsize);
+ fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize);
else if (actual_fsize <= 8192)
{
- fprintf (file, "\tadd %%fp,-4096,%%sp\n");
- fprintf (file, "\tadd %%fp,-%d,%%sp\n", actual_fsize - 4096);
+ fprintf (file, "\tadd\t%%fp, -4096, %%sp\n");
+ fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize - 4096);
}
else
{
build_big_number (file, -actual_fsize, "%g1");
- fprintf (file, "\tadd %%fp,%%g1,%%sp\n");
+ fprintf (file, "\tadd\t%%fp, %%g1, %%sp\n");
}
}
else /* leaf function */
{
if (actual_fsize <= 4096)
- fprintf (file, "\tadd %%sp,-%d,%%sp\n", actual_fsize);
+ fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize);
else if (actual_fsize <= 8192)
{
- fprintf (file, "\tadd %%sp,-4096,%%sp\n");
- fprintf (file, "\tadd %%sp,-%d,%%sp\n", actual_fsize - 4096);
+ fprintf (file, "\tadd\t%%sp, -4096, %%sp\n");
+ fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
}
else
{
build_big_number (file, -actual_fsize, "%g1");
- fprintf (file, "\tadd %%sp,%%g1,%%sp\n");
+ fprintf (file, "\tadd\t%%sp, %%g1, %%sp\n");
}
}
@@ -3260,7 +3212,7 @@ output_function_prologue (file, size, leaf_function)
if (num_gfregs)
{
int offset, real_offset, n_regs;
- char *base;
+ const char *base;
real_offset = -apparent_fsize;
offset = -apparent_fsize + frame_base_offset;
@@ -3273,7 +3225,7 @@ output_function_prologue (file, size, leaf_function)
output_function_epilogue will lose (the result will get
clobbered). */
build_big_number (file, offset, "%g1");
- fprintf (file, "\tadd %s,%%g1,%%g1\n", frame_base_name);
+ fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
base = "%g1";
offset = 0;
}
@@ -3311,7 +3263,7 @@ output_function_epilogue (file, size, leaf_function)
int size ATTRIBUTE_UNUSED;
int leaf_function;
{
- char *ret;
+ const char *ret;
if (leaf_label)
{
@@ -3328,25 +3280,26 @@ output_function_epilogue (file, size, leaf_function)
else if (current_function_epilogue_delay_list == 0)
{
- /* If code does not drop into the epilogue, do nothing. */
+ /* If code does not drop into the epilogue, we need
+ do nothing except output pending case vectors. */
rtx insn = get_last_insn ();
if (GET_CODE (insn) == NOTE)
insn = prev_nonnote_insn (insn);
if (insn && GET_CODE (insn) == BARRIER)
- return;
+ goto output_vectors;
}
/* Restore any call saved registers. */
if (num_gfregs)
{
int offset, n_regs;
- char *base;
+ const char *base;
offset = -apparent_fsize + frame_base_offset;
if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
{
build_big_number (file, offset, "%g1");
- fprintf (file, "\tadd %s,%%g1,%%g1\n", frame_base_name);
+ fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
base = "%g1";
offset = 0;
}
@@ -3368,9 +3321,9 @@ output_function_epilogue (file, size, leaf_function)
/* Work out how to skip the caller's unimp instruction if required. */
if (leaf_function)
- ret = (SKIP_CALLERS_UNIMP_P ? "jmp %o7+12" : "retl");
+ ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%o7+12" : "retl");
else
- ret = (SKIP_CALLERS_UNIMP_P ? "jmp %i7+12" : "ret");
+ ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");
if (TARGET_EPILOGUE || leaf_label)
{
@@ -3391,7 +3344,7 @@ output_function_epilogue (file, size, leaf_function)
final_scan_insn (insn, file, 1, 0, 1);
}
else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
- fputs ("\treturn %i7+8\n\tnop\n", file);
+ fputs ("\treturn\t%i7+8\n\tnop\n", file);
else
fprintf (file, "\t%s\n\trestore\n", ret);
}
@@ -3413,18 +3366,21 @@ output_function_epilogue (file, size, leaf_function)
else if (actual_fsize == 0)
fprintf (file, "\t%s\n\tnop\n", ret);
else if (actual_fsize <= 4096)
- fprintf (file, "\t%s\n\tsub %%sp,-%d,%%sp\n", ret, actual_fsize);
+ fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
else if (actual_fsize <= 8192)
- fprintf (file, "\tsub %%sp,-4096,%%sp\n\t%s\n\tsub %%sp,-%d,%%sp\n",
+ fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
ret, actual_fsize - 4096);
else if ((actual_fsize & 0x3ff) == 0)
- fprintf (file, "\tsethi %%hi(%d),%%g1\n\t%s\n\tadd %%sp,%%g1,%%sp\n",
+ fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
actual_fsize, ret);
else
- fprintf (file, "\tsethi %%hi(%d),%%g1\n\tor %%g1,%%lo(%d),%%g1\n\t%s\n\tadd %%sp,%%g1,%%sp\n",
+ fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
actual_fsize, actual_fsize, ret);
target_flags |= old_target_epilogue;
}
+
+ output_vectors:
+ sparc_output_deferred_case_vectors ();
}
/* Functions for handling argument passing.
@@ -3661,6 +3617,13 @@ struct function_arg_record_value_parms
int nregs, intoffset;
};
+static void function_arg_record_value_3
+ PROTO((int, struct function_arg_record_value_parms *));
+static void function_arg_record_value_2
+ PROTO((tree, int, struct function_arg_record_value_parms *));
+static rtx function_arg_record_value
+ PROTO((tree, enum machine_mode, int, int, int));
+
static void
function_arg_record_value_1 (type, startbitpos, parms)
tree type;
@@ -3902,7 +3865,7 @@ function_arg_record_value (type, mode, slotno, named, regbase)
nregs = SPARC_INT_ARG_MAX - slotno;
}
if (nregs == 0)
- abort();
+ abort ();
parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
@@ -4278,6 +4241,12 @@ function_value (type, mode, incoming_p)
mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
}
}
+
+ if (TARGET_ARCH64
+ && GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < UNITS_PER_WORD
+ && type && TREE_CODE (type) != UNION_TYPE)
+ mode = DImode;
if (incoming_p)
regno = BASE_RETURN_VALUE_REG (mode);
@@ -4314,7 +4283,7 @@ sparc_builtin_saveregs (arglist)
GEN_INT (STACK_POINTER_OFFSET
+ UNITS_PER_WORD * first_reg));
- if (flag_check_memory_usage
+ if (current_function_check_memory_usage
&& first_reg < NPARM_REGS (word_mode))
emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
address, ptr_mode,
@@ -4346,16 +4315,16 @@ output_cbranch (op, label, reversed, annul, noop, insn)
int reversed, annul, noop;
rtx insn;
{
- static char string[20];
+ static char string[32];
enum rtx_code code = GET_CODE (op);
rtx cc_reg = XEXP (op, 0);
enum machine_mode mode = GET_MODE (cc_reg);
- static char v8_labelno[] = " %lX";
- static char v9_icc_labelno[] = " %%icc,%lX";
- static char v9_xcc_labelno[] = " %%xcc,%lX";
- static char v9_fcc_labelno[] = " %%fccX,%lY";
+ static char v8_labelno[] = "%lX";
+ static char v9_icc_labelno[] = "%%icc, %lX";
+ static char v9_xcc_labelno[] = "%%xcc, %lX";
+ static char v9_fcc_labelno[] = "%%fccX, %lY";
char *labelno;
- int labeloff;
+ int labeloff, spaces = 8;
/* ??? !v9: FP branches cannot be preceded by another floating point insn.
Because there is currently no concept of pre-delay slots, we can fix
@@ -4376,16 +4345,28 @@ output_cbranch (op, label, reversed, annul, noop, insn)
{
case NE:
if (mode == CCFPmode || mode == CCFPEmode)
- strcat (string, "fbne");
+ {
+ strcat (string, "fbne");
+ spaces -= 4;
+ }
else
- strcpy (string, "bne");
+ {
+ strcpy (string, "bne");
+ spaces -= 3;
+ }
break;
case EQ:
if (mode == CCFPmode || mode == CCFPEmode)
- strcat (string, "fbe");
+ {
+ strcat (string, "fbe");
+ spaces -= 3;
+ }
else
- strcpy (string, "be");
+ {
+ strcpy (string, "be");
+ spaces -= 2;
+ }
break;
case GE:
@@ -4395,23 +4376,39 @@ output_cbranch (op, label, reversed, annul, noop, insn)
strcat (string, "fbul");
else
strcat (string, "fbge");
+ spaces -= 4;
}
else if (mode == CC_NOOVmode)
- strcpy (string, "bpos");
+ {
+ strcpy (string, "bpos");
+ spaces -= 4;
+ }
else
- strcpy (string, "bge");
+ {
+ strcpy (string, "bge");
+ spaces -= 3;
+ }
break;
case GT:
if (mode == CCFPmode || mode == CCFPEmode)
{
if (reversed)
- strcat (string, "fbule");
+ {
+ strcat (string, "fbule");
+ spaces -= 5;
+ }
else
- strcat (string, "fbg");
+ {
+ strcat (string, "fbg");
+ spaces -= 3;
+ }
}
else
- strcpy (string, "bg");
+ {
+ strcpy (string, "bg");
+ spaces -= 2;
+ }
break;
case LE:
@@ -4421,52 +4418,75 @@ output_cbranch (op, label, reversed, annul, noop, insn)
strcat (string, "fbug");
else
strcat (string, "fble");
+ spaces -= 4;
}
else
- strcpy (string, "ble");
+ {
+ strcpy (string, "ble");
+ spaces -= 3;
+ }
break;
case LT:
if (mode == CCFPmode || mode == CCFPEmode)
{
if (reversed)
- strcat (string, "fbuge");
+ {
+ strcat (string, "fbuge");
+ spaces -= 5;
+ }
else
- strcat (string, "fbl");
+ {
+ strcat (string, "fbl");
+ spaces -= 3;
+ }
}
else if (mode == CC_NOOVmode)
- strcpy (string, "bneg");
+ {
+ strcpy (string, "bneg");
+ spaces -= 4;
+ }
else
- strcpy (string, "bl");
+ {
+ strcpy (string, "bl");
+ spaces -= 2;
+ }
break;
case GEU:
strcpy (string, "bgeu");
+ spaces -= 4;
break;
case GTU:
strcpy (string, "bgu");
+ spaces -= 3;
break;
case LEU:
strcpy (string, "bleu");
+ spaces -= 4;
break;
case LTU:
strcpy (string, "blu");
+ spaces -= 3;
break;
default:
- break;
+ abort ();
}
/* Now add the annulling, the label, and a possible noop. */
if (annul)
- strcat (string, ",a");
+ {
+ strcat (string, ",a");
+ spaces -= 2;
+ }
if (! TARGET_V9)
{
- labeloff = 3;
+ labeloff = 2;
labelno = v8_labelno;
}
else
@@ -4474,7 +4494,11 @@ output_cbranch (op, label, reversed, annul, noop, insn)
rtx note;
if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
- strcat (string, INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
+ {
+ strcat (string,
+ INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
+ spaces -= 3;
+ }
labeloff = 9;
if (mode == CCFPmode || mode == CCFPEmode)
@@ -4482,7 +4506,7 @@ output_cbranch (op, label, reversed, annul, noop, insn)
labeloff = 10;
labelno = v9_fcc_labelno;
/* Set the char indicating the number of the fcc reg to use. */
- labelno[6] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
+ labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
}
else if (mode == CCXmode || mode == CCX_NOOVmode)
labelno = v9_xcc_labelno;
@@ -4492,6 +4516,10 @@ output_cbranch (op, label, reversed, annul, noop, insn)
/* Set the char indicating the number of the operand containing the
label_ref. */
labelno[labeloff] = label + '0';
+ if (spaces > 0)
+ strcat (string, "\t");
+ else
+ strcat (string, " ");
strcat (string, labelno);
if (noop)
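
The new "spaces" counter threaded through output_cbranch above tracks how much
of an 8-column opcode field remains after the mnemonic, the ",a" annul flag,
and a ",pt"/",pn" prediction suffix: a tab is emitted only while the field
still fits, otherwise a single space keeps the operands from running into the
opcode. A standalone sketch of that decision (hosted C, illustrative strings
only, not patch code):

    #include <stdio.h>
    #include <string.h>

    /* Same choice output_cbranch makes once the label is appended.  */
    static void pad_opcode (char *string, int spaces)
    {
      strcat (string, spaces > 0 ? "\t" : " ");
    }

    int main (void)
    {
      char buf[32] = "fbule,a,pn";       /* spaces = 8 - 5 - 2 - 3 = -2 */
      pad_opcode (buf, 8 - 5 - 2 - 3);
      strcat (buf, "%fcc0, %l1");
      puts (buf);                        /* fbule,a,pn %fcc0, %l1 */
      return 0;
    }
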
@@ -4512,15 +4540,18 @@ output_cbranch (op, label, reversed, annul, noop, insn)
NOOP is non-zero if we have to follow this branch by a noop. */
char *
-output_v9branch (op, reg, label, reversed, annul, noop)
+output_v9branch (op, reg, label, reversed, annul, noop, insn)
rtx op;
int reg, label;
int reversed, annul, noop;
+ rtx insn;
{
static char string[20];
enum rtx_code code = GET_CODE (op);
enum machine_mode mode = GET_MODE (XEXP (op, 0));
- static char labelno[] = " %X,%lX";
+ static char labelno[] = "%X, %lX";
+ rtx note;
+ int spaces = 8;
/* If not floating-point or if EQ or NE, we can just reverse the code. */
if (reversed)
@@ -4536,26 +4567,32 @@ output_v9branch (op, reg, label, reversed, annul, noop)
{
case NE:
strcpy (string, "brnz");
+ spaces -= 4;
break;
case EQ:
strcpy (string, "brz");
+ spaces -= 3;
break;
case GE:
strcpy (string, "brgez");
+ spaces -= 5;
break;
case LT:
strcpy (string, "brlz");
+ spaces -= 4;
break;
case LE:
strcpy (string, "brlez");
+ spaces -= 5;
break;
case GT:
strcpy (string, "brgz");
+ spaces -= 4;
break;
default:
@@ -4564,12 +4601,24 @@ output_v9branch (op, reg, label, reversed, annul, noop)
/* Now add the annulling, reg, label, and nop. */
if (annul)
- strcat (string, ",a");
+ {
+ strcat (string, ",a");
+ spaces -= 2;
+ }
- /* ??? Optional prediction bit ",pt" or ",pf" goes here. */
+ if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
+ {
+ strcat (string,
+ INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
+ spaces -= 3;
+ }
- labelno[2] = reg + '0';
+ labelno[1] = reg + '0';
labelno[6] = label + '0';
+ if (spaces > 0)
+ strcat (string, "\t");
+ else
+ strcat (string, " ");
strcat (string, labelno);
if (noop)
@@ -4626,13 +4675,13 @@ epilogue_renumber (where)
default:
debug_rtx (*where);
- abort();
+ abort ();
}
}
/* Output assembler code to return from a function. */
-char *
+const char *
output_return (operands)
rtx *operands;
{
@@ -4643,7 +4692,7 @@ output_return (operands)
operands[0] = leaf_label;
return "b%* %l0%(";
}
- else if (leaf_function)
+ else if (current_function_uses_only_leaf_regs)
{
/* No delay slot in a leaf function. */
if (delay)
@@ -4661,24 +4710,24 @@ output_return (operands)
if (actual_fsize <= 4096)
{
if (SKIP_CALLERS_UNIMP_P)
- return "jmp %%o7+12\n\tsub %%sp,-%0,%%sp";
+ return "jmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
else
- return "retl\n\tsub %%sp,-%0,%%sp";
+ return "retl\n\tsub\t%%sp, -%0, %%sp";
}
else if (actual_fsize <= 8192)
{
operands[0] = GEN_INT (actual_fsize - 4096);
if (SKIP_CALLERS_UNIMP_P)
- return "sub %%sp,-4096,%%sp\n\tjmp %%o7+12\n\tsub %%sp,-%0,%%sp";
+ return "sub\t%%sp, -4096, %%sp\n\tjmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
else
- return "sub %%sp,-4096,%%sp\n\tretl\n\tsub %%sp,-%0,%%sp";
+ return "sub\t%%sp, -4096, %%sp\n\tretl\n\tsub\t%%sp, -%0, %%sp";
}
else if (SKIP_CALLERS_UNIMP_P)
{
if ((actual_fsize & 0x3ff) != 0)
- return "sethi %%hi(%a0),%%g1\n\tor %%g1,%%lo(%a0),%%g1\n\tjmp %%o7+12\n\tadd %%sp,%%g1,%%sp";
+ return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
else
- return "sethi %%hi(%a0),%%g1\n\tjmp %%o7+12\n\tadd %%sp,%%g1,%%sp";
+ return "sethi\t%%hi(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
}
else
{
@@ -4696,16 +4745,16 @@ output_return (operands)
epilogue_renumber (&SET_SRC (PATTERN (delay)));
}
if (SKIP_CALLERS_UNIMP_P)
- return "return %%i7+12%#";
+ return "return\t%%i7+12%#";
else
- return "return %%i7+8%#";
+ return "return\t%%i7+8%#";
}
else
{
if (delay)
abort ();
if (SKIP_CALLERS_UNIMP_P)
- return "jmp %%i7+12\n\trestore";
+ return "jmp\t%%i7+12\n\trestore";
else
return "ret\n\trestore";
}
@@ -4736,6 +4785,53 @@ order_regs_for_local_alloc ()
}
}
+/* Return 1 if REG and MEM are legitimate enough to allow the various
+ mem<-->reg splits to be run. */
+
+int
+sparc_splitdi_legitimate (reg, mem)
+ rtx reg;
+ rtx mem;
+{
+ /* Punt if we are here by mistake. */
+ if (! reload_completed)
+ abort ();
+
+ /* We must have an offsettable memory reference. */
+ if (! offsettable_memref_p (mem))
+ return 0;
+
+ /* If we have legitimate args for ldd/std, we do not want
+ the split to happen. */
+ if ((REGNO (reg) % 2) == 0
+ && mem_min_alignment (mem, 8))
+ return 0;
+
+ /* Success. */
+ return 1;
+}
+
+/* Return 1 if x and y are some kind of REG and they refer to
+   different hard registers.  This test is guaranteed to be
+ run after reload. */
+
+int
+sparc_absnegfloat_split_legitimate (x, y)
+ rtx x, y;
+{
+ if (GET_CODE (x) == SUBREG)
+ x = alter_subreg (x);
+ if (GET_CODE (x) != REG)
+ return 0;
+ if (GET_CODE (y) == SUBREG)
+ y = alter_subreg (y);
+ if (GET_CODE (y) != REG)
+ return 0;
+ if (REGNO (x) == REGNO (y))
+ return 0;
+ return 1;
+}
+
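
Taken together, sparc_splitdi_legitimate and its mem_min_alignment test encode
the hardware pairing rule: ldd/std need an even-numbered register and an
8-byte-aligned address, and only 64-bit moves that fail that rule get split
into two 32-bit halves. A hypothetical standalone check of the same predicate
(regno and alignment stand in for the rtx queries):

    #include <stdio.h>

    /* Illustrative only, not GCC code.  */
    static int must_split (int regno, unsigned align)
    {
      return ! ((regno % 2) == 0 && align >= 8);
    }

    int main (void)
    {
      printf ("%d\n", must_split (8, 8));  /* even pair, aligned: keep ldd */
      printf ("%d\n", must_split (9, 8));  /* odd register: split */
      printf ("%d\n", must_split (8, 4));  /* under-aligned: split */
      return 0;
    }
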
/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
This makes them candidates for using ldd and std insns.
@@ -4866,7 +4962,7 @@ print_operand (file, x, code)
case '#':
/* Output a 'nop' if there's nothing for the delay slot. */
if (dbr_sequence_length () == 0)
- fputs ("\n\tnop", file);
+ fputs ("\n\t nop", file);
return;
case '*':
/* Output an annul flag if there's nothing for the delay slot and we
@@ -4884,7 +4980,7 @@ print_operand (file, x, code)
not optimizing. This is always used with '*' above. */
if (dbr_sequence_length () == 0
&& ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
- fputs ("\n\tnop", file);
+ fputs ("\n\t nop", file);
return;
case '_':
/* Output the Embedded Medium/Anywhere code model base register. */
@@ -5086,7 +5182,10 @@ print_operand (file, x, code)
else if (GET_CODE (x) == LO_SUM)
{
print_operand (file, XEXP (x, 0), 0);
- fputs ("+%lo(", file);
+ if (TARGET_CM_MEDMID)
+ fputs ("+%l44(", file);
+ else
+ fputs ("+%lo(", file);
output_addr_const (file, XEXP (x, 1));
fputc (')', file);
}
@@ -5152,7 +5251,7 @@ output_double_int (file, value)
|| GET_CODE (value) == CODE_LABEL
|| GET_CODE (value) == MINUS)))
{
- if (!TARGET_V9 || TARGET_CM_MEDLOW)
+ if (! TARGET_V9)
{
ASM_OUTPUT_INT (file, const0_rtx);
ASM_OUTPUT_INT (file, value);
@@ -5335,14 +5434,18 @@ sparc_initialize_trampoline (tramp, fnaddr, cxt)
{
/* SPARC 32 bit trampoline:
- sethi %hi(fn),%g1
- sethi %hi(static),%g2
- jmp %g1+%lo(fn)
- or %g2,%lo(static),%g2
+ sethi %hi(fn), %g1
+ sethi %hi(static), %g2
+ jmp %g1+%lo(fn)
+ or %g2, %lo(static), %g2
SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
*/
+#ifdef TRANSFER_FROM_TRAMPOLINE
+ emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
+ 0, VOIDmode, 1, tramp, Pmode);
+#endif
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
expand_binop (SImode, ior_optab,
@@ -5386,11 +5489,16 @@ void
sparc64_initialize_trampoline (tramp, fnaddr, cxt)
rtx tramp, fnaddr, cxt;
{
+#ifdef TRANSFER_FROM_TRAMPOLINE
+ emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
+ 0, VOIDmode, 1, tramp, Pmode);
+#endif
+
/*
- rd %pc,%g1
- ldx [%g1+24],%g5
- jmp %g5
- ldx [%g1+16],%g5
+ rd %pc, %g1
+ ldx [%g1+24], %g5
+ jmp %g5
+ ldx [%g1+16], %g5
+16 bytes data
*/
@@ -5399,12 +5507,13 @@ sparc64_initialize_trampoline (tramp, fnaddr, cxt)
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
GEN_INT (0xca586018));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
- GEN_INT (0x81c04000));
+ GEN_INT (0x81c14000));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
GEN_INT (0xca586010));
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
- emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 20)), fnaddr);
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));
+
if (sparc_cpu != PROCESSOR_ULTRASPARC)
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
}
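
The two constant changes above are easiest to read as encodings: 0x81c04000 is
the register-form jmpl %g1+%g0, %g0, while the corrected 0x81c14000 is
jmpl %g5+%g0, %g0, which matches the jmp %g5 in the rewritten comment; the
fnaddr store likewise moves to offset 24 so that ldx [%g1+24], %g5 fetches the
function address. A small cross-check encoder, using the JMPL field layout
quoted in the 32-bit trampoline comment further up (illustrative, not patch
code):

    #include <stdio.h>

    /* Register-form JMPL (i = 0): op=10, rd, op3=111000, rs1, rs2.  */
    static unsigned jmpl_reg (unsigned rd, unsigned rs1, unsigned rs2)
    {
      return (2u << 30) | (rd << 25) | (0x38u << 19) | (rs1 << 14) | rs2;
    }

    int main (void)
    {
      printf ("0x%08x\n", jmpl_reg (0, 1, 0));  /* 0x81c04000: jmp %g1 */
      printf ("0x%08x\n", jmpl_reg (0, 5, 0));  /* 0x81c14000: jmp %g5 */
      return 0;
    }
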
@@ -5525,18 +5634,23 @@ sparc_flat_compute_frame_size (size)
/* This is the size of the 16 word reg save area, 1 word struct addr
area, and 4 word fp/alu register copy area. */
- extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
- var_size = size;
- /* Also include the size needed for the 6 parameter registers. */
- args_size = current_function_outgoing_args_size + 24;
- total_size = var_size + args_size + extra_size;
- gp_reg_size = 0;
- fp_reg_size = 0;
- gmask = 0;
- fmask = 0;
- reg_offset = 0;
+ extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
+ var_size = size;
+ gp_reg_size = 0;
+ fp_reg_size = 0;
+ gmask = 0;
+ fmask = 0;
+ reg_offset = 0;
need_aligned_p = 0;
+ args_size = 0;
+ if (!leaf_function_p ())
+ {
+ /* Also include the size needed for the 6 parameter registers. */
+ args_size = current_function_outgoing_args_size + 24;
+ }
+ total_size = var_size + args_size;
+
/* Calculate space needed for gp registers. */
for (regno = 1; regno <= 31; regno++)
{
@@ -5585,9 +5699,13 @@ sparc_flat_compute_frame_size (size)
total_size += gp_reg_size + fp_reg_size;
}
- /* ??? This looks a little suspicious. Clarify. */
- if (total_size == extra_size)
- total_size = extra_size = 0;
+ /* If we must allocate a stack frame at all, we must also allocate
+ room for register window spillage, so as to be binary compatible
+ with libraries and operating systems that do not use -mflat. */
+ if (total_size > 0)
+ total_size += extra_size;
+ else
+ extra_size = 0;
total_size = SPARC_STACK_ALIGN (total_size);
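
The reworked sizing makes two rules explicit: the 24-byte (6 word)
outgoing-argument area is reserved only for non-leaf functions, and the
register-window spill area (extra_size, 84 bytes on 32-bit per the 21-word
breakdown in the comment above) is added whenever any frame exists at all, for
binary compatibility with non-flat code. A hypothetical arithmetic sketch with
made-up numbers; the rounding stands in for SPARC_STACK_ALIGN:

    #include <stdio.h>

    /* All names here are illustrative, not GCC internals.  */
    static int flat_frame_size (int vars, int outgoing, int leaf)
    {
      int args = leaf ? 0 : outgoing + 24;
      int total = vars + args;
      if (total > 0)
        total += 84;              /* window spill + struct addr + copy area */
      return (total + 7) & ~7;
    }

    int main (void)
    {
      printf ("%d\n", flat_frame_size (0, 0, 1));   /* leaf, no frame: 0 */
      printf ("%d\n", flat_frame_size (16, 0, 0));  /* 16+24+84 -> 128 */
      return 0;
    }
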
@@ -5656,7 +5774,7 @@ sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
if (word_op[0] == 's')
{
- fprintf (file, "\t%s %s,[%s+%d]\n",
+ fprintf (file, "\t%s\t%s, [%s+%d]\n",
doubleword_op, reg_names[regno],
base_reg, offset);
if (dwarf2out_do_frame ())
@@ -5668,7 +5786,7 @@ sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
}
}
else
- fprintf (file, "\t%s [%s+%d],%s\n",
+ fprintf (file, "\t%s\t[%s+%d], %s\n",
doubleword_op, base_reg, offset,
reg_names[regno]);
@@ -5679,14 +5797,14 @@ sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
{
if (word_op[0] == 's')
{
- fprintf (file, "\t%s %s,[%s+%d]\n",
+ fprintf (file, "\t%s\t%s, [%s+%d]\n",
word_op, reg_names[regno],
base_reg, offset);
if (dwarf2out_do_frame ())
dwarf2out_reg_save ("", regno, offset + base_offset);
}
else
- fprintf (file, "\t%s [%s+%d],%s\n",
+ fprintf (file, "\t%s\t[%s+%d], %s\n",
word_op, base_reg, offset, reg_names[regno]);
offset += UNITS_PER_WORD;
@@ -5703,14 +5821,14 @@ sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
{
if (word_op[0] == 's')
{
- fprintf (file, "\t%s %s,[%s+%d]\n",
+ fprintf (file, "\t%s\t%s, [%s+%d]\n",
word_op, reg_names[regno],
base_reg, offset);
if (dwarf2out_do_frame ())
dwarf2out_reg_save ("", regno, offset + base_offset);
}
else
- fprintf (file, "\t%s [%s+%d],%s\n",
+ fprintf (file, "\t%s\t[%s+%d], %s\n",
word_op, base_reg, offset, reg_names[regno]);
offset += UNITS_PER_WORD;
@@ -5764,7 +5882,7 @@ sparc_flat_output_function_prologue (file, size)
{
unsigned int reg_offset = current_frame_info.reg_offset;
char *fp_str = reg_names[FRAME_POINTER_REGNUM];
- char *t1_str = "%g1";
+ const char *t1_str = "%g1";
/* Things get a little tricky if local variables take up more than ~4096
bytes and outgoing arguments take up more than ~4096 bytes. When that
@@ -5782,26 +5900,26 @@ sparc_flat_output_function_prologue (file, size)
{
if (size <= 4096)
{
- fprintf (file, "\tadd %s,%d,%s\n",
+ fprintf (file, "\tadd\t%s, %d, %s\n",
sp_str, -size, sp_str);
if (gmask & FRAME_POINTER_MASK)
{
- fprintf (file, "\tst %s,[%s+%d]\n",
+ fprintf (file, "\tst\t%s, [%s+%d]\n",
fp_str, sp_str, reg_offset);
- fprintf (file, "\tsub %s,%d,%s\t%s# set up frame pointer\n",
+ fprintf (file, "\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
sp_str, -size, fp_str, ASM_COMMENT_START);
reg_offset += 4;
}
}
else
{
- fprintf (file, "\tset %d,%s\n\tsub %s,%s,%s\n",
+ fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
size, t1_str, sp_str, t1_str, sp_str);
if (gmask & FRAME_POINTER_MASK)
{
- fprintf (file, "\tst %s,[%s+%d]\n",
+ fprintf (file, "\tst\t%s, [%s+%d]\n",
fp_str, sp_str, reg_offset);
- fprintf (file, "\tadd %s,%s,%s\t%s# set up frame pointer\n",
+ fprintf (file, "\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
sp_str, t1_str, fp_str, ASM_COMMENT_START);
reg_offset += 4;
}
@@ -5820,7 +5938,7 @@ sparc_flat_output_function_prologue (file, size)
}
if (gmask & RETURN_ADDR_MASK)
{
- fprintf (file, "\tst %s,[%s+%d]\n",
+ fprintf (file, "\tst\t%s, [%s+%d]\n",
reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
if (dwarf2out_do_frame ())
dwarf2out_return_save ("", reg_offset - size);
@@ -5842,11 +5960,11 @@ sparc_flat_output_function_prologue (file, size)
if (size1 <= 4096)
{
- fprintf (file, "\tadd %s,%d,%s\n",
+ fprintf (file, "\tadd\t%s, %d, %s\n",
sp_str, -size1, sp_str);
if (gmask & FRAME_POINTER_MASK)
{
- fprintf (file, "\tst %s,[%s+%d]\n\tsub %s,%d,%s\t%s# set up frame pointer\n",
+ fprintf (file, "\tst\t%s, [%s+%d]\n\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
fp_str, sp_str, offset, sp_str, -size1, fp_str,
ASM_COMMENT_START);
offset += 4;
@@ -5854,11 +5972,11 @@ sparc_flat_output_function_prologue (file, size)
}
else
{
- fprintf (file, "\tset %d,%s\n\tsub %s,%s,%s\n",
+ fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
size1, t1_str, sp_str, t1_str, sp_str);
if (gmask & FRAME_POINTER_MASK)
{
- fprintf (file, "\tst %s,[%s+%d]\n\tadd %s,%s,%s\t%s# set up frame pointer\n",
+ fprintf (file, "\tst\t%s, [%s+%d]\n\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
fp_str, sp_str, offset, sp_str, t1_str, fp_str,
ASM_COMMENT_START);
offset += 4;
@@ -5878,7 +5996,7 @@ sparc_flat_output_function_prologue (file, size)
}
if (gmask & RETURN_ADDR_MASK)
{
- fprintf (file, "\tst %s,[%s+%d]\n",
+ fprintf (file, "\tst\t%s, [%s+%d]\n",
reg_names[RETURN_ADDR_REGNUM], sp_str, offset);
if (dwarf2out_do_frame ())
/* offset - size1 == reg_offset - size
@@ -5890,7 +6008,7 @@ sparc_flat_output_function_prologue (file, size)
gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
current_frame_info.fmask,
"st", "std", -size1);
- fprintf (file, "\tset %d,%s\n\tsub %s,%s,%s\n",
+ fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
size - size1, t1_str, sp_str, t1_str, sp_str);
if (dwarf2out_do_frame ())
if (! (gmask & FRAME_POINTER_MASK))
@@ -5943,22 +6061,22 @@ sparc_flat_output_function_epilogue (file, size)
unsigned int size1;
char *sp_str = reg_names[STACK_POINTER_REGNUM];
char *fp_str = reg_names[FRAME_POINTER_REGNUM];
- char *t1_str = "%g1";
+ const char *t1_str = "%g1";
/* In the reload sequence, we don't need to fill the load delay
slots for most of the loads, also see if we can fill the final
delay slot if not otherwise filled by the reload sequence. */
if (size > 4095)
- fprintf (file, "\tset %d,%s\n", size, t1_str);
+ fprintf (file, "\tset\t%d, %s\n", size, t1_str);
if (frame_pointer_needed)
{
if (size > 4095)
- fprintf (file,"\tsub %s,%s,%s\t\t%s# sp not trusted here\n",
+ fprintf (file,"\tsub\t%s, %s, %s\t\t%s# sp not trusted here\n",
fp_str, t1_str, sp_str, ASM_COMMENT_START);
else
- fprintf (file,"\tsub %s,%d,%s\t\t%s# sp not trusted here\n",
+ fprintf (file,"\tsub\t%s, %d, %s\t\t%s# sp not trusted here\n",
fp_str, size, sp_str, ASM_COMMENT_START);
}
@@ -5976,7 +6094,7 @@ sparc_flat_output_function_epilogue (file, size)
/* Offset to register save area from %sp. */
reg_offset = size1 - reg_offset;
- fprintf (file, "\tset %d,%s\n\tadd %s,%s,%s\n",
+ fprintf (file, "\tset\t%d, %s\n\tadd\t%s, %s, %s\n",
size1, t1_str, sp_str, t1_str, sp_str);
}
@@ -5984,13 +6102,13 @@ sparc_flat_output_function_epilogue (file, size)
because they are treated specially by the prologue output code. */
if (current_frame_info.gmask & FRAME_POINTER_MASK)
{
- fprintf (file, "\tld [%s+%d],%s\n",
+ fprintf (file, "\tld\t[%s+%d], %s\n",
sp_str, reg_offset, fp_str);
reg_offset += 4;
}
if (current_frame_info.gmask & RETURN_ADDR_MASK)
{
- fprintf (file, "\tld [%s+%d],%s\n",
+ fprintf (file, "\tld\t[%s+%d], %s\n",
sp_str, reg_offset, reg_names[RETURN_ADDR_REGNUM]);
reg_offset += 4;
}
@@ -6007,12 +6125,12 @@ sparc_flat_output_function_epilogue (file, size)
{
size -= size1;
if (size > 4095)
- fprintf (file, "\tset %d,%s\n",
+ fprintf (file, "\tset\t%d, %s\n",
size, t1_str);
}
if (current_function_returns_struct)
- fprintf (file, "\tjmp %%o7+12\n");
+ fprintf (file, "\tjmp\t%%o7+12\n");
else
fprintf (file, "\tretl\n");
@@ -6029,10 +6147,10 @@ sparc_flat_output_function_epilogue (file, size)
}
else if (size > 4095)
- fprintf (file, "\tadd %s,%s,%s\n", sp_str, t1_str, sp_str);
+ fprintf (file, "\tadd\t%s, %s, %s\n", sp_str, t1_str, sp_str);
else if (size > 0)
- fprintf (file, "\tadd %s,%d,%s\n", sp_str, size, sp_str);
+ fprintf (file, "\tadd\t%s, %d, %s\n", sp_str, size, sp_str);
else
fprintf (file, "\tnop\n");
@@ -6040,6 +6158,8 @@ sparc_flat_output_function_epilogue (file, size)
/* Reset state info for each function. */
current_frame_info = zero_frame_info;
+
+ sparc_output_deferred_case_vectors ();
}
/* Define the number of delay slots needed for the function epilogue.
@@ -6089,7 +6209,7 @@ sparc_flat_eligible_for_epilogue_delay (trial, slot)
/* Adjust the cost of a scheduling dependency. Return the new cost of
a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
-int
+static int
supersparc_adjust_cost (insn, link, dep_insn, cost)
rtx insn;
rtx link;
@@ -6109,8 +6229,8 @@ supersparc_adjust_cost (insn, link, dep_insn, cost)
cycles later. */
/* if a load, then the dependence must be on the memory address;
- add an extra 'cycle'. Note that the cost could be two cycles
- if the reg was written late in an instruction group; we can't tell
+ add an extra "cycle". Note that the cost could be two cycles
+     if the reg was written late in an instruction group; we can not tell
here. */
if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
return cost + 3;
@@ -6122,7 +6242,7 @@ supersparc_adjust_cost (insn, link, dep_insn, cost)
rtx dep_pat = PATTERN (dep_insn);
if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
- return cost; /* This shouldn't happen! */
+ return cost; /* This should not happen! */
/* The dependency between the two instructions was on the data that
is being stored. Assume that this implies that the address of the
@@ -6154,67 +6274,161 @@ supersparc_adjust_cost (insn, link, dep_insn, cost)
return cost;
}
-int
+static int
+hypersparc_adjust_cost (insn, link, dep_insn, cost)
+ rtx insn;
+ rtx link;
+ rtx dep_insn;
+ int cost;
+{
+ enum attr_type insn_type, dep_type;
+ rtx pat = PATTERN(insn);
+ rtx dep_pat = PATTERN (dep_insn);
+
+ if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
+ return cost;
+
+ insn_type = get_attr_type (insn);
+ dep_type = get_attr_type (dep_insn);
+
+ switch (REG_NOTE_KIND (link))
+ {
+ case 0:
+ /* Data dependency; DEP_INSN writes a register that INSN reads some
+ cycles later. */
+
+ switch (insn_type)
+ {
+ case TYPE_STORE:
+ case TYPE_FPSTORE:
+ /* Get the delay iff the address of the store is the dependence. */
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ return cost;
+
+ if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
+ return cost;
+ return cost + 3;
+
+ case TYPE_LOAD:
+ case TYPE_SLOAD:
+ case TYPE_FPLOAD:
+ /* If a load, then the dependence must be on the memory address. If
+	   the addresses aren't equal, then it might be a false dependency.  */
+ if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
+ {
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
+ || GET_CODE (SET_DEST (dep_pat)) != MEM
+ || GET_CODE (SET_SRC (pat)) != MEM
+ || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
+ XEXP (SET_SRC (pat), 0)))
+ return cost + 2;
+
+ return cost + 8;
+ }
+ break;
+
+ case TYPE_BRANCH:
+ /* Compare to branch latency is 0. There is no benefit from
+ separating compare and branch. */
+ if (dep_type == TYPE_COMPARE)
+ return 0;
+ /* Floating point compare to branch latency is less than
+ compare to conditional move. */
+ if (dep_type == TYPE_FPCMP)
+ return cost - 1;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case REG_DEP_ANTI:
+ /* Anti-dependencies only penalize the fpu unit. */
+ if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
+ return 0;
+ break;
+
+ default:
+ break;
+ }
+
+ return cost;
+}
+
+static int
ultrasparc_adjust_cost (insn, link, dep_insn, cost)
- rtx insn;
- rtx link;
- rtx dep_insn;
- int cost;
+ rtx insn;
+ rtx link;
+ rtx dep_insn;
+ int cost;
{
enum attr_type insn_type, dep_type;
- rtx pat = PATTERN(insn);
- rtx dep_pat = PATTERN (dep_insn);
+ rtx pat = PATTERN(insn);
+ rtx dep_pat = PATTERN (dep_insn);
+
+ if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
+ return cost;
- if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
- return cost;
+ insn_type = get_attr_type (insn);
+ dep_type = get_attr_type (dep_insn);
- insn_type = get_attr_type (insn);
- dep_type = get_attr_type (dep_insn);
+ /* Nothing issues in parallel with integer multiplies, so
+ mark as zero cost since the scheduler can not do anything
+ about it. */
+ if (insn_type == TYPE_IMUL)
+ return 0;
#define SLOW_FP(dep_type) \
(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
switch (REG_NOTE_KIND (link))
- {
- case 0:
+ {
+ case 0:
/* Data dependency; DEP_INSN writes a register that INSN reads some
- cycles later. */
+ cycles later. */
+
+ if (dep_type == TYPE_CMOVE)
+ {
+ /* Instructions that read the result of conditional moves cannot
+ be in the same group or the following group. */
+ return cost + 1;
+ }
switch (insn_type)
- {
- /* UltraSPARC can dual issue a store and an instruction setting
- the value stored, except for divide and square root. */
+ {
+ /* UltraSPARC can dual issue a store and an instruction setting
+ the value stored, except for divide and square root. */
case TYPE_FPSTORE:
- if (! SLOW_FP (dep_type))
- return 0;
+ if (! SLOW_FP (dep_type))
+ return 0;
return cost;
- case TYPE_STORE:
+ case TYPE_STORE:
if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
- return cost;
+ return cost;
if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
- /* The dependency between the two instructions is on the data
- that is being stored. Assume that the address of the store
- is not also dependent. */
- return 0;
- return cost;
-
- case TYPE_LOAD:
- case TYPE_SLOAD:
- case TYPE_FPLOAD:
- /* A load does not return data until at least 11 cycles after
+ /* The dependency between the two instructions is on the data
+ that is being stored. Assume that the address of the store
+ is not also dependent. */
+ return 0;
+ return cost;
+
+ case TYPE_LOAD:
+ case TYPE_SLOAD:
+ case TYPE_FPLOAD:
+ /* A load does not return data until at least 11 cycles after
a store to the same location. 3 cycles are accounted for
in the load latency; add the other 8 here. */
if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
- {
+ {
/* If the addresses are not equal this may be a false
dependency because pointer aliasing could not be
determined. Add only 2 cycles in that case. 2 is
an arbitrary compromise between 8, which would cause
the scheduler to generate worse code elsewhere to
- compensate for a dependency which might not really
- exist, and 0. */
+ compensate for a dependency which might not really
+ exist, and 0. */
if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
|| GET_CODE (SET_SRC (pat)) != MEM
|| GET_CODE (SET_DEST (dep_pat)) != MEM
@@ -6222,72 +6436,831 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
XEXP (SET_DEST (dep_pat), 0)))
return cost + 2;
- return cost + 8;
- }
+ return cost + 8;
+ }
return cost;
- case TYPE_BRANCH:
+ case TYPE_BRANCH:
/* Compare to branch latency is 0. There is no benefit from
separating compare and branch. */
- if (dep_type == TYPE_COMPARE)
- return 0;
- /* Floating point compare to branch latency is less than
- compare to conditional move. */
- if (dep_type == TYPE_FPCMP)
- return cost - 1;
+ if (dep_type == TYPE_COMPARE)
+ return 0;
+ /* Floating point compare to branch latency is less than
+ compare to conditional move. */
+ if (dep_type == TYPE_FPCMP)
+ return cost - 1;
return cost;
- case TYPE_FPCMOVE:
+ case TYPE_FPCMOVE:
/* FMOVR class instructions can not issue in the same cycle
or the cycle after an instruction which writes any
integer register. Model this as cost 2 for dependent
- instructions. */
+ instructions. */
if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
|| dep_type == TYPE_BINARY)
- && cost < 2)
+ && cost < 2)
return 2;
/* Otherwise check as for integer conditional moves. */
- case TYPE_CMOVE:
+ case TYPE_CMOVE:
/* Conditional moves involving integer registers wait until
3 cycles after loads return data. The interlock applies
to all loads, not just dependent loads, but that is hard
- to model. */
- if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
- return cost + 3;
+ to model. */
+ if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
+ return cost + 3;
return cost;
default:
break;
}
- break;
+ break;
- case REG_DEP_ANTI:
+ case REG_DEP_ANTI:
/* Divide and square root lock destination registers for full latency. */
- if (! SLOW_FP (dep_type))
- return 0;
- break;
+ if (! SLOW_FP (dep_type))
+ return 0;
+ break;
+
+ case REG_DEP_OUTPUT:
+      /* IEU and FPU instructions that have the same destination
+ register cannot be grouped together. */
+ return cost + 1;
default:
break;
}
- /* Other costs not accounted for:
- - Multiply should be modeled as having no latency because there is
- nothing the scheduler can do about it.
- - Single precision floating point loads lock the other half of
- the even/odd register pair.
+ /* Other costs not accounted for:
+ - Single precision floating point loads lock the other half of
+ the even/odd register pair.
- Several hazards associated with ldd/std are ignored because these
- instructions are rarely generated for V9.
- - A shift following an integer instruction which does not set the
- condition codes can not issue in the same cycle.
+ instructions are rarely generated for V9.
- The floating point pipeline can not have both a single and double
precision operation active at the same time. Format conversions
and graphics instructions are given honorary double precision status.
- call and jmpl are always the first instruction in a group. */
- return cost;
-}
+ return cost;
+
+#undef SLOW_FP
+}
+
+int
+sparc_adjust_cost(insn, link, dep, cost)
+ rtx insn;
+ rtx link;
+ rtx dep;
+ int cost;
+{
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_SUPERSPARC:
+ cost = supersparc_adjust_cost (insn, link, dep, cost);
+ break;
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ cost = hypersparc_adjust_cost (insn, link, dep, cost);
+ break;
+ case PROCESSOR_ULTRASPARC:
+ cost = ultrasparc_adjust_cost (insn, link, dep, cost);
+ break;
+ default:
+ break;
+ }
+ return cost;
+}
+
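
sparc_adjust_cost is the single entry point the scheduler sees; the per-CPU
routines above stay static. Presumably it is reached through the ADJUST_COST
target macro in sparc.h, along these lines (an assumption for illustration;
the real definition is not part of this hunk and may differ):

    /* Hypothetical sketch of the sparc.h hook.  */
    #define ADJUST_COST(INSN, LINK, DEP, COST) \
      ((COST) = sparc_adjust_cost ((INSN), (LINK), (DEP), (COST)))
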
+/* This describes the state of the UltraSPARC pipeline during
+ instruction scheduling. */
+
+#define TMASK(__x) ((unsigned)1 << ((int)(__x)))
+#define UMASK(__x) ((unsigned)1 << ((int)(__x)))
+
+enum ultra_code { NONE=0, /* no insn at all */
+ IEU0, /* shifts and conditional moves */
+ IEU1, /* condition code setting insns, calls+jumps */
+ IEUN, /* all other single cycle ieu insns */
+ LSU, /* loads and stores */
+ CTI, /* branches */
+ FPM, /* FPU pipeline 1, multiplies and divides */
+ FPA, /* FPU pipeline 2, all other operations */
+ SINGLE, /* single issue instructions */
+ NUM_ULTRA_CODES };
+
+static const char *ultra_code_names[NUM_ULTRA_CODES] = {
+ "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
+ "FPM", "FPA", "SINGLE" };
+
+struct ultrasparc_pipeline_state {
+ /* The insns in this group. */
+ rtx group[4];
+
+ /* The code for each insn. */
+ enum ultra_code codes[4];
+
+ /* Which insns in this group have been committed by the
+ scheduler. This is how we determine how many more
+ can issue this cycle. */
+ char commit[4];
+
+ /* How many insns in this group. */
+ char group_size;
+
+ /* Mask of free slots still in this group. */
+ char free_slot_mask;
+
+ /* The slotter uses the following to determine what other
+ insn types can still make their way into this group. */
+ char contents [NUM_ULTRA_CODES];
+ char num_ieu_insns;
+};
+
+#define ULTRA_NUM_HIST 8
+static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
+static int ultra_cur_hist;
+static int ultra_cycles_elapsed;
+
+#define ultra_pipe (ultra_pipe_hist[ultra_cur_hist])
+
+/* Given TYPE_MASK compute the ultra_code it has. */
+static enum ultra_code
+ultra_code_from_mask (type_mask)
+ int type_mask;
+{
+ if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
+ return IEU0;
+ else if (type_mask & (TMASK (TYPE_COMPARE) |
+ TMASK (TYPE_CALL) |
+ TMASK (TYPE_UNCOND_BRANCH)))
+ return IEU1;
+ else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
+ TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY)))
+ return IEUN;
+ else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
+ TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
+ TMASK (TYPE_FPSTORE)))
+ return LSU;
+ else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
+ TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRT)))
+ return FPM;
+ else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
+ TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
+ return FPA;
+ else if (type_mask & TMASK (TYPE_BRANCH))
+ return CTI;
+
+ return SINGLE;
+}
+
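
TMASK turns each insn type attribute into a single bit, so
ultra_code_from_mask above can test a whole class of types with one AND;
UMASK does the same for the ultra_code values themselves. A tiny
self-contained illustration with stand-in enum values:

    #include <stdio.h>

    enum fake_type { T_SHIFT, T_CMOVE, T_IALU, T_LOAD };
    #define TMASK(x) ((unsigned) 1 << (int) (x))

    int main (void)
    {
      unsigned ieu0 = TMASK (T_SHIFT) | TMASK (T_CMOVE);
      printf ("%d\n", (TMASK (T_CMOVE) & ieu0) != 0);  /* 1: IEU0 class */
      printf ("%d\n", (TMASK (T_LOAD)  & ieu0) != 0);  /* 0 */
      return 0;
    }
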
+/* Check INSN (a conditional move) and make sure that its
+ results are available at this cycle. Return 1 if the
+ results are in fact ready. */
+static int
+ultra_cmove_results_ready_p (insn)
+ rtx insn;
+{
+ struct ultrasparc_pipeline_state *up;
+ int entry, slot;
+
+ /* If this got dispatched in the previous
+ group, the results are not ready. */
+ entry = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
+ up = &ultra_pipe_hist[entry];
+ slot = 4;
+ while (--slot >= 0)
+ if (up->group[slot] == insn)
+ return 0;
+
+ return 1;
+}
+
+/* Walk backwards in pipeline history looking for FPU
+ operations which use a mode different than FPMODE and
+ will create a stall if an insn using FPMODE were to be
+ dispatched this cycle. */
+static int
+ultra_fpmode_conflict_exists (fpmode)
+ enum machine_mode fpmode;
+{
+ int hist_ent;
+ int hist_lim;
+
+ hist_ent = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
+ if (ultra_cycles_elapsed < 4)
+ hist_lim = ultra_cycles_elapsed;
+ else
+ hist_lim = 4;
+ while (hist_lim > 0)
+ {
+ struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
+ int slot = 4;
+
+ while (--slot >= 0)
+ {
+ rtx insn = up->group[slot];
+ enum machine_mode this_mode;
+ rtx pat;
+
+ if (! insn
+ || GET_CODE (insn) != INSN
+ || (pat = PATTERN (insn)) == 0
+ || GET_CODE (pat) != SET)
+ continue;
+
+ this_mode = GET_MODE (SET_DEST (pat));
+ if ((this_mode != SFmode
+ && this_mode != DFmode)
+ || this_mode == fpmode)
+ continue;
+
+ /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
+	     we will get a stall.  Loads and stores are independent
+ of these rules. */
+ if (GET_CODE (SET_SRC (pat)) != ABS
+ && GET_CODE (SET_SRC (pat)) != NEG
+ && ((TMASK (get_attr_type (insn)) &
+ (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
+ TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRT) |
+ TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
+ return 1;
+ }
+ hist_lim--;
+ hist_ent = (hist_ent - 1) % (ULTRA_NUM_HIST - 1);
+ }
+
+ /* No conflicts, safe to dispatch. */
+ return 0;
+}
+
+/* Find an instruction in LIST which has one of the
+ type attributes enumerated in TYPE_MASK. START
+ says where to begin the search.
+
+ NOTE: This scheme depends upon the fact that we
+ have less than 32 distinct type attributes. */
+
+static int ultra_types_avail;
+
+static rtx *
+ultra_find_type (type_mask, list, start)
+ int type_mask;
+ rtx *list;
+ int start;
+{
+ int i;
+
+  /* Short circuit if no such insn exists in the ready
+     list at the moment.  */
+ if ((type_mask & ultra_types_avail) == 0)
+ return 0;
+
+ for (i = start; i >= 0; i--)
+ {
+ rtx insn = list[i];
+
+ if (recog_memoized (insn) >= 0
+ && (TMASK(get_attr_type (insn)) & type_mask))
+ {
+ enum machine_mode fpmode = SFmode;
+ rtx pat = 0;
+ int slot;
+ int check_depend = 0;
+ int check_fpmode_conflict = 0;
+
+ if (GET_CODE (insn) == INSN
+ && (pat = PATTERN(insn)) != 0
+ && GET_CODE (pat) == SET
+ && !(type_mask & (TMASK (TYPE_STORE) |
+ TMASK (TYPE_FPSTORE))))
+ {
+ check_depend = 1;
+ if (GET_MODE (SET_DEST (pat)) == SFmode
+ || GET_MODE (SET_DEST (pat)) == DFmode)
+ {
+ fpmode = GET_MODE (SET_DEST (pat));
+ check_fpmode_conflict = 1;
+ }
+ }
+
+ slot = 4;
+ while(--slot >= 0)
+ {
+ rtx slot_insn = ultra_pipe.group[slot];
+ rtx slot_pat;
+
+ /* Already issued, bad dependency, or FPU
+ mode conflict. */
+ if (slot_insn != 0
+ && (slot_pat = PATTERN (slot_insn)) != 0
+ && ((insn == slot_insn)
+ || (check_depend == 1
+ && GET_CODE (slot_insn) == INSN
+ && GET_CODE (slot_pat) == SET
+ && ((GET_CODE (SET_DEST (slot_pat)) == REG
+ && GET_CODE (SET_SRC (pat)) == REG
+ && REGNO (SET_DEST (slot_pat)) ==
+ REGNO (SET_SRC (pat)))
+ || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
+ && GET_CODE (SET_SRC (pat)) == SUBREG
+ && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
+ REGNO (SUBREG_REG (SET_SRC (pat)))
+ && SUBREG_WORD (SET_DEST (slot_pat)) ==
+ SUBREG_WORD (SET_SRC (pat)))))
+ || (check_fpmode_conflict == 1
+ && GET_CODE (slot_insn) == INSN
+ && GET_CODE (slot_pat) == SET
+ && (GET_MODE (SET_DEST (slot_pat)) == SFmode
+ || GET_MODE (SET_DEST (slot_pat)) == DFmode)
+ && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
+ goto next;
+ }
+
+ /* Check for peculiar result availability and dispatch
+ interference situations. */
+ if (pat != 0
+ && ultra_cycles_elapsed > 0)
+ {
+ rtx link;
+
+ for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
+ {
+ rtx link_insn = XEXP (link, 0);
+ if (GET_CODE (link_insn) == INSN
+ && recog_memoized (link_insn) >= 0
+ && (TMASK (get_attr_type (link_insn)) &
+ (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
+ && ! ultra_cmove_results_ready_p (link_insn))
+ goto next;
+ }
+
+ if (check_fpmode_conflict
+ && ultra_fpmode_conflict_exists (fpmode))
+ goto next;
+ }
+
+ return &list[i];
+ }
+ next:
+ ;
+ }
+ return 0;
+}
+
+static void
+ultra_build_types_avail (ready, n_ready)
+ rtx *ready;
+ int n_ready;
+{
+ int i = n_ready - 1;
+
+ ultra_types_avail = 0;
+ while(i >= 0)
+ {
+ rtx insn = ready[i];
+
+ if (recog_memoized (insn) >= 0)
+ ultra_types_avail |= TMASK (get_attr_type (insn));
+
+ i -= 1;
+ }
+}
+
+/* Place the insn pointed to by IP into the pipeline.
+ Make element THIS of READY be that insn if it
+ is not already. TYPE indicates the pipeline class
+ this insn falls into. */
+static void
+ultra_schedule_insn (ip, ready, this, type)
+ rtx *ip;
+ rtx *ready;
+ int this;
+ enum ultra_code type;
+{
+ int pipe_slot;
+ char mask = ultra_pipe.free_slot_mask;
+
+ /* Obtain free slot. */
+ for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
+ if ((mask & (1 << pipe_slot)) != 0)
+ break;
+ if (pipe_slot == 4)
+ abort ();
+
+ /* In it goes, and it hasn't been committed yet. */
+ ultra_pipe.group[pipe_slot] = *ip;
+ ultra_pipe.codes[pipe_slot] = type;
+ ultra_pipe.contents[type] = 1;
+ if (UMASK (type) &
+ (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
+ ultra_pipe.num_ieu_insns += 1;
+
+ ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
+ ultra_pipe.group_size += 1;
+ ultra_pipe.commit[pipe_slot] = 0;
+
+ /* Update ready list. */
+ if (ip != &ready[this])
+ {
+ rtx temp = *ip;
+
+ *ip = ready[this];
+ ready[this] = temp;
+ }
+}
+
+/* Advance to the next pipeline group. */
+static void
+ultra_flush_pipeline ()
+{
+ ultra_cur_hist = (ultra_cur_hist + 1) % (ULTRA_NUM_HIST - 1);
+ ultra_cycles_elapsed += 1;
+ bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
+ ultra_pipe.free_slot_mask = 0xf;
+}
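
Note the ring arithmetic here and in the lookback routines above: the modulus
is ULTRA_NUM_HIST - 1, so with ULTRA_NUM_HIST == 8 the history index cycles
through 0..6 and the last array entry is apparently never reached. A quick
standalone check of the index sequence:

    #include <stdio.h>

    int main (void)
    {
      int cur = 0, i;
      for (i = 0; i < 10; i++)
        {
          cur = (cur + 1) % (8 - 1);
          printf ("%d ", cur);   /* 1 2 3 4 5 6 0 1 2 3 */
        }
      putchar ('\n');
      return 0;
    }
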
+
+static int ultra_reorder_called_this_block;
+
+/* Init our data structures for this current block. */
+void
+ultrasparc_sched_init (dump, sched_verbose)
+ FILE *dump ATTRIBUTE_UNUSED;
+ int sched_verbose ATTRIBUTE_UNUSED;
+{
+ bzero ((char *) ultra_pipe_hist, sizeof ultra_pipe_hist);
+ ultra_cur_hist = 0;
+ ultra_cycles_elapsed = 0;
+ ultra_reorder_called_this_block = 0;
+ ultra_pipe.free_slot_mask = 0xf;
+}
+
+/* INSN has been scheduled, update pipeline commit state
+ and return how many instructions are still to be
+ scheduled in this group. */
+int
+ultrasparc_variable_issue (insn)
+ rtx insn;
+{
+ struct ultrasparc_pipeline_state *up = &ultra_pipe;
+ int i, left_to_fire;
+
+ left_to_fire = 0;
+ for (i = 0; i < 4; i++)
+ {
+ if (up->group[i] == 0)
+ continue;
+
+ if (up->group[i] == insn)
+ {
+ up->commit[i] = 1;
+ }
+ else if (! up->commit[i])
+ left_to_fire++;
+ }
+
+ return left_to_fire;
+}
+
+/* In actual_hazard_this_instance, we may have yanked some
+ instructions from the ready list due to conflict cost
+ adjustments. If so, and such an insn was in our pipeline
+ group, remove it and update state. */
+static void
+ultra_rescan_pipeline_state (ready, n_ready)
+ rtx *ready;
+ int n_ready;
+{
+ struct ultrasparc_pipeline_state *up = &ultra_pipe;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ rtx insn = up->group[i];
+ int j;
+
+ if (! insn)
+ continue;
+
+ /* If it has been committed, then it was removed from
+ the ready list because it was actually scheduled,
+ and that is not the case we are searching for here. */
+ if (up->commit[i] != 0)
+ continue;
+
+ for (j = n_ready - 1; j >= 0; j--)
+ if (ready[j] == insn)
+ break;
+
+ /* If we didn't find it, toss it. */
+ if (j < 0)
+ {
+ enum ultra_code ucode = up->codes[i];
+
+ up->group[i] = 0;
+ up->codes[i] = NONE;
+ up->contents[ucode] = 0;
+ if (UMASK (ucode) &
+ (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
+ up->num_ieu_insns -= 1;
+
+ up->free_slot_mask |= (1 << i);
+ up->group_size -= 1;
+ up->commit[i] = 0;
+ }
+ }
+}
+
+void
+ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
+ FILE *dump;
+ int sched_verbose;
+ rtx *ready;
+ int n_ready;
+{
+ struct ultrasparc_pipeline_state *up = &ultra_pipe;
+ int i, this_insn;
+
+ /* We get called once unnecessarily per block of insns
+ scheduled. */
+ if (ultra_reorder_called_this_block == 0)
+ {
+ ultra_reorder_called_this_block = 1;
+ return;
+ }
+
+ if (sched_verbose)
+ {
+ int n;
+
+ fprintf (dump, "\n;;\tUltraSPARC Looking at [");
+ for (n = n_ready - 1; n >= 0; n--)
+ {
+ rtx insn = ready[n];
+ enum ultra_code ucode;
+
+ if (recog_memoized (insn) < 0)
+ continue;
+ ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
+ if (n != 0)
+ fprintf (dump, "%s(%d) ",
+ ultra_code_names[ucode],
+ INSN_UID (insn));
+ else
+ fprintf (dump, "%s(%d)",
+ ultra_code_names[ucode],
+ INSN_UID (insn));
+ }
+ fprintf (dump, "]\n");
+ }
+
+ this_insn = n_ready - 1;
+
+ /* Skip over junk we don't understand. */
+ while ((this_insn >= 0)
+ && recog_memoized (ready[this_insn]) < 0)
+ this_insn--;
+
+ ultra_build_types_avail (ready, this_insn + 1);
+
+ while (this_insn >= 0) {
+ int old_group_size = up->group_size;
+
+ if (up->group_size != 0)
+ {
+ int num_committed;
+
+ num_committed = (up->commit[0] + up->commit[1] +
+ up->commit[2] + up->commit[3]);
+	  /* If nothing has been committed from our group, or all of
+	     them have, clear out the (current cycle's) pipeline
+ state and start afresh. */
+ if (num_committed == 0
+ || num_committed == up->group_size)
+ {
+ ultra_flush_pipeline ();
+ up = &ultra_pipe;
+ old_group_size = 0;
+ }
+ else
+ {
+ /* OK, some ready list insns got requeued and thus removed
+ from the ready list. Account for this fact. */
+ ultra_rescan_pipeline_state (ready, n_ready);
+
+ /* Something "changed", make this look like a newly
+ formed group so the code at the end of the loop
+ knows that progress was in fact made. */
+ if (up->group_size != old_group_size)
+ old_group_size = 0;
+ }
+ }
+
+ if (up->group_size == 0)
+ {
+ /* If the pipeline is (still) empty and we have any single
+ group insns, get them out now as this is a good time. */
+ rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_ADDRESS) |
+ TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
+ TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
+ ready, this_insn);
+ if (ip)
+ {
+ ultra_schedule_insn (ip, ready, this_insn, SINGLE);
+ break;
+ }
+
+ /* If we are not in the process of emptying out the pipe, try to
+	 obtain an instruction which must be the first in its group.  */
+ ip = ultra_find_type ((TMASK (TYPE_CALL) |
+ TMASK (TYPE_CALL_NO_DELAY_SLOT) |
+ TMASK (TYPE_UNCOND_BRANCH)),
+ ready, this_insn);
+ if (ip)
+ {
+ ultra_schedule_insn (ip, ready, this_insn, IEU1);
+ this_insn--;
+ }
+ else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
+ TMASK (TYPE_FPDIVD) |
+ TMASK (TYPE_FPSQRT)),
+ ready, this_insn)) != 0)
+ {
+ ultra_schedule_insn (ip, ready, this_insn, FPM);
+ this_insn--;
+ }
+ }
+
+ /* Try to fill the integer pipeline. First, look for an IEU0 specific
+ operation. We can't do more IEU operations if the first 3 slots are
+ all full or we have dispatched two IEU insns already. */
+ if ((up->free_slot_mask & 0x7) != 0
+ && up->num_ieu_insns < 2
+ && up->contents[IEU0] == 0
+ && up->contents[IEUN] == 0)
+ {
+ rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
+ if (ip)
+ {
+ ultra_schedule_insn (ip, ready, this_insn, IEU0);
+ this_insn--;
+ }
+ }
+
+ /* If we can, try to find an IEU1 specific or an unnamed
+ IEU instruction. */
+ if ((up->free_slot_mask & 0x7) != 0
+ && up->num_ieu_insns < 2)
+ {
+ rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
+ TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY) |
+ (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
+ ready, this_insn);
+ if (ip)
+ {
+ rtx insn = *ip;
+
+ ultra_schedule_insn (ip, ready, this_insn,
+ (!up->contents[IEU1]
+ && get_attr_type (insn) == TYPE_COMPARE)
+ ? IEU1 : IEUN);
+ this_insn--;
+ }
+ }
+
+ /* If only one IEU insn has been found, try to find another unnamed
+ IEU operation or an IEU1 specific one. */
+ if ((up->free_slot_mask & 0x7) != 0
+ && up->num_ieu_insns < 2)
+ {
+ rtx *ip;
+ int tmask = (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
+ TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY));
+
+ if (!up->contents[IEU1])
+ tmask |= TMASK (TYPE_COMPARE);
+ ip = ultra_find_type (tmask, ready, this_insn);
+ if (ip)
+ {
+ rtx insn = *ip;
+
+ ultra_schedule_insn (ip, ready, this_insn,
+ (!up->contents[IEU1]
+ && get_attr_type (insn) == TYPE_COMPARE)
+ ? IEU1 : IEUN);
+ this_insn--;
+ }
+ }
+
+ /* Try for a load or store, but such an insn can only be issued
+       if it is within one of the first 3 slots.  */
+ if ((up->free_slot_mask & 0x7) != 0
+ && up->contents[LSU] == 0)
+ {
+ rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
+ TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
+ TMASK (TYPE_FPSTORE)), ready, this_insn);
+ if (ip)
+ {
+ ultra_schedule_insn (ip, ready, this_insn, LSU);
+ this_insn--;
+ }
+ }
+
+ /* Now find FPU operations, first FPM class. But not divisions or
+ square-roots because those will break the group up. Unlike all
+ the previous types, these can go in any slot. */
+ if (up->free_slot_mask != 0
+ && up->contents[FPM] == 0)
+ {
+ rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
+ if (ip)
+ {
+ ultra_schedule_insn (ip, ready, this_insn, FPM);
+ this_insn--;
+ }
+ }
+
+ /* Continue on with FPA class if we have not filled the group already. */
+ if (up->free_slot_mask != 0
+ && up->contents[FPA] == 0)
+ {
+ rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
+ TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
+ ready, this_insn);
+ if (ip)
+ {
+ ultra_schedule_insn (ip, ready, this_insn, FPA);
+ this_insn--;
+ }
+ }
+
+ /* Finally, maybe stick a branch in here. */
+ if (up->free_slot_mask != 0
+ && up->contents[CTI] == 0)
+ {
+ rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
+
+ /* Try to slip in a branch only if it is one of the
+ next 2 in the ready list. */
+ if (ip && ((&ready[this_insn] - ip) < 2))
+ {
+ ultra_schedule_insn (ip, ready, this_insn, CTI);
+ this_insn--;
+ }
+ }
+
+ up->group_size = 0;
+ for (i = 0; i < 4; i++)
+ if ((up->free_slot_mask & (1 << i)) == 0)
+ up->group_size++;
+
+ /* See if we made any progress... */
+ if (old_group_size != up->group_size)
+ break;
+
+ /* Clean out the (current cycle's) pipeline state
+ and try once more. If we placed no instructions
+ into the pipeline at all, it means a real hard
+ conflict exists with some earlier issued instruction
+ so we must advance to the next cycle to clear it up. */
+ if (up->group_size == 0)
+ {
+ ultra_flush_pipeline ();
+ up = &ultra_pipe;
+ }
+ else
+ {
+ bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
+ ultra_pipe.free_slot_mask = 0xf;
+ }
+ }
+
+ if (sched_verbose)
+ {
+ int n, gsize;
+
+ fprintf (dump, ";;\tUltraSPARC Launched [");
+ gsize = up->group_size;
+ for (n = 0; n < 4; n++)
+ {
+ rtx insn = up->group[n];
+
+ if (! insn)
+ continue;
+
+ gsize -= 1;
+ if (gsize != 0)
+ fprintf (dump, "%s(%d) ",
+ ultra_code_names[up->codes[n]],
+ INSN_UID (insn));
+ else
+ fprintf (dump, "%s(%d)",
+ ultra_code_names[up->codes[n]],
+ INSN_UID (insn));
+ }
+ fprintf (dump, "]\n");
+ }
+}
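
The reorder loop above fills a group greedily in a fixed priority order:
single-issue insns first, then group-leading calls and unconditional branches,
FP divides and square roots, an IEU0-only shift, two passes of generic IEU
work, one load/store confined to the first three slots, one FPM, one FPA, and
finally a branch taken only from the front of the ready list. The slot
accounting rests on free_slot_mask, claimed bit by bit as in this standalone
sketch of the search done by ultra_schedule_insn:

    #include <stdio.h>

    int main (void)
    {
      unsigned mask = 0xf;              /* four open slots, as after flush */
      int slot;

      while (mask != 0)
        {
          /* First free slot, exactly as ultra_schedule_insn scans.  */
          for (slot = 0; slot < 4; slot++)
            if (mask & (1u << slot))
              break;
          printf ("claim slot %d\n", slot);
          mask &= ~(1u << slot);
        }
      return 0;
    }
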
int
sparc_issue_rate ()
@@ -6301,6 +7274,9 @@ sparc_issue_rate ()
return 2;
case PROCESSOR_SUPERSPARC:
return 3;
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ return 2;
case PROCESSOR_ULTRASPARC:
return 4;
}
@@ -6322,8 +7298,6 @@ set_extends(x, insn)
/* LO_SUM is used with sethi. sethi cleared the high
bits and the values used with lo_sum are positive */
case LO_SUM:
- /* UNSPEC is v8plus_clear_high */
- case UNSPEC:
/* Store flag stores 0 or 1 */
case LT: case LTU:
case GT: case GTU:
@@ -6360,6 +7334,110 @@ set_extends(x, insn)
}
}
+/* We _ought_ to have only one kind per function, but... */
+static rtx sparc_addr_diff_list;
+static rtx sparc_addr_list;
+
+void
+sparc_defer_case_vector (lab, vec, diff)
+ rtx lab, vec;
+ int diff;
+{
+ vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
+ if (diff)
+ sparc_addr_diff_list
+ = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
+ else
+ sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
+}
+
+static void
+sparc_output_addr_vec (vec)
+ rtx vec;
+{
+ rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
+ int idx, vlen = XVECLEN (body, 0);
+
+#ifdef ASM_OUTPUT_ADDR_VEC_START
+ ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
+#endif
+
+#ifdef ASM_OUTPUT_CASE_LABEL
+ ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
+ NEXT_INSN (lab));
+#else
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
+#endif
+
+ for (idx = 0; idx < vlen; idx++)
+ {
+ ASM_OUTPUT_ADDR_VEC_ELT
+ (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
+ }
+
+#ifdef ASM_OUTPUT_ADDR_VEC_END
+ ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
+#endif
+}
+
+static void
+sparc_output_addr_diff_vec (vec)
+ rtx vec;
+{
+ rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
+ rtx base = XEXP (XEXP (body, 0), 0);
+ int idx, vlen = XVECLEN (body, 1);
+
+#ifdef ASM_OUTPUT_ADDR_VEC_START
+ ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
+#endif
+
+#ifdef ASM_OUTPUT_CASE_LABEL
+ ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
+ NEXT_INSN (lab));
+#else
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
+#endif
+
+ for (idx = 0; idx < vlen; idx++)
+ {
+ ASM_OUTPUT_ADDR_DIFF_ELT
+ (asm_out_file,
+ body,
+ CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
+ CODE_LABEL_NUMBER (base));
+ }
+
+#ifdef ASM_OUTPUT_ADDR_VEC_END
+ ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
+#endif
+}
+
+static void
+sparc_output_deferred_case_vectors ()
+{
+ rtx t;
+ int align;
+
+ if (sparc_addr_list == NULL_RTX
+ && sparc_addr_diff_list == NULL_RTX)
+ return;
+
+ /* Align to cache line in the function's code section. */
+ function_section (current_function_decl);
+
+ align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
+ if (align > 0)
+ ASM_OUTPUT_ALIGN (asm_out_file, align);
+
+ for (t = sparc_addr_list; t ; t = XEXP (t, 1))
+ sparc_output_addr_vec (XEXP (t, 0));
+ for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
+ sparc_output_addr_diff_vec (XEXP (t, 0));
+
+ sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
+}
+
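
The defer/flush pattern above prepends each jump table to a list as it is
encountered, and sparc_output_deferred_case_vectors then emits the lot in one
aligned batch from the epilogue, so emission is last-in, first-out. A minimal
plain-C analogue with hypothetical names (the directives stand in for the
ASM_OUTPUT_* macros):

    #include <stdio.h>
    #include <stdlib.h>

    struct vec { int label; struct vec *next; };
    static struct vec *deferred;

    static void defer (int label)
    {
      struct vec *v = malloc (sizeof *v);
      v->label = label;
      v->next = deferred;      /* prepend, like gen_rtx_EXPR_LIST above */
      deferred = v;
    }

    static void flush (void)
    {
      struct vec *v;
      puts ("\t.align\t8");    /* stand-in for the ASM_OUTPUT_ALIGN call */
      for (v = deferred; v != 0; v = v->next)
        printf ("\t.word\t.L%d\n", v->label);
      deferred = 0;
    }

    int main (void)
    {
      defer (3);
      defer (7);
      flush ();                /* emits .L7 then .L3 */
      return 0;
    }
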
/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
unknown. Return 1 if the high bits are zero, -1 if the register is
sign extended. */
@@ -6452,10 +7530,314 @@ sparc_return_peephole_ok (dest, src)
{
if (! TARGET_V9)
return 0;
- if (leaf_function)
+ if (current_function_uses_only_leaf_regs)
return 0;
if (GET_CODE (src) != CONST_INT
&& (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
return 0;
return IN_OR_GLOBAL_P (dest);
}
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry.
+
+ 32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
+ during profiling so we need to save/restore it around the call to mcount.
+ We're guaranteed that a save has just been done, and we use the space
+ allocated for intreg/fpreg value passing. */
+
+void
+sparc_function_profiler (file, labelno)
+ FILE *file;
+ int labelno;
+{
+ char buf[32];
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
+
+ if (! TARGET_ARCH64)
+ fputs ("\tst\t%g2,[%fp-4]\n", file);
+
+ fputs ("\tsethi\t%hi(", file);
+ assemble_name (file, buf);
+ fputs ("),%o0\n", file);
+
+ fputs ("\tcall\t", file);
+ assemble_name (file, MCOUNT_FUNCTION);
+ putc ('\n', file);
+
+ fputs ("\t or\t%o0,%lo(", file);
+ assemble_name (file, buf);
+ fputs ("),%o0\n", file);
+
+ if (! TARGET_ARCH64)
+ fputs ("\tld\t[%fp-4],%g2\n", file);
+}
+
+
+/* The following macro shall output assembler code to FILE
+ to initialize basic-block profiling.
+
+ If profile_block_flag == 2
+
+ Output code to call the subroutine `__bb_init_trace_func'
+ and pass two parameters to it. The first parameter is
+ the address of a block allocated in the object module.
+ The second parameter is the number of the first basic block
+ of the function.
+
+ The name of the block is a local symbol made with this statement:
+
+ ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
+
+ Of course, since you are writing the definition of
+ `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
+ can take a short cut in the definition of this macro and use the
+ name that you know will result.
+
+ The number of the first basic block of the function is
+ passed to the macro in BLOCK_OR_LABEL.
+
+ If described in a virtual assembler language the code to be
+ output looks like:
+
+ parameter1 <- LPBX0
+ parameter2 <- BLOCK_OR_LABEL
+ call __bb_init_trace_func
+
+ else if profile_block_flag != 0
+
+ Output code to call the subroutine `__bb_init_func'
+ and pass one single parameter to it, which is the same
+ as the first parameter to `__bb_init_trace_func'.
+
+ The first word of this parameter is a flag which will be nonzero if
+ the object module has already been initialized. So test this word
+ first, and do not call `__bb_init_func' if the flag is nonzero.
+ Note: When profile_block_flag == 2 the test need not be done
+ but `__bb_init_trace_func' *must* be called.
+
+ BLOCK_OR_LABEL may be used to generate a label number as a
+ branch destination in case `__bb_init_func' will not be called.
+
+ If described in a virtual assembler language the code to be
+ output looks like:
+
+ cmp (LPBX0),0
+ jne local_label
+ parameter1 <- LPBX0
+ call __bb_init_func
+ local_label:
+
+*/
+
+void
+sparc_function_block_profiler(file, block_or_label)
+ FILE *file;
+ int block_or_label;
+{
+ char LPBX[32];
+ ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
+
+ if (profile_block_flag == 2)
+ {
+ fputs ("\tsethi\t%hi(", file);
+ assemble_name (file, LPBX);
+ fputs ("),%o0\n", file);
+
+ fprintf (file, "\tsethi\t%%hi(%d),%%o1\n", block_or_label);
+
+ fputs ("\tor\t%o0,%lo(", file);
+ assemble_name (file, LPBX);
+ fputs ("),%o0\n", file);
+
+ fprintf (file, "\tcall\t%s__bb_init_trace_func\n", user_label_prefix);
+
+ fprintf (file, "\t or\t%%o1,%%lo(%d),%%o1\n", block_or_label);
+ }
+ else if (profile_block_flag != 0)
+ {
+ char LPBY[32];
+ ASM_GENERATE_INTERNAL_LABEL (LPBY, "LPBY", block_or_label);
+
+ fputs ("\tsethi\t%hi(", file);
+ assemble_name (file, LPBX);
+ fputs ("),%o0\n", file);
+
+ fputs ("\tld\t[%lo(", file);
+ assemble_name (file, LPBX);
+ fputs (")+%o0],%o1\n", file);
+
+ fputs ("\ttst\t%o1\n", file);
+
+ if (TARGET_V9)
+ {
+ fputs ("\tbne,pn\t%icc,", file);
+ assemble_name (file, LPBY);
+ putc ('\n', file);
+ }
+ else
+ {
+ fputs ("\tbne\t", file);
+ assemble_name (file, LPBY);
+ putc ('\n', file);
+ }
+
+ fputs ("\t or\t%o0,%lo(", file);
+ assemble_name (file, LPBX);
+ fputs ("),%o0\n", file);
+
+ fprintf (file, "\tcall\t%s__bb_init_func\n\t nop\n", user_label_prefix);
+
+ ASM_OUTPUT_INTERNAL_LABEL (file, "LPBY", block_or_label);
+ }
+}
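/* Editor's sketch (not part of the import): with profile_block_flag == 2,
   an empty user_label_prefix, and "LPBX0" as the generated label, the
   function above emits (for BLOCK_OR_LABEL 14):

	sethi	%hi(LPBX0),%o0
	sethi	%hi(14),%o1
	or	%o0,%lo(LPBX0),%o0
	call	__bb_init_trace_func
	 or	%o1,%lo(14),%o1		! delay slot: finish second argument
*/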
+
+/* The following macro shall output assembler code to FILE
+ to increment a counter associated with basic block number BLOCKNO.
+
+ If profile_block_flag == 2
+
+ Output code to initialize the global structure `__bb' and
+ call the function `__bb_trace_func' which will increment the
+ counter.
+
+ `__bb' consists of two words. In the first word the number
+ of the basic block has to be stored. In the second word
+ the address of a block allocated in the object module
+ has to be stored.
+
+ The basic block number is given by BLOCKNO.
+
+ The address of the block is given by the label created with
+
+ ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
+
+ by FUNCTION_BLOCK_PROFILER.
+
+ Of course, since you are writing the definition of
+ `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
+ can take a short cut in the definition of this macro and use the
+ name that you know will result.
+
+ If described in a virtual assembler language the code to be
+ output looks like:
+
+ move BLOCKNO -> (__bb)
+ move LPBX0 -> (__bb+4)
+ call __bb_trace_func
+
+ Note that function `__bb_trace_func' must not change the
+   machine state, especially the flag register. To guarantee
+ this, you must output code to save and restore registers
+ either in this macro or in the macros MACHINE_STATE_SAVE
+ and MACHINE_STATE_RESTORE. The last two macros will be
+ used in the function `__bb_trace_func', so you must make
+ sure that the function prologue does not change any
+ register prior to saving it with MACHINE_STATE_SAVE.
+
+ else if profile_block_flag != 0
+
+ Output code to increment the counter directly.
+ Basic blocks are numbered separately from zero within each
+ compiled object module. The count associated with block number
+ BLOCKNO is at index BLOCKNO in an array of words; the name of
+ this array is a local symbol made with this statement:
+
+ ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
+
+ Of course, since you are writing the definition of
+ `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
+ can take a short cut in the definition of this macro and use the
+ name that you know will result.
+
+ If described in a virtual assembler language, the code to be
+ output looks like:
+
+ inc (LPBX2+4*BLOCKNO)
+
+*/
+
+void
+sparc_block_profiler (file, blockno)
+ FILE *file;
+ int blockno;
+{
+ char LPBX[32];
+
+ if (profile_block_flag == 2)
+ {
+ ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
+
+ fprintf (file, "\tsethi\t%%hi(%s__bb),%%g1\n", user_label_prefix);
+ fprintf (file, "\tsethi\t%%hi(%d),%%g2\n", blockno);
+ fprintf (file, "\tor\t%%g1,%%lo(%s__bb),%%g1\n", user_label_prefix);
+ fprintf (file, "\tor\t%%g2,%%lo(%d),%%g2\n", blockno);
+
+ fputs ("\tst\t%g2,[%g1]\n", file);
+
+ fputs ("\tsethi\t%hi(", file);
+ assemble_name (file, LPBX);
+ fputs ("),%g2\n", file);
+
+ fputs ("\tor\t%o2,%lo(", file);
+ assemble_name (file, LPBX);
+ fputs ("),%g2\n", file);
+
+ fputs ("\tst\t%g2,[%g1+4]\n", file);
+ fputs ("\tmov\t%o7,%g2\n", file);
+
+ fprintf (file, "\tcall\t%s__bb_trace_func\n\t nop\n", user_label_prefix);
+
+ fputs ("\tmov\t%g2,%o7\n", file);
+ }
+ else if (profile_block_flag != 0)
+ {
+ ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 2);
+
+ fputs ("\tsethi\t%hi(", file);
+ assemble_name (file, LPBX);
+ fprintf (file, "+%d),%%g1\n", blockno*4);
+
+ fputs ("\tld\t[%g1+%lo(", file);
+ assemble_name (file, LPBX);
+ fprintf (file, "+%d)],%%g2\n", blockno*4);
+
+ fputs ("\tadd\t%g2,1,%g2\n", file);
+
+ fputs ("\tst\t%g2,[%g1+%lo(", file);
+ assemble_name (file, LPBX);
+ fprintf (file, "+%d)]\n", blockno*4);
+ }
+}
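/* Editor's sketch (not part of the import): the direct-increment case
   above (profile_block_flag == 1, i.e. plain -a) emits, for BLOCKNO 3
   and label "LPBX2":

	sethi	%hi(LPBX2+12),%g1
	ld	[%g1+%lo(LPBX2+12)],%g2
	add	%g2,1,%g2
	st	%g2,[%g1+%lo(LPBX2+12)]
*/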
+
+/* The following macro shall output assembler code to FILE
+ to indicate a return from function during basic-block profiling.
+
+ If profile_block_flag == 2:
+
+ Output assembler code to call function `__bb_trace_ret'.
+
+ Note that function `__bb_trace_ret' must not change the
+   machine state, especially the flag register. To guarantee
+ this, you must output code to save and restore registers
+ either in this macro or in the macros MACHINE_STATE_SAVE_RET
+ and MACHINE_STATE_RESTORE_RET. The last two macros will be
+ used in the function `__bb_trace_ret', so you must make
+ sure that the function prologue does not change any
+ register prior to saving it with MACHINE_STATE_SAVE_RET.
+
+ else if profile_block_flag != 0:
+
+ The macro will not be used, so it need not distinguish
+ these cases.
+*/
+
+void
+sparc_function_block_profiler_exit (file)
+ FILE *file;
+{
+ if (profile_block_flag == 2)
+ fprintf (file, "\tcall\t%s__bb_trace_ret\n\t nop\n", user_label_prefix);
+ else
+ abort ();
+}
diff --git a/contrib/gcc/config/sparc/sparc.h b/contrib/gcc/config/sparc/sparc.h
index e66f5e6..ad11d74 100644
--- a/contrib/gcc/config/sparc/sparc.h
+++ b/contrib/gcc/config/sparc/sparc.h
@@ -1,5 +1,5 @@
/* Definitions of target machine for GNU compiler, for Sun SPARC.
- Copyright (C) 1987, 88, 89, 92, 94-97, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1987, 88, 89, 92, 94-98, 1999 Free Software Foundation, Inc.
Contributed by Michael Tiemann (tiemann@cygnus.com).
64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
at Cygnus Support.
@@ -34,10 +34,18 @@ Boston, MA 02111-1307, USA. */
architectures to compile for. We allow targets to choose compile time or
runtime selection. */
#ifdef SPARC_BI_ARCH
+#ifdef IN_LIBGCC2
+#if defined(__sparcv9) || defined(__sparc_v9__) || defined(__arch64__)
+#define TARGET_ARCH32 0
+#else
+#define TARGET_ARCH32 1
+#endif /* V9 sparc */
+#else
#define TARGET_ARCH32 (! TARGET_64BIT)
+#endif /* IN_LIBGCC2 */
#else
#define TARGET_ARCH32 (DEFAULT_ARCH32_P)
-#endif
+#endif /* SPARC_BI_ARCH */
#define TARGET_ARCH64 (! TARGET_ARCH32)
/* Code model selection.
@@ -55,7 +63,8 @@ Boston, MA 02111-1307, USA. */
TARGET_CM_MEDMID: 64 bit address space.
The executable must be in the low 16 TB of memory.
This corresponds to the low 44 bits, and the %[hml]44
- relocs are used.
+ relocs are used. The text segment has a maximum size
+ of 31 bits.
TARGET_CM_MEDANY: 64 bit address space.
The text and data segments have a maximum size of 31
@@ -78,7 +87,7 @@ enum cmodel {
};
/* Value of -mcmodel specified by user. */
-extern char *sparc_cmodel_string;
+extern const char *sparc_cmodel_string;
/* One of CM_FOO. */
extern enum cmodel sparc_cmodel;
@@ -97,44 +106,109 @@ extern enum cmodel sparc_cmodel;
/* Values of TARGET_CPU_DEFAULT, set via -D in the Makefile,
and specified by the user via --with-cpu=foo.
This specifies the cpu implementation, not the architecture size. */
+/* Note that TARGET_CPU_v9 is assumed to start the list of 64-bit
+   capable CPUs. */
#define TARGET_CPU_sparc 0
#define TARGET_CPU_v7 0 /* alias for previous */
#define TARGET_CPU_sparclet 1
#define TARGET_CPU_sparclite 2
#define TARGET_CPU_v8 3 /* generic v8 implementation */
#define TARGET_CPU_supersparc 4
-#define TARGET_CPU_v9 5 /* generic v9 implementation */
-#define TARGET_CPU_sparc64 5 /* alias */
-#define TARGET_CPU_ultrasparc 6
+#define TARGET_CPU_hypersparc 5
+#define TARGET_CPU_sparc86x 6
+#define TARGET_CPU_sparclite86x 6
+#define TARGET_CPU_v9 7 /* generic v9 implementation */
+#define TARGET_CPU_sparcv9 7 /* alias */
+#define TARGET_CPU_sparc64 7 /* alias */
+#define TARGET_CPU_ultrasparc 8
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+
+#define CPP_CPU32_DEFAULT_SPEC ""
+#define ASM_CPU32_DEFAULT_SPEC ""
-#if TARGET_CPU_DEFAULT == TARGET_CPU_sparc || TARGET_CPU_DEFAULT == TARGET_CPU_v8 || TARGET_CPU_DEFAULT == TARGET_CPU_supersparc
-#define CPP_CPU_DEFAULT_SPEC ""
-#define ASM_CPU_DEFAULT_SPEC ""
-#endif
-#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclet
-#define CPP_CPU_DEFAULT_SPEC "-D__sparclet__"
-#define ASM_CPU_DEFAULT_SPEC "-Asparclet"
-#endif
-#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclite
-#define CPP_CPU_DEFAULT_SPEC "-D__sparclite__"
-#define ASM_CPU_DEFAULT_SPEC "-Asparclite"
-#endif
#if TARGET_CPU_DEFAULT == TARGET_CPU_v9
/* ??? What does Sun's CC pass? */
-#define CPP_CPU_DEFAULT_SPEC "-D__sparc_v9__"
+#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
/* ??? It's not clear how other assemblers will handle this, so by default
use GAS. Sun's Solaris assembler recognizes -xarch=v8plus, but this case
is handled in sol2.h. */
-#define ASM_CPU_DEFAULT_SPEC "-Av9"
+#define ASM_CPU64_DEFAULT_SPEC "-Av9"
#endif
#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
-#define CPP_CPU_DEFAULT_SPEC "-D__sparc_v9__"
-#define ASM_CPU_DEFAULT_SPEC "-Av9a"
+#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
+#define ASM_CPU64_DEFAULT_SPEC "-Av9a"
+#endif
+
+#else
+
+#define CPP_CPU64_DEFAULT_SPEC ""
+#define ASM_CPU64_DEFAULT_SPEC ""
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_sparc \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_v8
+#define CPP_CPU32_DEFAULT_SPEC ""
+#define ASM_CPU32_DEFAULT_SPEC ""
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclet
+#define CPP_CPU32_DEFAULT_SPEC "-D__sparclet__"
+#define ASM_CPU32_DEFAULT_SPEC "-Asparclet"
#endif
-#ifndef CPP_CPU_DEFAULT_SPEC
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclite
+#define CPP_CPU32_DEFAULT_SPEC "-D__sparclite__"
+#define ASM_CPU32_DEFAULT_SPEC "-Asparclite"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_supersparc
+#define CPP_CPU32_DEFAULT_SPEC "-D__supersparc__ -D__sparc_v8__"
+#define ASM_CPU32_DEFAULT_SPEC ""
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_hypersparc
+#define CPP_CPU32_DEFAULT_SPEC "-D__hypersparc__ -D__sparc_v8__"
+#define ASM_CPU32_DEFAULT_SPEC ""
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclite86x
+#define CPP_CPU32_DEFAULT_SPEC "-D__sparclite86x__ -D__sparc_v8__"
+#define ASM_CPU32_DEFAULT_SPEC "-Av8"
+#endif
+
+#endif
+
+#if !defined(CPP_CPU32_DEFAULT_SPEC) || !defined(CPP_CPU64_DEFAULT_SPEC)
Unrecognized value in TARGET_CPU_DEFAULT.
#endif
+#ifdef SPARC_BI_ARCH
+
+#define CPP_CPU_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? "\
+%{m64:" CPP_CPU64_DEFAULT_SPEC "} \
+%{!m64:" CPP_CPU32_DEFAULT_SPEC "} \
+" : "\
+%{m32:" CPP_CPU32_DEFAULT_SPEC "} \
+%{!m32:" CPP_CPU64_DEFAULT_SPEC "} \
+")
+#define ASM_CPU_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? "\
+%{m64:" ASM_CPU64_DEFAULT_SPEC "} \
+%{!m64:" ASM_CPU32_DEFAULT_SPEC "} \
+" : "\
+%{m32:" ASM_CPU32_DEFAULT_SPEC "} \
+%{!m32:" ASM_CPU64_DEFAULT_SPEC "} \
+")
+
+#else /* !SPARC_BI_ARCH */
+
+#define CPP_CPU_DEFAULT_SPEC (DEFAULT_ARCH32_P ? CPP_CPU32_DEFAULT_SPEC : CPP_CPU64_DEFAULT_SPEC)
+#define ASM_CPU_DEFAULT_SPEC (DEFAULT_ARCH32_P ? ASM_CPU32_DEFAULT_SPEC : ASM_CPU64_DEFAULT_SPEC)
+
+#endif /* !SPARC_BI_ARCH */
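/* Editor's note (illustrative, not part of the import): on a bi-arch
   tree whose default is 32-bit, the specs above select the 64-bit
   strings only under -m64:

	gcc foo.c	-> CPP_CPU32_DEFAULT_SPEC / ASM_CPU32_DEFAULT_SPEC
	gcc -m64 foo.c	-> CPP_CPU64_DEFAULT_SPEC / ASM_CPU64_DEFAULT_SPEC

   and symmetrically with -m32 when the default is 64-bit.  */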
+
/* Names to predefine in the preprocessor for this target machine.
??? It would be nice to not include any subtarget specific values here,
however there's no way to portably provide subtarget values to
@@ -158,6 +232,8 @@ Unrecognized value in TARGET_CPU_DEFAULT.
%{mcpu=f930:-D__sparclite__} %{mcpu=f934:-D__sparclite__} \
%{mcpu=v8:-D__sparc_v8__} \
%{mcpu=supersparc:-D__supersparc__ -D__sparc_v8__} \
+%{mcpu=hypersparc:-D__hypersparc__ -D__sparc_v8__} \
+%{mcpu=sparclite86x:-D__sparclite86x__ -D__sparc_v8__} \
%{mcpu=v9:-D__sparc_v9__} \
%{mcpu=ultrasparc:-D__sparc_v9__} \
%{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \
@@ -169,8 +245,20 @@ Unrecognized value in TARGET_CPU_DEFAULT.
sparc64 in 32 bit environments, so for now we only use `sparc64' in
64 bit environments. */
+#ifdef SPARC_BI_ARCH
+
+#define CPP_ARCH32_SPEC "-D__SIZE_TYPE__=unsigned\\ int -D__PTRDIFF_TYPE__=int \
+-D__GCC_NEW_VARARGS__ -Acpu(sparc) -Amachine(sparc)"
+#define CPP_ARCH64_SPEC "-D__SIZE_TYPE__=long\\ unsigned\\ int -D__PTRDIFF_TYPE__=long\\ int \
+-D__arch64__ -Acpu(sparc64) -Amachine(sparc64)"
+
+#else
+
#define CPP_ARCH32_SPEC "-D__GCC_NEW_VARARGS__ -Acpu(sparc) -Amachine(sparc)"
#define CPP_ARCH64_SPEC "-D__arch64__ -Acpu(sparc64) -Amachine(sparc64)"
+
+#endif
+
#define CPP_ARCH_DEFAULT_SPEC \
(DEFAULT_ARCH32_P ? CPP_ARCH32_SPEC : CPP_ARCH64_SPEC)
@@ -181,7 +269,9 @@ Unrecognized value in TARGET_CPU_DEFAULT.
"
/* Macros to distinguish endianness. */
-#define CPP_ENDIAN_SPEC "%{mlittle-endian:-D__LITTLE_ENDIAN__}"
+#define CPP_ENDIAN_SPEC "\
+%{mlittle-endian:-D__LITTLE_ENDIAN__} \
+%{mlittle-endian-data:-D__LITTLE_ENDIAN_DATA__}"
/* Macros to distinguish the particular subtarget. */
#define CPP_SUBTARGET_SPEC ""
@@ -254,20 +344,20 @@ Unrecognized value in TARGET_CPU_DEFAULT.
Do not define this macro if it does not need to do anything. */
#define EXTRA_SPECS \
- { "cpp_cpu", CPP_CPU_SPEC }, \
- { "cpp_cpu_default", CPP_CPU_DEFAULT_SPEC }, \
- { "cpp_arch32", CPP_ARCH32_SPEC }, \
- { "cpp_arch64", CPP_ARCH64_SPEC }, \
- { "cpp_arch_default", CPP_ARCH_DEFAULT_SPEC }, \
- { "cpp_arch", CPP_ARCH_SPEC }, \
- { "cpp_endian", CPP_ENDIAN_SPEC }, \
- { "cpp_subtarget", CPP_SUBTARGET_SPEC }, \
- { "asm_cpu", ASM_CPU_SPEC }, \
- { "asm_cpu_default", ASM_CPU_DEFAULT_SPEC }, \
- { "asm_arch32", ASM_ARCH32_SPEC }, \
- { "asm_arch64", ASM_ARCH64_SPEC }, \
- { "asm_arch_default", ASM_ARCH_DEFAULT_SPEC }, \
- { "asm_arch", ASM_ARCH_SPEC }, \
+ { "cpp_cpu", CPP_CPU_SPEC }, \
+ { "cpp_cpu_default", CPP_CPU_DEFAULT_SPEC }, \
+ { "cpp_arch32", CPP_ARCH32_SPEC }, \
+ { "cpp_arch64", CPP_ARCH64_SPEC }, \
+ { "cpp_arch_default", CPP_ARCH_DEFAULT_SPEC },\
+ { "cpp_arch", CPP_ARCH_SPEC }, \
+ { "cpp_endian", CPP_ENDIAN_SPEC }, \
+ { "cpp_subtarget", CPP_SUBTARGET_SPEC }, \
+ { "asm_cpu", ASM_CPU_SPEC }, \
+ { "asm_cpu_default", ASM_CPU_DEFAULT_SPEC }, \
+ { "asm_arch32", ASM_ARCH32_SPEC }, \
+ { "asm_arch64", ASM_ARCH64_SPEC }, \
+ { "asm_arch_default", ASM_ARCH_DEFAULT_SPEC },\
+ { "asm_arch", ASM_ARCH_SPEC }, \
SUBTARGET_EXTRA_SPECS
#define SUBTARGET_EXTRA_SPECS
@@ -299,7 +389,7 @@ void sparc_override_options ();
{ \
if (flag_pic) \
{ \
- char *pic_string = (flag_pic == 1) ? "-fpic" : "-fPIC"; \
+ const char *pic_string = (flag_pic == 1) ? "-fpic" : "-fPIC";\
warning ("%s and profiling conflict: disabling %s", \
pic_string, pic_string); \
flag_pic = 0; \
@@ -411,8 +501,7 @@ extern int target_flags;
/* 0x2000, 0x4000 are unused */
/* Nonzero if pointers are 64 bits.
- This is not a user selectable option, though it may be one day -
- so it is used to determine pointer size instead of an architecture flag. */
+   At the moment it must follow the architecture size flag. */
#define MASK_PTR64 0x8000
#define TARGET_PTR64 (target_flags & MASK_PTR64)
@@ -480,44 +569,44 @@ extern int target_flags;
An empty string NAME is used to identify the default VALUE. */
#define TARGET_SWITCHES \
- { {"fpu", MASK_FPU | MASK_FPU_SET}, \
- {"no-fpu", -MASK_FPU}, \
- {"no-fpu", MASK_FPU_SET}, \
- {"hard-float", MASK_FPU | MASK_FPU_SET}, \
- {"soft-float", -MASK_FPU}, \
- {"soft-float", MASK_FPU_SET}, \
- {"epilogue", MASK_EPILOGUE}, \
- {"no-epilogue", -MASK_EPILOGUE}, \
- {"unaligned-doubles", MASK_UNALIGNED_DOUBLES}, \
- {"no-unaligned-doubles", -MASK_UNALIGNED_DOUBLES}, \
- {"impure-text", MASK_IMPURE_TEXT}, \
- {"no-impure-text", -MASK_IMPURE_TEXT}, \
- {"flat", MASK_FLAT}, \
- {"no-flat", -MASK_FLAT}, \
- {"app-regs", MASK_APP_REGS}, \
- {"no-app-regs", -MASK_APP_REGS}, \
- {"hard-quad-float", MASK_HARD_QUAD}, \
- {"soft-quad-float", -MASK_HARD_QUAD}, \
- {"v8plus", MASK_V8PLUS}, \
- {"no-v8plus", -MASK_V8PLUS}, \
- {"vis", MASK_VIS}, \
+ { {"fpu", MASK_FPU | MASK_FPU_SET, "Use hardware fp" }, \
+ {"no-fpu", -MASK_FPU, "Do not use hardware fp" }, \
+ {"no-fpu", MASK_FPU_SET, "Do not use hardware fp" }, \
+ {"hard-float", MASK_FPU | MASK_FPU_SET, "Use hardware fp" }, \
+ {"soft-float", -MASK_FPU, "Do not use hardware fp" }, \
+ {"soft-float", MASK_FPU_SET, "Do not use hardware fp" }, \
+ {"epilogue", MASK_EPILOGUE, "Use FUNCTION_EPILOGUE" }, \
+ {"no-epilogue", -MASK_EPILOGUE, "Do not use FUNCTION_EPILOGUE" }, \
+ {"unaligned-doubles", MASK_UNALIGNED_DOUBLES, "Assume possible double misalignment" },\
+ {"no-unaligned-doubles", -MASK_UNALIGNED_DOUBLES, "Assume all doubles are aligned" }, \
+ {"impure-text", MASK_IMPURE_TEXT, "Pass -assert pure-text to linker" }, \
+ {"no-impure-text", -MASK_IMPURE_TEXT, "Do not pass -assert pure-text to linker" }, \
+ {"flat", MASK_FLAT, "Use flat register window model" }, \
+ {"no-flat", -MASK_FLAT, "Do not use flat register window model" }, \
+ {"app-regs", MASK_APP_REGS, "Use ABI reserved registers" }, \
+ {"no-app-regs", -MASK_APP_REGS, "Do not use ABI reserved registers" }, \
+ {"hard-quad-float", MASK_HARD_QUAD, "Use hardware quad fp instructions" }, \
+ {"soft-quad-float", -MASK_HARD_QUAD, "Do not use hardware quad fp instructions" }, \
+ {"v8plus", MASK_V8PLUS, "Compile for v8plus ABI" }, \
+ {"no-v8plus", -MASK_V8PLUS, "Do not compile for v8plus ABI" }, \
+ {"vis", MASK_VIS, "Utilize Visual Instruction Set" }, \
+ {"no-vis", -MASK_VIS, "Do not utilize Visual Instruction Set" }, \
/* ??? These are deprecated, coerced to -mcpu=. Delete in 2.9. */ \
- {"cypress", 0}, \
- {"sparclite", 0}, \
- {"f930", 0}, \
- {"f934", 0}, \
- {"v8", 0}, \
- {"supersparc", 0}, \
+ {"cypress", 0, "Optimize for Cypress processors" }, \
+ {"sparclite", 0, "Optimize for SparcLite processors" }, \
+ {"f930", 0, "Optimize for F930 processors" }, \
+ {"f934", 0, "Optimize for F934 processors" }, \
+ {"v8", 0, "Use V8 Sparc ISA" }, \
+ {"supersparc", 0, "Optimize for SuperSparc processors" }, \
/* End of deprecated options. */ \
- /* -mptrNN exists for *experimental* purposes. */ \
-/* {"ptr64", MASK_PTR64}, */ \
-/* {"ptr32", -MASK_PTR64}, */ \
- {"32", -MASK_64BIT}, \
- {"64", MASK_64BIT}, \
- {"stack-bias", MASK_STACK_BIAS}, \
- {"no-stack-bias", -MASK_STACK_BIAS}, \
+ {"ptr64", MASK_PTR64, "Pointers are 64-bit" }, \
+ {"ptr32", -MASK_PTR64, "Pointers are 32-bit" }, \
+ {"32", -MASK_64BIT, "Use 32-bit ABI" }, \
+ {"64", MASK_64BIT, "Use 64-bit ABI" }, \
+ {"stack-bias", MASK_STACK_BIAS, "Use stack bias" }, \
+ {"no-stack-bias", -MASK_STACK_BIAS, "Do not use stack bias" }, \
SUBTARGET_SWITCHES \
- { "", TARGET_DEFAULT}}
+ { "", TARGET_DEFAULT, ""}}
/* MASK_APP_REGS must always be the default because that's what
FIXED_REGISTERS is set to and -ffixed- is processed before
@@ -537,6 +626,8 @@ enum processor_type {
PROCESSOR_SPARCLITE,
PROCESSOR_F930,
PROCESSOR_F934,
+ PROCESSOR_HYPERSPARC,
+ PROCESSOR_SPARCLITE86X,
PROCESSOR_SPARCLET,
PROCESSOR_TSC701,
PROCESSOR_V9,
@@ -569,12 +660,12 @@ extern enum processor_type sparc_cpu;
#define TARGET_OPTIONS \
{ \
- { "cpu=", &sparc_select[1].string }, \
- { "tune=", &sparc_select[2].string }, \
- { "cmodel=", &sparc_cmodel_string }, \
- { "align-loops=", &sparc_align_loops_string }, \
- { "align-jumps=", &sparc_align_jumps_string }, \
- { "align-functions=", &sparc_align_funcs_string }, \
+ { "cpu=", &sparc_select[1].string, "Use features of and schedule code for given CPU" }, \
+ { "tune=", &sparc_select[2].string, "Schedule code for given CPU" }, \
+ { "cmodel=", &sparc_cmodel_string, "Use given Sparc code model" }, \
+ { "align-loops=", &sparc_align_loops_string, "Loop code aligned to this power of 2" }, \
+ { "align-jumps=", &sparc_align_jumps_string, "Jump targets are aligned to this power of 2" }, \
+ { "align-functions=", &sparc_align_funcs_string, "Function starts are aligned to this power of 2" }, \
SUBTARGET_OPTIONS \
}
@@ -584,8 +675,8 @@ extern enum processor_type sparc_cpu;
/* sparc_select[0] is reserved for the default cpu. */
struct sparc_cpu_select
{
- char *string;
- char *name;
+ const char *string;
+ const char *name;
int set_tune_p;
int set_arch_p;
};
@@ -593,9 +684,9 @@ struct sparc_cpu_select
extern struct sparc_cpu_select sparc_select[];
/* Variables to record values the user passes. */
-extern char *sparc_align_loops_string;
-extern char *sparc_align_jumps_string;
-extern char *sparc_align_funcs_string;
+extern const char *sparc_align_loops_string;
+extern const char *sparc_align_jumps_string;
+extern const char *sparc_align_funcs_string;
/* Parsed values as a power of two. */
extern int sparc_align_loops;
extern int sparc_align_jumps;
@@ -623,7 +714,7 @@ extern int sparc_align_funcs;
/* Define this to set the endianness to use in libgcc2.c, which can
not depend on target_flags. */
-#if defined (__LITTLE_ENDIAN__)
+#if defined (__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN_DATA__)
#define LIBGCC2_WORDS_BIG_ENDIAN 0
#else
#define LIBGCC2_WORDS_BIG_ENDIAN 1
@@ -912,14 +1003,17 @@ if (TARGET_ARCH64 \
#define CONDITIONAL_REGISTER_USAGE \
do \
{ \
- if (TARGET_ARCH32) \
+ if (flag_pic) \
{ \
- fixed_regs[5] = 1; \
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
} \
- else \
+ if (TARGET_ARCH32) \
{ \
- fixed_regs[1] = 1; \
+ fixed_regs[5] = 1; \
} \
+ if (TARGET_LIVE_G0) \
+ fixed_regs[0] = 0; \
if (! TARGET_V9) \
{ \
int regno; \
@@ -959,11 +1053,6 @@ do \
fixed_regs[1] = 1; \
fixed_regs[2] = 1; \
} \
- if (flag_pic != 0) \
- { \
- fixed_regs[23] = 1; \
- call_used_regs[23] = 1; \
- } \
} \
while (0)
@@ -987,9 +1076,18 @@ while (0)
/* A subreg in 64 bit mode will have the wrong offset for a floating point
register. The least significant part is at offset 1, compared to 0 for
- integer registers. */
+ integer registers. This only applies when FMODE is a larger mode.
+ We also need to handle a special case of TF-->DF conversions. */
#define ALTER_HARD_SUBREG(TMODE, WORD, FMODE, REGNO) \
- (TARGET_ARCH64 && (REGNO) >= 32 && (REGNO) < 96 && (TMODE) == SImode ? 1 : ((REGNO) + (WORD)))
+ (TARGET_ARCH64 \
+ && (REGNO) >= SPARC_FIRST_FP_REG \
+ && (REGNO) <= SPARC_LAST_V9_FP_REG \
+ && (TMODE) == SImode \
+ && !((FMODE) == QImode || (FMODE) == HImode) \
+ ? ((REGNO) + 1) \
+ : ((TMODE) == DFmode && (FMODE) == TFmode) \
+ ? ((REGNO) + ((WORD) * 2)) \
+ : ((REGNO) + (WORD)))
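/* Editor's sketch (standalone restatement, hypothetical names): the
   register arithmetic of ALTER_HARD_SUBREG above with the mode tests
   folded into flags, assuming TARGET_ARCH64.  Regs 32..95 are the v9
   FP registers. */
static int
altered_subreg_regno (int regno, int word, int si_from_wider_fp, int tf_to_df)
{
  if (regno >= 32 && regno <= 95 && si_from_wider_fp)
    return regno + 1;		/* least significant SImode part is at +1 */
  if (tf_to_df)
    return regno + word * 2;	/* each DFmode half of a TFmode spans 2 regs */
  return regno + word;		/* otherwise: plain word offset */
}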
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
See sparc.c for how we initialize this. */
@@ -1080,7 +1178,6 @@ extern int sparc_mode_class[];
#define PIC_OFFSET_TABLE_REGNUM 23
-#define INITIALIZE_PIC initialize_pic ()
#define FINALIZE_PIC finalize_pic ()
/* Pick a default value we can notice from override_options:
@@ -1348,37 +1445,60 @@ extern char leaf_reg_remap[];
in class CLASS, return the class of reg to actually use.
In general this is just CLASS; but on some machines
in some cases it is preferable to use a more restrictive class. */
-/* We can't load constants into FP registers. We can't load any FP constant
- if an 'E' constraint fails to match it. */
+/* - We can't load constants into FP registers. We can't load any FP
+ constant if an 'E' constraint fails to match it.
+   - Try to reload integer constants (symbolic or otherwise) back into
+ registers directly, rather than having them dumped to memory. */
+
#define PREFERRED_RELOAD_CLASS(X,CLASS) \
(CONSTANT_P (X) \
- && (FP_REG_CLASS_P (CLASS) \
+ ? ((FP_REG_CLASS_P (CLASS) \
|| (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT \
&& (HOST_FLOAT_FORMAT != IEEE_FLOAT_FORMAT \
|| HOST_BITS_PER_INT != BITS_PER_WORD))) \
- ? NO_REGS : (CLASS))
+ ? NO_REGS \
+ : (!FP_REG_CLASS_P (CLASS) \
+ && GET_MODE_CLASS (GET_MODE (X)) == MODE_INT) \
+ ? GENERAL_REGS \
+ : (CLASS)) \
+ : (CLASS))
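/* Editor's note (illustrative): under the macro above, a CONST_INT
   requested in an FP class reloads as NO_REGS (FP regs cannot be
   loaded with constants), while the same constant requested in a
   non-FP class is steered to GENERAL_REGS rather than being dumped
   to memory.  */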
/* Return the register class of a scratch register needed to load IN into
a register of class CLASS in MODE.
- On the SPARC, when PIC, we need a temporary when loading some addresses
- into a register.
-
- Also, we need a temporary when loading/storing a HImode/QImode value
+ We need a temporary when loading/storing a HImode/QImode value
between memory and the FPU registers. This can happen when combine puts
a paradoxical subreg in a float/fix conversion insn. */
#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, IN) \
- ((FP_REG_CLASS_P (CLASS) && ((MODE) == HImode || (MODE) == QImode) \
+ ((FP_REG_CLASS_P (CLASS) \
+ && ((MODE) == HImode || (MODE) == QImode) \
&& (GET_CODE (IN) == MEM \
- || ((GET_CODE (IN) == REG || GET_CODE (IN) == SUBREG) \
- && true_regnum (IN) == -1))) ? GENERAL_REGS : NO_REGS)
+ || ((GET_CODE (IN) == REG || GET_CODE (IN) == SUBREG) \
+ && true_regnum (IN) == -1))) \
+ ? GENERAL_REGS \
+ : (((TARGET_CM_MEDANY \
+ && symbolic_operand ((IN), (MODE))) \
+ || (TARGET_CM_EMBMEDANY \
+ && text_segment_operand ((IN), (MODE)))) \
+ && !flag_pic) \
+ ? GENERAL_REGS \
+ : NO_REGS)
#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, IN) \
- ((FP_REG_CLASS_P (CLASS) && ((MODE) == HImode || (MODE) == QImode) \
- && (GET_CODE (IN) == MEM \
- || ((GET_CODE (IN) == REG || GET_CODE (IN) == SUBREG) \
- && true_regnum (IN) == -1))) ? GENERAL_REGS : NO_REGS)
+ ((FP_REG_CLASS_P (CLASS) \
+ && ((MODE) == HImode || (MODE) == QImode) \
+ && (GET_CODE (IN) == MEM \
+ || ((GET_CODE (IN) == REG || GET_CODE (IN) == SUBREG) \
+ && true_regnum (IN) == -1))) \
+ ? GENERAL_REGS \
+ : (((TARGET_CM_MEDANY \
+ && symbolic_operand ((IN), (MODE))) \
+ || (TARGET_CM_EMBMEDANY \
+ && text_segment_operand ((IN), (MODE)))) \
+ && !flag_pic) \
+ ? GENERAL_REGS \
+ : NO_REGS)
/* On SPARC it is not possible to directly move data between
GENERAL_REGS and FP_REGS. */
@@ -1459,6 +1579,10 @@ extern char leaf_reg_remap[];
(TARGET_ARCH64 ? (SPARC_STACK_BIAS + 16 * UNITS_PER_WORD) \
: (STRUCT_VALUE_OFFSET + UNITS_PER_WORD))
+/* Offset from the argument pointer register value to the CFA. */
+
+#define ARG_POINTER_CFA_OFFSET SPARC_STACK_BIAS
+
/* When a parameter is passed in a register, stack space is still
allocated for it.
!v9: All 6 possible integer registers have backing store allocated.
@@ -1714,246 +1838,38 @@ do { \
to do a "save" insn. The decision about whether or not
to do this is made in regclass.c. */
-extern int leaf_function;
#define FUNCTION_PROLOGUE(FILE, SIZE) \
(TARGET_FLAT ? sparc_flat_output_function_prologue (FILE, (int)SIZE) \
- : output_function_prologue (FILE, (int)SIZE, leaf_function))
+ : output_function_prologue (FILE, (int)SIZE, \
+ current_function_uses_only_leaf_regs))
/* Output assembler code to FILE to increment profiler label # LABELNO
- for profiling a function entry.
-
- 32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
- during profiling so we need to save/restore it around the call to mcount.
- We're guaranteed that a save has just been done, and we use the space
- allocated for intreg/fpreg value passing. */
-
-#define FUNCTION_PROFILER(FILE, LABELNO) \
- do { \
- char buf[20]; \
- ASM_GENERATE_INTERNAL_LABEL (buf, "LP", (LABELNO)); \
- if (! TARGET_ARCH64) \
- fputs ("\tst %g2,[%fp-4]\n", FILE); \
- fputs ("\tsethi %hi(", FILE); \
- assemble_name (FILE, buf); \
- fputs ("),%o0\n", FILE); \
- fputs ("\tcall mcount\n\tadd %o0,%lo(", FILE); \
- assemble_name (FILE, buf); \
- fputs ("),%o0\n", FILE); \
- if (! TARGET_ARCH64) \
- fputs ("\tld [%fp-4],%g2\n", FILE); \
- } while (0)
-
-/* There are three profiling modes for basic blocks available.
- The modes are selected at compile time by using the options
- -a or -ax of the gnu compiler.
- The variable `profile_block_flag' will be set according to the
- selected option.
+ for profiling a function entry. */
- profile_block_flag == 0, no option used:
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ sparc_function_profiler(FILE, LABELNO)
- No profiling done.
+/* Set the name of the mcount function for the system. */
- profile_block_flag == 1, -a option used.
-
- Count frequency of execution of every basic block.
-
- profile_block_flag == 2, -ax option used.
-
- Generate code to allow several different profiling modes at run time.
- Available modes are:
- Produce a trace of all basic blocks.
- Count frequency of jump instructions executed.
- In every mode it is possible to start profiling upon entering
- certain functions and to disable profiling of some other functions.
-
- The result of basic-block profiling will be written to a file `bb.out'.
- If the -ax option is used parameters for the profiling will be read
- from file `bb.in'.
-
-*/
+#define MCOUNT_FUNCTION "*mcount"
/* The following macro shall output assembler code to FILE
- to initialize basic-block profiling.
-
- If profile_block_flag == 2
-
- Output code to call the subroutine `__bb_init_trace_func'
- and pass two parameters to it. The first parameter is
- the address of a block allocated in the object module.
- The second parameter is the number of the first basic block
- of the function.
+ to initialize basic-block profiling. */
- The name of the block is a local symbol made with this statement:
-
- ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
-
- Of course, since you are writing the definition of
- `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
- can take a short cut in the definition of this macro and use the
- name that you know will result.
-
- The number of the first basic block of the function is
- passed to the macro in BLOCK_OR_LABEL.
-
- If described in a virtual assembler language the code to be
- output looks like:
-
- parameter1 <- LPBX0
- parameter2 <- BLOCK_OR_LABEL
- call __bb_init_trace_func
-
- else if profile_block_flag != 0
-
- Output code to call the subroutine `__bb_init_func'
- and pass one single parameter to it, which is the same
- as the first parameter to `__bb_init_trace_func'.
-
- The first word of this parameter is a flag which will be nonzero if
- the object module has already been initialized. So test this word
- first, and do not call `__bb_init_func' if the flag is nonzero.
- Note: When profile_block_flag == 2 the test need not be done
- but `__bb_init_trace_func' *must* be called.
-
- BLOCK_OR_LABEL may be used to generate a label number as a
- branch destination in case `__bb_init_func' will not be called.
-
- If described in a virtual assembler language the code to be
- output looks like:
-
- cmp (LPBX0),0
- jne local_label
- parameter1 <- LPBX0
- call __bb_init_func
-local_label:
-
-*/
-
-#define FUNCTION_BLOCK_PROFILER(FILE, BLOCK_OR_LABEL) \
-do \
- { \
- int bol = (BLOCK_OR_LABEL); \
- switch (profile_block_flag) \
- { \
- case 2: \
- fprintf (FILE, "\tsethi %%hi(LPBX0),%%o0\n\tor %%o0,%%lo(LPBX0),%%o0\n\tsethi %%hi(%d),%%o1\n\tcall ___bb_init_trace_func\n\tor %%o1,%%lo(%d),%%o1\n",\
- bol, bol); \
- break; \
- default: \
- fprintf (FILE, "\tsethi %%hi(LPBX0),%%o0\n\tld [%%lo(LPBX0)+%%o0],%%o1\n\ttst %%o1\n\tbne LPY%d\n\tadd %%o0,%%lo(LPBX0),%%o0\n\tcall ___bb_init_func\n\tnop\nLPY%d:\n",\
- bol, bol); \
- break; \
- } \
- } \
-while (0)
+#define FUNCTION_BLOCK_PROFILER(FILE, BLOCK_OR_LABEL) \
+ sparc_function_block_profiler(FILE, BLOCK_OR_LABEL)
/* The following macro shall output assembler code to FILE
- to increment a counter associated with basic block number BLOCKNO.
-
- If profile_block_flag == 2
-
- Output code to initialize the global structure `__bb' and
- call the function `__bb_trace_func' which will increment the
- counter.
-
- `__bb' consists of two words. In the first word the number
- of the basic block has to be stored. In the second word
- the address of a block allocated in the object module
- has to be stored.
+ to increment a counter associated with basic block number BLOCKNO. */
- The basic block number is given by BLOCKNO.
-
- The address of the block is given by the label created with
-
- ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
-
- by FUNCTION_BLOCK_PROFILER.
-
- Of course, since you are writing the definition of
- `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
- can take a short cut in the definition of this macro and use the
- name that you know will result.
-
- If described in a virtual assembler language the code to be
- output looks like:
-
- move BLOCKNO -> (__bb)
- move LPBX0 -> (__bb+4)
- call __bb_trace_func
-
- Note that function `__bb_trace_func' must not change the
- machine state, especially the flag register. To grant
- this, you must output code to save and restore registers
- either in this macro or in the macros MACHINE_STATE_SAVE
- and MACHINE_STATE_RESTORE. The last two macros will be
- used in the function `__bb_trace_func', so you must make
- sure that the function prologue does not change any
- register prior to saving it with MACHINE_STATE_SAVE.
-
- else if profile_block_flag != 0
-
- Output code to increment the counter directly.
- Basic blocks are numbered separately from zero within each
- compiled object module. The count associated with block number
- BLOCKNO is at index BLOCKNO in an array of words; the name of
- this array is a local symbol made with this statement:
-
- ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
-
- Of course, since you are writing the definition of
- `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
- can take a short cut in the definition of this macro and use the
- name that you know will result.
-
- If described in a virtual assembler language, the code to be
- output looks like:
-
- inc (LPBX2+4*BLOCKNO)
-
-*/
-
-#define BLOCK_PROFILER(FILE, BLOCKNO) \
-do \
- { \
- int blockn = (BLOCKNO); \
- switch (profile_block_flag) \
- { \
- case 2: \
- fprintf (FILE, "\tsethi %%hi(___bb),%%g1\n\tsethi %%hi(%d),%%g2\n\tor %%g2,%%lo(%d),%%g2\n\tst %%g2,[%%lo(___bb)+%%g1]\n\tsethi %%hi(LPBX0),%%g2\n\tor %%g2,%%lo(LPBX0),%%g2\n\tadd 4,%%g1,%%g1\n\tst %%g2,[%%lo(___bb)+%%g1]\n\tmov %%o7,%%g2\n\tcall ___bb_trace_func\n\tnop\n\tmov %%g2,%%o7\n",\
- blockn, blockn); \
- break; \
- default: \
- fprintf (FILE, "\tsethi %%hi(LPBX2+%d),%%g1\n\tld [%%lo(LPBX2+%d)+%%g1],%%g2\n\
-\tadd %%g2,1,%%g2\n\tst %%g2,[%%lo(LPBX2+%d)+%%g1]\n", \
- 4 * blockn, 4 * blockn, 4 * blockn); \
- break; \
- } \
- } \
-while(0)
+#define BLOCK_PROFILER(FILE, BLOCKNO) \
+ sparc_block_profiler (FILE, BLOCKNO)
/* The following macro shall output assembler code to FILE
- to indicate a return from function during basic-block profiling.
-
- If profiling_block_flag == 2:
-
- Output assembler code to call function `__bb_trace_ret'.
-
- Note that function `__bb_trace_ret' must not change the
- machine state, especially the flag register. To grant
- this, you must output code to save and restore registers
- either in this macro or in the macros MACHINE_STATE_SAVE_RET
- and MACHINE_STATE_RESTORE_RET. The last two macros will be
- used in the function `__bb_trace_ret', so you must make
- sure that the function prologue does not change any
- register prior to saving it with MACHINE_STATE_SAVE_RET.
-
- else if profiling_block_flag != 0:
-
- The macro will not be used, so it need not distinguish
- these cases.
-*/
+ to indicate a return from function during basic-block profiling. */
#define FUNCTION_BLOCK_PROFILER_EXIT(FILE) \
- fprintf (FILE, "\tcall ___bb_trace_ret\n\tnop\n" );
+ sparc_function_block_profiler_exit(FILE)
/* The function `__bb_trace_func' is called in every basic block
and is not allowed to change the machine state. Saving (restoring)
@@ -2092,7 +2008,8 @@ extern union tree_node *current_function_decl;
#define FUNCTION_EPILOGUE(FILE, SIZE) \
(TARGET_FLAT ? sparc_flat_output_function_epilogue (FILE, (int)SIZE) \
- : output_function_epilogue (FILE, (int)SIZE, leaf_function))
+ : output_function_epilogue (FILE, (int)SIZE, \
+ current_function_uses_only_leaf_regs))
#define DELAY_SLOTS_FOR_EPILOGUE \
(TARGET_FLAT ? sparc_flat_epilogue_delay_slots () : 1)
@@ -2190,11 +2107,11 @@ extern struct rtx_def *sparc_builtin_saveregs ();
/* Addressing modes, and classification of registers for them. */
-/* #define HAVE_POST_INCREMENT */
-/* #define HAVE_POST_DECREMENT */
+/* #define HAVE_POST_INCREMENT 0 */
+/* #define HAVE_POST_DECREMENT 0 */
-/* #define HAVE_PRE_DECREMENT */
-/* #define HAVE_PRE_INCREMENT */
+/* #define HAVE_PRE_DECREMENT 0 */
+/* #define HAVE_PRE_INCREMENT 0 */
/* Macros to check register numbers against specific register classes. */
@@ -2250,10 +2167,13 @@ extern struct rtx_def *sparc_builtin_saveregs ();
#define LEGITIMATE_PIC_OPERAND_P(X) (! pic_address_needs_scratch (X))
/* Nonzero if the constant value X is a legitimate general operand.
- Anything can be made to work except floating point constants. */
+ Anything can be made to work except floating point constants.
+ If TARGET_VIS, 0.0 can be made to work as well. */
-#define LEGITIMATE_CONSTANT_P(X) \
- (GET_CODE (X) != CONST_DOUBLE || GET_MODE (X) == VOIDmode)
+#define LEGITIMATE_CONSTANT_P(X) \
+ (GET_CODE (X) != CONST_DOUBLE || GET_MODE (X) == VOIDmode || \
+ (TARGET_VIS && (GET_MODE (X) == SFmode || GET_MODE (X) == DFmode) && \
+ fp_zero_operand (X)))
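/* Editor's note (illustrative): with -mvis, (const_double:SF 0.0) now
   passes via fp_zero_operand, since VIS supplies fzeros/fzero to clear
   an FP register without touching memory.  */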
/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
and check its validity for a certain class.
@@ -2268,15 +2188,13 @@ extern struct rtx_def *sparc_builtin_saveregs ();
After reload, it makes no difference, since pseudo regs have
been eliminated by then. */
-/* Optional extra constraints for this machine. Borrowed from romp.h.
+/* Optional extra constraints for this machine.
- For the SPARC, `Q' means that this is a memory operand but not a
- symbolic memory operand. Note that an unassigned pseudo register
- is such a memory operand. Needed because reload will generate
- these things in insns and then not re-recognize the insns, causing
- constrain_operands to fail.
+ 'T' handles memory addresses where the alignment is known to
+ be at least 8 bytes.
- `S' handles constraints for calls. ??? So where is it? */
+ `U' handles all pseudo registers or a hard even numbered
+ integer register, needed for ldd/std instructions. */
#ifndef REG_OK_STRICT
@@ -2292,17 +2210,11 @@ extern struct rtx_def *sparc_builtin_saveregs ();
/* 'T', 'U' are for aligned memory loads which aren't needed for v9. */
#define EXTRA_CONSTRAINT(OP, C) \
- ((C) == 'Q' \
- ? ((GET_CODE (OP) == MEM \
- && memory_address_p (GET_MODE (OP), XEXP (OP, 0)) \
- && ! symbolic_memory_operand (OP, VOIDmode)) \
- || (reload_in_progress && GET_CODE (OP) == REG \
- && REGNO (OP) >= FIRST_PSEUDO_REGISTER)) \
- : (! TARGET_ARCH64 && (C) == 'T') \
- ? (mem_aligned_8 (OP)) \
- : (! TARGET_ARCH64 && (C) == 'U') \
- ? (register_ok_for_ldd (OP)) \
- : 0)
+ ((! TARGET_ARCH64 && (C) == 'T') \
+ ? (mem_min_alignment (OP, 8)) \
+ : ((! TARGET_ARCH64 && (C) == 'U') \
+ ? (register_ok_for_ldd (OP)) \
+ : 0))
#else
@@ -2312,19 +2224,14 @@ extern struct rtx_def *sparc_builtin_saveregs ();
#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
#define EXTRA_CONSTRAINT(OP, C) \
- ((C) == 'Q' \
- ? (GET_CODE (OP) == REG \
- ? (REGNO (OP) >= FIRST_PSEUDO_REGISTER \
- && reg_renumber[REGNO (OP)] < 0) \
- : GET_CODE (OP) == MEM) \
- : (! TARGET_ARCH64 && (C) == 'T') \
- ? mem_aligned_8 (OP) && strict_memory_address_p (Pmode, XEXP (OP, 0)) \
- : (! TARGET_ARCH64 && (C) == 'U') \
- ? (GET_CODE (OP) == REG \
- && (REGNO (OP) < FIRST_PSEUDO_REGISTER \
- || reg_renumber[REGNO (OP)] >= 0) \
- && register_ok_for_ldd (OP)) \
- : 0)
+ ((! TARGET_ARCH64 && (C) == 'T') \
+ ? mem_min_alignment (OP, 8) && strict_memory_address_p (Pmode, XEXP (OP, 0)) \
+ : ((! TARGET_ARCH64 && (C) == 'U') \
+ ? (GET_CODE (OP) == REG \
+ && (REGNO (OP) < FIRST_PSEUDO_REGISTER \
+ || reg_renumber[REGNO (OP)] >= 0) \
+ && register_ok_for_ldd (OP)) \
+ : 0))
#endif
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression
@@ -2373,13 +2280,32 @@ extern struct rtx_def *sparc_builtin_saveregs ();
} \
else if (RTX_OK_FOR_BASE_P (op0)) \
{ \
- if (RTX_OK_FOR_INDEX_P (op1) \
+ if ((RTX_OK_FOR_INDEX_P (op1) \
+ /* We prohibit REG + REG for TFmode when \
+ there are no instructions which accept \
+ REG+REG instructions. We do this \
+ because REG+REG is not an offsetable \
+ address. If we get the situation \
+ in reload where source and destination \
+ of a movtf pattern are both MEMs with \
+ REG+REG address, then only one of them \
+ gets converted to an offsetable \
+ address. */ \
+ && (MODE != TFmode \
+ || (TARGET_FPU && TARGET_ARCH64 \
+ && TARGET_V9 \
+ && TARGET_HARD_QUAD))) \
|| RTX_OK_FOR_OFFSET_P (op1)) \
goto ADDR; \
} \
else if (RTX_OK_FOR_BASE_P (op1)) \
{ \
- if (RTX_OK_FOR_INDEX_P (op0) \
+ if ((RTX_OK_FOR_INDEX_P (op0) \
+ /* See the previous comment. */ \
+ && (MODE != TFmode \
+ || (TARGET_FPU && TARGET_ARCH64 \
+ && TARGET_V9 \
+ && TARGET_HARD_QUAD))) \
|| RTX_OK_FOR_OFFSET_P (op0)) \
goto ADDR; \
} \
@@ -2392,8 +2318,8 @@ extern struct rtx_def *sparc_builtin_saveregs ();
&& CONSTANT_P (op1) \
/* We can't allow TFmode, because an offset \
greater than or equal to the alignment (8) \
- may cause the LO_SUM to overflow. */ \
- && MODE != TFmode) \
+ may cause the LO_SUM to overflow if !v9. */\
+ && (MODE != TFmode || TARGET_V9)) \
goto ADDR; \
} \
else if (GET_CODE (X) == CONST_INT && SMALL_INT (X)) \
@@ -2440,11 +2366,43 @@ extern struct rtx_def *legitimize_pic_address ();
copy_to_mode_reg (Pmode, XEXP (X, 0))); \
else if (GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == CONST \
|| GET_CODE (X) == LABEL_REF) \
- (X) = gen_rtx_LO_SUM (Pmode, \
- copy_to_mode_reg (Pmode, gen_rtx_HIGH (Pmode, X)), X); \
+ (X) = copy_to_suggested_reg (X, NULL_RTX, Pmode); \
if (memory_address_p (MODE, X)) \
goto WIN; }
+/* Try a machine-dependent way of reloading an illegitimate address
+ operand. If we find one, push the reload and jump to WIN. This
+ macro is used in only one place: `find_reloads_address' in reload.c.
+
+ For Sparc 32, we wish to handle addresses by splitting them into
+ HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
+ This cuts the number of extra insns by one.
+
+ Do nothing when generating PIC code and the address is a
+ symbolic operand or requires a scratch register. */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
+do { \
+ /* Decompose SImode constants into hi+lo_sum. We do have to \
+     re-recognize what we produce, so be careful. */			\
+ if (CONSTANT_P (X) \
+ && (MODE != TFmode || TARGET_V9) \
+ && GET_MODE (X) == SImode \
+ && GET_CODE (X) != LO_SUM && GET_CODE (X) != HIGH \
+ && ! (flag_pic \
+ && (symbolic_operand (X, Pmode) \
+ || pic_address_needs_scratch (X)))) \
+ { \
+ X = gen_rtx_LO_SUM (GET_MODE (X), \
+ gen_rtx_HIGH (GET_MODE (X), X), X); \
+ push_reload (XEXP (X, 0), NULL_RTX, &XEXP (X, 0), NULL_PTR, \
+ BASE_REG_CLASS, GET_MODE (X), VOIDmode, 0, 0, \
+ OPNUM, TYPE); \
+ goto WIN; \
+ } \
+ /* ??? 64-bit reloads. */ \
+} while (0)
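/* Editor's sketch (illustrative register choice): given a symbolic
   address `sym', the macro above rewrites the operand to
   (lo_sum (high sym) sym) and pushes a reload of only the HIGH part,
   so the final code keeps the LO_SUM inside the memory reference:

	sethi	%hi(sym),%g1
	ld	[%g1+%lo(sym)],%o0

   one insn fewer than materializing the whole address first.  */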
+
/* Go to LABEL if ADDR (a legitimate address expression)
has an effect that depends on the machine mode it is used for.
On the SPARC this is never true. */
@@ -2463,7 +2421,17 @@ extern struct rtx_def *legitimize_pic_address ();
/* Specify the machine mode that this machine uses
for the index in the tablejump instruction. */
-#define CASE_VECTOR_MODE Pmode
+/* If we ever implement any of the full code models (such as CM_FULLANY),
+   this will have to be DImode.  */
+#ifdef HAVE_GAS_SUBSECTION_ORDERING
+#define CASE_VECTOR_MODE \
+(! TARGET_PTR64 ? SImode : flag_pic ? SImode : TARGET_CM_MEDLOW ? SImode : DImode)
+#else
+/* If the assembler does not have a working .subsection -1, we use DImode
+   for PIC, as otherwise we would have to sign-extend, which slows things
+   down. */
+#define CASE_VECTOR_MODE \
+(! TARGET_PTR64 ? SImode : flag_pic ? DImode : TARGET_CM_MEDLOW ? SImode : DImode)
+#endif
/* Define as C expression which evaluates to nonzero if the tablejump
instruction expects the table to contain offsets from the address of the
@@ -2597,25 +2565,26 @@ extern struct rtx_def *legitimize_pic_address ();
#define MULSI3_LIBCALL "*.umul"
/* Define library calls for quad FP operations. These are all part of the
- SPARC ABI. */
-#define ADDTF3_LIBCALL "_Q_add"
-#define SUBTF3_LIBCALL "_Q_sub"
-#define NEGTF2_LIBCALL "_Q_neg"
-#define MULTF3_LIBCALL "_Q_mul"
-#define DIVTF3_LIBCALL "_Q_div"
-#define FLOATSITF2_LIBCALL "_Q_itoq"
-#define FIX_TRUNCTFSI2_LIBCALL "_Q_qtoi"
-#define FIXUNS_TRUNCTFSI2_LIBCALL "_Q_qtou"
-#define EXTENDSFTF2_LIBCALL "_Q_stoq"
-#define TRUNCTFSF2_LIBCALL "_Q_qtos"
-#define EXTENDDFTF2_LIBCALL "_Q_dtoq"
-#define TRUNCTFDF2_LIBCALL "_Q_qtod"
-#define EQTF2_LIBCALL "_Q_feq"
-#define NETF2_LIBCALL "_Q_fne"
-#define GTTF2_LIBCALL "_Q_fgt"
-#define GETF2_LIBCALL "_Q_fge"
-#define LTTF2_LIBCALL "_Q_flt"
-#define LETF2_LIBCALL "_Q_fle"
+ SPARC ABI.
+   ??? ARCH64 still does not work, as the _Qp_* routines take pointers. */
+#define ADDTF3_LIBCALL (TARGET_ARCH64 ? "_Qp_add" : "_Q_add")
+#define SUBTF3_LIBCALL (TARGET_ARCH64 ? "_Qp_sub" : "_Q_sub")
+#define NEGTF2_LIBCALL (TARGET_ARCH64 ? "_Qp_neg" : "_Q_neg")
+#define MULTF3_LIBCALL (TARGET_ARCH64 ? "_Qp_mul" : "_Q_mul")
+#define DIVTF3_LIBCALL (TARGET_ARCH64 ? "_Qp_div" : "_Q_div")
+#define FLOATSITF2_LIBCALL (TARGET_ARCH64 ? "_Qp_itoq" : "_Q_itoq")
+#define FIX_TRUNCTFSI2_LIBCALL (TARGET_ARCH64 ? "_Qp_qtoi" : "_Q_qtoi")
+#define FIXUNS_TRUNCTFSI2_LIBCALL (TARGET_ARCH64 ? "_Qp_qtoui" : "_Q_qtou")
+#define EXTENDSFTF2_LIBCALL (TARGET_ARCH64 ? "_Qp_stoq" : "_Q_stoq")
+#define TRUNCTFSF2_LIBCALL (TARGET_ARCH64 ? "_Qp_qtos" : "_Q_qtos")
+#define EXTENDDFTF2_LIBCALL (TARGET_ARCH64 ? "_Qp_dtoq" : "_Q_dtoq")
+#define TRUNCTFDF2_LIBCALL (TARGET_ARCH64 ? "_Qp_qtod" : "_Q_qtod")
+#define EQTF2_LIBCALL (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq")
+#define NETF2_LIBCALL (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne")
+#define GTTF2_LIBCALL (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt")
+#define GETF2_LIBCALL (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge")
+#define LTTF2_LIBCALL (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt")
+#define LETF2_LIBCALL (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle")
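/* Editor's sketch (not part of the import): user code that reaches these
   libcalls, assuming 128-bit `long double' (TFmode) and no hardware quad
   float.  The function name is hypothetical. */
long double
editor_qp_add_demo (long double a, long double b)
{
  return a + b;		/* calls _Q_add on 32-bit, _Qp_add on arch64 */
}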
/* We can define the TFmode sqrt optab only if TARGET_FPU. This is because
with soft-float, the SFmode and DFmode sqrt instructions will be absent,
@@ -2681,9 +2650,6 @@ extern struct rtx_def *legitimize_pic_address ();
return 0; \
return 8;
-/* Compute the cost of an address. For the sparc, all valid addresses are
- the same cost. */
-
#define ADDRESS_COST(RTX) 1
/* Compute extra cost of moving data between one register class
@@ -2704,11 +2670,17 @@ extern struct rtx_def *legitimize_pic_address ();
#define RTX_COSTS(X,CODE,OUTER_CODE) \
case MULT: \
+ if (sparc_cpu == PROCESSOR_ULTRASPARC) \
+ return (GET_MODE (X) == DImode ? \
+ COSTS_N_INSNS (34) : COSTS_N_INSNS (19)); \
return TARGET_HARD_MUL ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \
case DIV: \
case UDIV: \
case MOD: \
case UMOD: \
+ if (sparc_cpu == PROCESSOR_ULTRASPARC) \
+ return (GET_MODE (X) == DImode ? \
+ COSTS_N_INSNS (68) : COSTS_N_INSNS (37)); \
return COSTS_N_INSNS (25); \
/* Make FLOAT and FIX more expensive than CONST_DOUBLE,\
so that cse will favor the latter. */ \
@@ -2719,12 +2691,26 @@ extern struct rtx_def *legitimize_pic_address ();
#define ISSUE_RATE sparc_issue_rate()
/* Adjust the cost of dependencies. */
-#define ADJUST_COST(INSN,LINK,DEP,COST) \
- if (sparc_cpu == PROCESSOR_SUPERSPARC) \
- (COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST); \
- else if (sparc_cpu == PROCESSOR_ULTRASPARC) \
- (COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, COST); \
- else
+#define ADJUST_COST(INSN,LINK,DEP,COST) \
+ sparc_adjust_cost(INSN, LINK, DEP, COST)
+
+extern void ultrasparc_sched_reorder ();
+extern void ultrasparc_sched_init ();
+extern int ultrasparc_variable_issue ();
+
+#define MD_SCHED_INIT(DUMP, SCHED_VERBOSE) \
+ if (sparc_cpu == PROCESSOR_ULTRASPARC) \
+ ultrasparc_sched_init (DUMP, SCHED_VERBOSE)
+
+#define MD_SCHED_REORDER(DUMP, SCHED_VERBOSE, READY, N_READY) \
+ if (sparc_cpu == PROCESSOR_ULTRASPARC) \
+ ultrasparc_sched_reorder (DUMP, SCHED_VERBOSE, READY, N_READY)
+
+#define MD_SCHED_VARIABLE_ISSUE(DUMP, SCHED_VERBOSE, INSN, CAN_ISSUE_MORE) \
+ if (sparc_cpu == PROCESSOR_ULTRASPARC) \
+ (CAN_ISSUE_MORE) = ultrasparc_variable_issue (INSN); \
+ else \
+ (CAN_ISSUE_MORE)--
/* Conditional branches with empty delay slots have a length of two. */
#define ADJUST_INSN_LENGTH(INSN, LENGTH) \
@@ -2925,13 +2911,23 @@ extern struct rtx_def *legitimize_pic_address ();
#define ASM_OUTPUT_BYTE(FILE,VALUE) \
fprintf (FILE, "\t%s\t0x%x\n", ASM_BYTE_OP, (VALUE))
+/* This is how we hook in and defer the case-vector until the end of
+ the function. */
+extern void sparc_defer_case_vector ();
+
+#define ASM_OUTPUT_ADDR_VEC(LAB,VEC) \
+ sparc_defer_case_vector ((LAB),(VEC), 0)
+
+#define ASM_OUTPUT_ADDR_DIFF_VEC(LAB,VEC) \
+ sparc_defer_case_vector ((LAB),(VEC), 1)
+
/* This is how to output an element of a case-vector that is absolute. */
#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
do { \
char label[30]; \
ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
- if (Pmode == SImode) \
+ if (CASE_VECTOR_MODE == SImode) \
fprintf (FILE, "\t.word\t"); \
else \
fprintf (FILE, "\t.xword\t"); \
@@ -2945,8 +2941,8 @@ do { \
#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
do { \
char label[30]; \
- ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
- if (Pmode == SImode) \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", (VALUE)); \
+ if (CASE_VECTOR_MODE == SImode) \
fprintf (FILE, "\t.word\t"); \
else \
fprintf (FILE, "\t.xword\t"); \
@@ -2957,6 +2953,20 @@ do { \
fputc ('\n', FILE); \
} while (0)
+/* This is what to output before and after a case-vector (both
+ relative and absolute). If .subsection -1 works, we put case-vectors
+ at the beginning of the current section. */
+
+#ifdef HAVE_GAS_SUBSECTION_ORDERING
+
+#define ASM_OUTPUT_ADDR_VEC_START(FILE) \
+ fprintf(FILE, "\t.subsection\t-1\n")
+
+#define ASM_OUTPUT_ADDR_VEC_END(FILE) \
+ fprintf(FILE, "\t.previous\n")
+
+#endif
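/* Editor's sketch (illustrative label syntax): with a gas that supports
   .subsection ordering, each dispatch table is bracketed as

	.subsection	-1
	.word	.LL4
	.word	.LL5
	.previous

   which places the vector at the front of the current section.  */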
+
/* This is how to output an assembler line
that says to advance the location counter
to a multiple of 2**LOG bytes. */
@@ -3105,7 +3115,6 @@ do { \
else if (GET_CODE (index) == REG) \
fprintf (FILE, "+%s", reg_names[REGNO (index)]); \
else if (GET_CODE (index) == SYMBOL_REF \
- || GET_CODE (index) == LABEL_REF \
|| GET_CODE (index) == CONST) \
fputc ('+', FILE), output_addr_const (FILE, index); \
else abort (); \
@@ -3121,7 +3130,10 @@ do { \
else if (GET_CODE (addr) == LO_SUM) \
{ \
output_operand (XEXP (addr, 0), 0); \
- fputs ("+%lo(", FILE); \
+ if (TARGET_CM_MEDMID) \
+ fputs ("+%l44(", FILE); \
+ else \
+ fputs ("+%lo(", FILE); \
output_address (XEXP (addr, 1)); \
fputc (')', FILE); \
} \
@@ -3148,46 +3160,49 @@ do { \
/* Define the codes that are matched by predicates in sparc.c. */
-#define PREDICATE_CODES \
-{"reg_or_0_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE}}, \
-{"fp_zero_operand", {CONST_DOUBLE}}, \
-{"intreg_operand", {SUBREG, REG}}, \
-{"fcc_reg_operand", {REG}}, \
-{"icc_or_fcc_reg_operand", {REG}}, \
-{"restore_operand", {REG}}, \
-{"call_operand", {MEM}}, \
-{"call_operand_address", {SYMBOL_REF, LABEL_REF, CONST, CONST_DOUBLE, ADDRESSOF, \
- SUBREG, REG, PLUS, LO_SUM, CONST_INT}}, \
-{"symbolic_operand", {SYMBOL_REF, LABEL_REF, CONST, CONST_DOUBLE}}, \
-{"symbolic_memory_operand", {SUBREG, MEM}}, \
-{"label_ref_operand", {LABEL_REF}}, \
-{"sp64_medium_pic_operand", {CONST}}, \
-{"data_segment_operand", {SYMBOL_REF, PLUS, CONST}}, \
-{"text_segment_operand", {LABEL_REF, SYMBOL_REF, PLUS, CONST}}, \
-{"reg_or_nonsymb_mem_operand", {SUBREG, REG, MEM}}, \
-{"sparc_operand", {SUBREG, REG, CONSTANT_P_RTX, CONST_INT, MEM}}, \
-{"move_operand", {SUBREG, REG, CONSTANT_P_RTX, CONST_INT, CONST_DOUBLE, MEM}}, \
-{"splittable_symbolic_memory_operand", {MEM}}, \
-{"splittable_immediate_memory_operand", {MEM}}, \
-{"eq_or_neq", {EQ, NE}}, \
-{"normal_comp_operator", {GE, GT, LE, LT, GTU, LEU}}, \
-{"noov_compare_op", {NE, EQ, GE, GT, LE, LT, GEU, GTU, LEU, LTU}}, \
-{"v9_regcmp_op", {EQ, NE, GE, LT, LE, GT}}, \
-{"v8plus_regcmp_op", {EQ, NE}}, \
-{"extend_op", {SIGN_EXTEND, ZERO_EXTEND}}, \
-{"cc_arithop", {AND, IOR, XOR}}, \
-{"cc_arithopn", {AND, IOR}}, \
-{"arith_operand", {SUBREG, REG, CONSTANT_P_RTX, CONST_INT}}, \
-{"arith11_operand", {SUBREG, REG, CONSTANT_P_RTX, CONST_INT}}, \
-{"arith10_operand", {SUBREG, REG, CONSTANT_P_RTX, CONST_INT}}, \
-{"arith_double_operand", {SUBREG, REG, CONSTANT_P_RTX, CONST_INT, CONST_DOUBLE}}, \
-{"arith11_double_operand", {SUBREG, REG, CONSTANT_P_RTX, CONST_INT, CONST_DOUBLE}}, \
-{"arith10_double_operand", {SUBREG, REG, CONSTANT_P_RTX, CONST_INT, CONST_DOUBLE}}, \
-{"small_int", {CONST_INT, CONSTANT_P_RTX}}, \
-{"uns_small_int", {CONST_INT, CONSTANT_P_RTX}}, \
-{"uns_arith_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \
-{"clobbered_register", {REG}},
-
+#define PREDICATE_CODES \
+{"reg_or_0_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE}}, \
+{"fp_zero_operand", {CONST_DOUBLE}}, \
+{"intreg_operand", {SUBREG, REG}}, \
+{"fcc_reg_operand", {REG}}, \
+{"icc_or_fcc_reg_operand", {REG}}, \
+{"restore_operand", {REG}}, \
+{"call_operand", {MEM}}, \
+{"call_operand_address", {SYMBOL_REF, LABEL_REF, CONST, CONST_DOUBLE, \
+ ADDRESSOF, SUBREG, REG, PLUS, LO_SUM, CONST_INT}}, \
+{"symbolic_operand", {SYMBOL_REF, LABEL_REF, CONST, CONST_DOUBLE}}, \
+{"symbolic_memory_operand", {SUBREG, MEM}}, \
+{"label_ref_operand", {LABEL_REF}}, \
+{"sp64_medium_pic_operand", {CONST}}, \
+{"data_segment_operand", {SYMBOL_REF, PLUS, CONST}}, \
+{"text_segment_operand", {LABEL_REF, SYMBOL_REF, PLUS, CONST}}, \
+{"reg_or_nonsymb_mem_operand", {SUBREG, REG, MEM}}, \
+{"splittable_symbolic_memory_operand", {MEM}}, \
+{"splittable_immediate_memory_operand", {MEM}}, \
+{"eq_or_neq", {EQ, NE}}, \
+{"normal_comp_operator", {GE, GT, LE, LT, GTU, LEU}}, \
+{"noov_compare_op", {NE, EQ, GE, GT, LE, LT, GEU, GTU, LEU, LTU}}, \
+{"v9_regcmp_op", {EQ, NE, GE, LT, LE, GT}}, \
+{"extend_op", {SIGN_EXTEND, ZERO_EXTEND}}, \
+{"cc_arithop", {AND, IOR, XOR}}, \
+{"cc_arithopn", {AND, IOR}}, \
+{"arith_operand", {SUBREG, REG, CONST_INT}}, \
+{"arith_add_operand", {SUBREG, REG, CONST_INT}}, \
+{"arith11_operand", {SUBREG, REG, CONST_INT}}, \
+{"arith10_operand", {SUBREG, REG, CONST_INT}}, \
+{"arith_double_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE}}, \
+{"arith_double_add_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE}}, \
+{"arith11_double_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE}}, \
+{"arith10_double_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE}}, \
+{"small_int", {CONST_INT}}, \
+{"small_int_or_double", {CONST_INT, CONST_DOUBLE}}, \
+{"uns_small_int", {CONST_INT}}, \
+{"uns_arith_operand", {SUBREG, REG, CONST_INT}}, \
+{"clobbered_register", {REG}}, \
+{"input_operand", {SUBREG, REG, CONST_INT, MEM, CONST}}, \
+{"zero_operand", {CONST_INT}}, \
+{"const64_operand", {CONST_INT, CONST_DOUBLE}}, \
+{"const64_high_operand", {CONST_INT, CONST_DOUBLE}},
/* The number of Pmode words for the setjmp buffer. */
#define JMP_BUF_SIZE 12
@@ -3196,17 +3211,15 @@ do { \
/* Declare functions defined in sparc.c and used in templates. */
-extern char *doublemove_string ();
-extern char *output_block_move ();
+extern void sparc_emit_set_const32 ();
+extern void sparc_emit_set_const64 ();
+extern void sparc_emit_set_symbolic_const64 ();
+extern int sparc_splitdi_legitimate ();
+extern int sparc_absnegfloat_split_legitimate ();
+
extern char *output_cbranch ();
-extern char *output_fp_move_double ();
-extern char *output_fp_move_quad ();
-extern char *output_move_double ();
-extern char *output_move_quad ();
-extern char *output_return ();
-extern char *output_scc_insn ();
+extern const char *output_return ();
extern char *output_v9branch ();
-extern char *singlemove_string ();
extern void emit_v9_brxx_insn ();
extern void finalize_pic ();
@@ -3224,8 +3237,16 @@ extern int arith10_operand ();
extern int arith11_double_operand ();
extern int arith11_operand ();
extern int arith_double_operand ();
+extern int arith_double_4096_operand ();
+extern int arith_double_add_operand ();
extern int arith_operand ();
+extern int arith_4096_operand ();
+extern int arith_add_operand ();
extern int call_operand_address ();
+extern int input_operand ();
+extern int zero_operand ();
+extern int const64_operand ();
+extern int const64_high_operand ();
extern int cc_arithop ();
extern int cc_arithopn ();
extern int check_pic ();
@@ -3239,8 +3260,7 @@ extern int fcc_reg_operand ();
extern int fp_zero_operand ();
extern int icc_or_fcc_reg_operand ();
extern int label_ref_operand ();
-extern int mem_aligned_8 ();
-extern int move_operand ();
+extern int mem_min_alignment ();
extern int noov_compare_op ();
extern int pic_address_needs_scratch ();
extern int reg_or_0_operand ();
@@ -3251,27 +3271,29 @@ extern int registers_ok_for_ldd_peep ();
extern int restore_operand ();
extern int short_branch ();
extern int small_int ();
+extern int small_int_or_double ();
extern int sp64_medium_pic_operand ();
extern int sparc_flat_eligible_for_epilogue_delay ();
extern int sparc_flat_epilogue_delay_slots ();
extern int sparc_issue_rate ();
-extern int sparc_operand ();
extern int splittable_immediate_memory_operand ();
extern int splittable_symbolic_memory_operand ();
-extern int supersparc_adjust_cost ();
+extern int sparc_adjust_cost ();
extern int symbolic_memory_operand ();
extern int symbolic_operand ();
extern int text_segment_operand ();
-extern int ultrasparc_adjust_cost ();
extern int uns_small_int ();
-extern int v8plus_regcmp_op ();
-extern int v8plus_regcmp_p ();
extern int v9_regcmp_op ();
extern int v9_regcmp_p ();
extern unsigned long sparc_flat_compute_frame_size ();
extern unsigned long sparc_type_code ();
+extern void sparc_function_profiler ();
+extern void sparc_function_block_profiler ();
+extern void sparc_block_profiler ();
+extern void sparc_function_block_profiler_exit ();
+
extern char *sparc_v8plus_shift ();
#ifdef __STDC__
diff --git a/contrib/gcc/config/sparc/sparc.md b/contrib/gcc/config/sparc/sparc.md
index ccfde05..02170b7 100644
--- a/contrib/gcc/config/sparc/sparc.md
+++ b/contrib/gcc/config/sparc/sparc.md
@@ -1,5 +1,5 @@
;;- Machine description for SPARC chip for GNU C compiler
-;; Copyright (C) 1987, 88, 89, 92-96, 1997 Free Software Foundation, Inc.
+;; Copyright (C) 1987, 88, 89, 92-98, 1999 Free Software Foundation, Inc.
;; Contributed by Michael Tiemann (tiemann@cygnus.com)
;; 64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
;; at Cygnus Support.
@@ -23,6 +23,37 @@
;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+;; Uses of UNSPEC and UNSPEC_VOLATILE in this file:
+;;
+;; UNSPEC: 0 movsi_{lo_sum,high}_pic
+;; pic_lo_sum_di
+;; pic_sethi_di
+;; 1 update_return
+;; 2 get_pc
+;; 5 movsi_{,lo_sum_,high_}pic_label_ref
+;; 6 seth44
+;; 7 setm44
+;; 8 setl44
+;; 9 sethh
+;; 10 setlm
+;; 11 embmedany_sethi, embmedany_brsum
+;; 12 movsf_const_high
+;; 13 embmedany_textuhi
+;; 14 embmedany_texthi
+;; 15 embmedany_textulo
+;; 16 embmedany_textlo
+;; 17 movsf_const_lo
+;; 18 sethm
+;; 19 setlo
+;;
+;; UNSPEC_VOLATILE: 0 blockage
+;; 1 flush_register_windows
+;; 2 goto_handler_and_restore
+;; 3 goto_handler_and_restore_v9*
+;; 4 flush
+;; 5 nonlocal_goto_receiver
+;;
+
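As a reading aid: the bare integers in the table above act as an ad-hoc registry. A hypothetical C mirror of that registry (the names here are invented purely for illustration; the .md patterns themselves use the raw numbers):

/* Hypothetical mirror of the UNSPEC usage table above; GCC 2.95's
   patterns hard-code the integers directly.  */
enum sparc_unspec_usage
{
  SPARC_UNSPEC_PIC           = 0,  /* movsi_{lo_sum,high}_pic, pic_{lo_sum,sethi}_di */
  SPARC_UNSPEC_UPDATE_RETURN = 1,
  SPARC_UNSPEC_GET_PC        = 2,
  SPARC_UNSPEC_PIC_LABEL_REF = 5,  /* movsi_{,lo_sum_,high_}pic_label_ref */
  SPARC_UNSPEC_SETH44        = 6   /* 6..19 cover seth44 through setlo, as tabulated */
};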
;; The upper 32 fp regs on the v9 can't hold SFmode values. To deal with this
;; a second register class, EXTRA_FP_REGS, exists for the v9 chip. The name
;; is a bit of a misnomer as it covers all 64 fp regs. The corresponding
@@ -34,7 +65,7 @@
;; Attribute for cpu type.
;; These must match the values for enum processor_type in sparc.h.
-(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,sparclet,tsc701,v9,ultrasparc"
+(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,hypersparc,sparclite86x,sparclet,tsc701,v9,ultrasparc"
(const (symbol_ref "sparc_cpu_attr")))
;; Attribute for the instruction set.
@@ -128,8 +159,7 @@
[(eq_attr "in_call_delay" "true") (nil) (nil)])
(define_attr "leaf_function" "false,true"
- (const (symbol_ref "leaf_function")))
-
+ (const (symbol_ref "current_function_uses_only_leaf_regs")))
(define_attr "in_return_delay" "false,true"
(if_then_else (and (and (and (eq_attr "type" "move,load,sload,store,binary,ialu")
@@ -315,6 +345,53 @@
(eq_attr "type" "imul"))
4 4)
+;; ----- hypersparc/sparclite86x scheduling
+;; The Hypersparc can issue 1 - 2 insns per cycle. The dual issue cases are:
+;; L-Ld/St I-Int F-Float B-Branch LI/LF/LB/II/IF/IB/FF/FB
+;; II/FF case is only when loading a 32 bit hi/lo constant
+;; Single issue insns include call, jmpl, u/smul, u/sdiv, lda, sta, fcmp
+;; Memory delivers its result in one cycle to IU
+
+(define_function_unit "memory" 1 0
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "load,sload,fpload"))
+ 1 1)
+
+(define_function_unit "memory" 1 0
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "store,fpstore"))
+ 2 1)
+
+(define_function_unit "fp_alu" 1 0
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "fp,fpmove,fpcmp"))
+ 1 1)
+
+(define_function_unit "fp_mds" 1 0
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "fpmul"))
+ 1 1)
+
+(define_function_unit "fp_mds" 1 0
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "fpdivs"))
+ 8 6)
+
+(define_function_unit "fp_mds" 1 0
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "fpdivd"))
+ 12 10)
+
+(define_function_unit "fp_mds" 1 0
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "fpsqrt"))
+ 17 15)
+
+(define_function_unit "fp_mds" 1 0
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "imul"))
+ 17 15)
+
;; ----- sparclet tsc701 scheduling
;; The tsc701 issues 1 insn per cycle.
;; Results may be written back out of order.
@@ -370,21 +447,31 @@
(eq_attr "type" "store,fpstore"))
1 1)
-(define_function_unit "ieu" 1 0
+(define_function_unit "ieuN" 2 0
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "ialu,binary,shift,compare,cmove,call"))
+ (eq_attr "type" "ialu,binary,move,unary,shift,compare,call,call_no_delay_slot,uncond_branch"))
1 1)
-(define_function_unit "ieu_shift" 1 0
+(define_function_unit "ieu0" 1 0
(and (eq_attr "cpu" "ultrasparc")
(eq_attr "type" "shift"))
1 1)
-(define_function_unit "ieu_shift" 1 0
+(define_function_unit "ieu0" 1 0
(and (eq_attr "cpu" "ultrasparc")
(eq_attr "type" "cmove"))
2 1)
+(define_function_unit "ieu1" 1 0
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "compare,call,call_no_delay_slot,uncond_branch"))
+ 1 1)
+
+(define_function_unit "cti" 1 0
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "branch"))
+ 1 1)
+
;; Timings; throughput/latency
;; FMOV 1/1 fmov, fabs, fneg
;; FMOVcc 1/2
@@ -395,6 +482,11 @@
;; FSQRTs 12/12
;; FSQRTd 22/22
;; FCMP takes 1 cycle to branch, 2 cycles to conditional move.
+;;
+;; ??? This is really bogus because the timings really depend upon
+;; who uses the result. We should record who the user is with
+;; more descriptive 'type' attribute names and account for these
+;; issues in ultrasparc_adjust_cost.
(define_function_unit "fadd" 1 0
(and (eq_attr "cpu" "ultrasparc")
@@ -426,17 +518,17 @@
(eq_attr "type" "fpcmove"))
2 1)
-(define_function_unit "fadd" 1 0
+(define_function_unit "fmul" 1 0
(and (eq_attr "cpu" "ultrasparc")
(eq_attr "type" "fpdivs"))
12 12)
-(define_function_unit "fadd" 1 0
+(define_function_unit "fmul" 1 0
(and (eq_attr "cpu" "ultrasparc")
(eq_attr "type" "fpdivd"))
22 22)
-(define_function_unit "fadd" 1 0
+(define_function_unit "fmul" 1 0
(and (eq_attr "cpu" "ultrasparc")
(eq_attr "type" "fpsqrt"))
12 12)
@@ -475,7 +567,7 @@
[(set (reg:CCX 100)
(compare:CCX (match_operand:DI 0 "register_operand" "")
(match_operand:DI 1 "arith_double_operand" "")))]
- "TARGET_ARCH64 || TARGET_V8PLUS"
+ "TARGET_ARCH64"
"
{
sparc_compare_op0 = operands[0];
@@ -529,7 +621,7 @@
(compare:CC (match_operand:SI 0 "register_operand" "r")
(match_operand:SI 1 "arith_operand" "rI")))]
""
- "cmp %0,%1"
+ "cmp\\t%0, %1"
[(set_attr "type" "compare")])
(define_insn "*cmpdi_sp64"
@@ -537,42 +629,9 @@
(compare:CCX (match_operand:DI 0 "register_operand" "r")
(match_operand:DI 1 "arith_double_operand" "rHI")))]
"TARGET_ARCH64"
- "cmp %0,%1"
+ "cmp\\t%0, %1"
[(set_attr "type" "compare")])
-(define_insn "cmpdi_v8plus"
- [(set (reg:CCX 100)
- (compare:CCX (match_operand:DI 0 "register_operand" "r,r,r")
- (match_operand:DI 1 "arith_double_operand" "J,I,r")))
- (clobber (match_scratch:SI 2 "=&h,&h,&h"))
- (clobber (match_scratch:SI 3 "=X,X,&h"))]
- "TARGET_V8PLUS"
- "*
-{
- /* The srl can be omitted if the value in the %L0 or %L1 is already
- zero extended. */
-
- output_asm_insn (\"sllx %H0,32,%2\", operands);
-
- if (sparc_check_64 (operands[0], insn) <= 0)
- output_asm_insn (\"srl %L0,0,%L0\", operands);
-
- switch (which_alternative)
- {
- case 0:
- return \"orcc %L0,%2,%%g0\";
- case 1:
- return \"or %L0,%2,%2\;cmp %2,%1\";
- case 2:
- if (sparc_check_64 (operands[1], insn) <= 0)
- output_asm_insn (\"srl %L1,0,%L1\", operands);
- return \"sllx %H1,32,%3\;or %L0,%2,%2\;or %L1,%3,%3\;cmp %2,%3\";
- default:
- abort();
- }
-}"
- [(set_attr "length" "3,4,7")])
-
(define_insn "*cmpsf_fpe"
[(set (match_operand:CCFPE 0 "fcc_reg_operand" "=c")
(compare:CCFPE (match_operand:SF 1 "register_operand" "f")
@@ -581,8 +640,8 @@
"*
{
if (TARGET_V9)
- return \"fcmpes %0,%1,%2\";
- return \"fcmpes %1,%2\";
+ return \"fcmpes\\t%0, %1, %2\";
+ return \"fcmpes\\t%1, %2\";
}"
[(set_attr "type" "fpcmp")])
@@ -594,8 +653,8 @@
"*
{
if (TARGET_V9)
- return \"fcmped %0,%1,%2\";
- return \"fcmped %1,%2\";
+ return \"fcmped\\t%0, %1, %2\";
+ return \"fcmped\\t%1, %2\";
}"
[(set_attr "type" "fpcmp")])
@@ -607,8 +666,8 @@
"*
{
if (TARGET_V9)
- return \"fcmpeq %0,%1,%2\";
- return \"fcmpeq %1,%2\";
+ return \"fcmpeq\\t%0, %1, %2\";
+ return \"fcmpeq\\t%1, %2\";
}"
[(set_attr "type" "fpcmp")])
@@ -620,8 +679,8 @@
"*
{
if (TARGET_V9)
- return \"fcmps %0,%1,%2\";
- return \"fcmps %1,%2\";
+ return \"fcmps\\t%0, %1, %2\";
+ return \"fcmps\\t%1, %2\";
}"
[(set_attr "type" "fpcmp")])
@@ -633,8 +692,8 @@
"*
{
if (TARGET_V9)
- return \"fcmpd %0,%1,%2\";
- return \"fcmpd %1,%2\";
+ return \"fcmpd\\t%0, %1, %2\";
+ return \"fcmpd\\t%1, %2\";
}"
[(set_attr "type" "fpcmp")])
@@ -646,8 +705,8 @@
"*
{
if (TARGET_V9)
- return \"fcmpq %0,%1,%2\";
- return \"fcmpq %1,%2\";
+ return \"fcmpq\\t%0, %1, %2\";
+ return \"fcmpq\\t%1, %2\";
}"
[(set_attr "type" "fpcmp")])
@@ -704,7 +763,7 @@
(xor:DI (match_operand:DI 1 "register_operand" "")
(match_operand:DI 2 "register_operand" "")))
(set (match_operand:SI 0 "register_operand" "")
- (eq:DI (match_dup 3) (const_int 0)))]
+ (eq:SI (match_dup 3) (const_int 0)))]
"TARGET_ARCH64"
"{ operands[3] = gen_reg_rtx (DImode); }")
@@ -713,7 +772,7 @@
(xor:DI (match_operand:DI 1 "register_operand" "")
(match_operand:DI 2 "register_operand" "")))
(set (match_operand:SI 0 "register_operand" "")
- (ne:DI (match_dup 3) (const_int 0)))]
+ (ne:SI (match_dup 3) (const_int 0)))]
"TARGET_ARCH64"
"{ operands[3] = gen_reg_rtx (DImode); }")
@@ -722,7 +781,7 @@
(xor:SI (match_operand:SI 1 "register_operand" "")
(match_operand:SI 2 "register_operand" "")))
(parallel [(set (match_operand:DI 0 "register_operand" "")
- (eq:SI (match_dup 3) (const_int 0)))
+ (eq:DI (match_dup 3) (const_int 0)))
(clobber (reg:CC 100))])]
"TARGET_ARCH64"
"{ operands[3] = gen_reg_rtx (SImode); }")
@@ -732,7 +791,7 @@
(xor:SI (match_operand:SI 1 "register_operand" "")
(match_operand:SI 2 "register_operand" "")))
(parallel [(set (match_operand:DI 0 "register_operand" "")
- (ne:SI (match_dup 3) (const_int 0)))
+ (ne:DI (match_dup 3) (const_int 0)))
(clobber (reg:CC 100))])]
"TARGET_ARCH64"
"{ operands[3] = gen_reg_rtx (SImode); }")
@@ -787,7 +846,7 @@
DONE;
/* fall through */
}
- operands[1] = gen_compare_reg (EQ, sparc_compare_op0, sparc_compare_op1);
+ FAIL;
}")
;; ??? v9: Operand 0 needs a mode, so SImode was chosen.
@@ -840,7 +899,7 @@
DONE;
/* fall through */
}
- operands[1] = gen_compare_reg (NE, sparc_compare_op0, sparc_compare_op1);
+ FAIL;
}")
(define_expand "sgt"
@@ -861,7 +920,7 @@
DONE;
/* fall through */
}
- operands[1] = gen_compare_reg (GT, sparc_compare_op0, sparc_compare_op1);
+ FAIL;
}")
(define_expand "slt"
@@ -882,7 +941,7 @@
DONE;
/* fall through */
}
- operands[1] = gen_compare_reg (LT, sparc_compare_op0, sparc_compare_op1);
+ FAIL;
}")
(define_expand "sge"
@@ -903,7 +962,7 @@
DONE;
/* fall through */
}
- operands[1] = gen_compare_reg (GE, sparc_compare_op0, sparc_compare_op1);
+ FAIL;
}")
(define_expand "sle"
@@ -924,7 +983,7 @@
DONE;
/* fall through */
}
- operands[1] = gen_compare_reg (LE, sparc_compare_op0, sparc_compare_op1);
+ FAIL;
}")
(define_expand "sgtu"
@@ -935,7 +994,7 @@
{
if (! TARGET_V9)
{
- rtx tem;
+ rtx tem, pat;
/* We can do ltu easily, so if both operands are registers, swap them and
do a LTU. */
@@ -947,7 +1006,10 @@
tem = sparc_compare_op0;
sparc_compare_op0 = sparc_compare_op1;
sparc_compare_op1 = tem;
- emit_insn (gen_sltu (operands[0]));
+ pat = gen_sltu (operands[0]);
+ if (pat == NULL_RTX)
+ FAIL;
+ emit_insn (pat);
DONE;
}
}
@@ -956,7 +1018,7 @@
if (gen_v9_scc (GTU, operands))
DONE;
}
- operands[1] = gen_compare_reg (GTU, sparc_compare_op0, sparc_compare_op1);
+ FAIL;
}")
(define_expand "sltu"
@@ -995,7 +1057,7 @@
{
if (! TARGET_V9)
{
- rtx tem;
+ rtx tem, pat;
/* We can do geu easily, so if both operands are registers, swap them and
do a GEU. */
@@ -1007,7 +1069,10 @@
tem = sparc_compare_op0;
sparc_compare_op0 = sparc_compare_op1;
sparc_compare_op1 = tem;
- emit_insn (gen_sgeu (operands[0]));
+ pat = gen_sgeu (operands[0]);
+ if (pat == NULL_RTX)
+ FAIL;
+ emit_insn (pat);
DONE;
}
}
@@ -1016,13 +1081,15 @@
if (gen_v9_scc (LEU, operands))
DONE;
}
- operands[1] = gen_compare_reg (LEU, sparc_compare_op0, sparc_compare_op1);
+ FAIL;
}")
;; Now the DEFINE_INSNs for the scc cases.
;; The SEQ and SNE patterns are special because they can be done
-;; without any branching and do not involve a COMPARE.
+;; without any branching and do not involve a COMPARE. We want
+;; them to always use the splits below so the results can be
+;; scheduled.
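Concretely, the branchless sequences these splits expand to ride on the carry flag. A minimal stand-alone C model of the trick (not GCC code; the asm mnemonics are given in the comments):

/* "subcc %g0, x, %g0" computes 0 - x and sets the carry flag exactly
   when x != 0 (the subtraction borrows); addx/subx then fold that
   carry into the result, so no branch is ever needed.  */
#include <stdio.h>

static unsigned carry_after_neg (unsigned x) { return x != 0; }

int main (void)
{
  unsigned x = 5;
  unsigned sne = 0 + 0 + carry_after_neg (x);   /* addx %g0, 0, %dst  -> 1 */
  unsigned seq = 0 + 1 - carry_after_neg (x);   /* subx %g0, -1, %dst -> 0 */
  printf ("sne=%u seq=%u\n", sne, seq);
  return 0;
}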
(define_insn "*snesi_zero"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -1030,9 +1097,19 @@
(const_int 0)))
(clobber (reg:CC 100))]
"! TARGET_LIVE_G0"
- "subcc %%g0,%1,%%g0\;addx %%g0,0,%0"
- [(set_attr "type" "unary")
- (set_attr "length" "2")])
+ "#"
+ [(set_attr "length" "2")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ne:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 0)))
+ (clobber (reg:CC 100))]
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (ltu:SI (reg:CC 100) (const_int 0)))]
+ "")
(define_insn "*neg_snesi_zero"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -1040,56 +1117,126 @@
(const_int 0))))
(clobber (reg:CC 100))]
"! TARGET_LIVE_G0"
- "subcc %%g0,%1,%%g0\;subx %%g0,0,%0"
- [(set_attr "type" "unary")
- (set_attr "length" "2")])
+ "#"
+ [(set_attr "length" "2")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (neg:SI (ne:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 0))))
+ (clobber (reg:CC 100))]
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (neg:SI (ltu:SI (reg:CC 100) (const_int 0))))]
+ "")
(define_insn "*snesi_zero_extend"
[(set (match_operand:DI 0 "register_operand" "=r")
- (ne:SI (match_operand:SI 1 "register_operand" "r")
+ (ne:DI (match_operand:SI 1 "register_operand" "r")
(const_int 0)))
(clobber (reg:CC 100))]
"TARGET_ARCH64"
- "subcc %%g0,%1,%%g0\;addx %%g0,0,%0"
+ "#"
[(set_attr "type" "unary")
(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ne:DI (match_operand:SI 1 "register_operand" "")
+ (const_int 0)))
+ (clobber (reg:CC 100))]
+ "TARGET_ARCH64"
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (minus:SI (const_int 0) (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (zero_extend:DI (plus:SI (plus:SI (const_int 0)
+ (const_int 0))
+ (ltu:SI (reg:CC_NOOV 100)
+ (const_int 0)))))]
+ "")
+
(define_insn "*snedi_zero"
[(set (match_operand:DI 0 "register_operand" "=&r")
(ne:DI (match_operand:DI 1 "register_operand" "r")
(const_int 0)))]
"TARGET_ARCH64"
- "mov 0,%0\;movrnz %1,1,%0"
+ "#"
[(set_attr "type" "cmove")
(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ne:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:DI (ne:DI (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (match_dup 0)))]
+ "")
+
(define_insn "*neg_snedi_zero"
[(set (match_operand:DI 0 "register_operand" "=&r")
(neg:DI (ne:DI (match_operand:DI 1 "register_operand" "r")
(const_int 0))))]
"TARGET_ARCH64"
- "mov 0,%0\;movrnz %1,-1,%0"
+ "#"
[(set_attr "type" "cmove")
(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (neg:DI (ne:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 0))))]
+ "TARGET_ARCH64"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:DI (ne:DI (match_dup 1)
+ (const_int 0))
+ (const_int -1)
+ (match_dup 0)))]
+ "")
+
(define_insn "*snedi_zero_trunc"
[(set (match_operand:SI 0 "register_operand" "=&r")
- (ne:DI (match_operand:DI 1 "register_operand" "r")
+ (ne:SI (match_operand:DI 1 "register_operand" "r")
(const_int 0)))]
"TARGET_ARCH64"
- "mov 0,%0\;movrnz %1,1,%0"
+ "#"
[(set_attr "type" "cmove")
(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ne:SI (match_operand:DI 1 "register_operand" "")
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:SI (ne:DI (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (match_dup 0)))]
+ "")
+
(define_insn "*seqsi_zero"
[(set (match_operand:SI 0 "register_operand" "=r")
(eq:SI (match_operand:SI 1 "register_operand" "r")
(const_int 0)))
(clobber (reg:CC 100))]
"! TARGET_LIVE_G0"
- "subcc %%g0,%1,%%g0\;subx %%g0,-1,%0"
- [(set_attr "type" "unary")
- (set_attr "length" "2")])
+ "#"
+ [(set_attr "length" "2")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (eq:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 0)))
+ (clobber (reg:CC 100))]
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (geu:SI (reg:CC 100) (const_int 0)))]
+ "")
(define_insn "*neg_seqsi_zero"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -1097,47 +1244,107 @@
(const_int 0))))
(clobber (reg:CC 100))]
"! TARGET_LIVE_G0"
- "subcc %%g0,%1,%%g0\;addx %%g0,-1,%0"
- [(set_attr "type" "unary")
- (set_attr "length" "2")])
+ "#"
+ [(set_attr "length" "2")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (neg:SI (eq:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 0))))
+ (clobber (reg:CC 100))]
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (neg:SI (geu:SI (reg:CC 100) (const_int 0))))]
+ "")
(define_insn "*seqsi_zero_extend"
[(set (match_operand:DI 0 "register_operand" "=r")
- (eq:SI (match_operand:SI 1 "register_operand" "r")
+ (eq:DI (match_operand:SI 1 "register_operand" "r")
(const_int 0)))
(clobber (reg:CC 100))]
"TARGET_ARCH64"
- "subcc %%g0,%1,%%g0\;subx %%g0,-1,%0"
+ "#"
[(set_attr "type" "unary")
(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (eq:DI (match_operand:SI 1 "register_operand" "")
+ (const_int 0)))
+ (clobber (reg:CC 100))]
+ "TARGET_ARCH64"
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (minus:SI (const_int 0) (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (zero_extend:DI (minus:SI (minus:SI (const_int 0)
+ (const_int -1))
+ (ltu:SI (reg:CC_NOOV 100)
+ (const_int 0)))))]
+ "")
+
(define_insn "*seqdi_zero"
[(set (match_operand:DI 0 "register_operand" "=&r")
(eq:DI (match_operand:DI 1 "register_operand" "r")
(const_int 0)))]
"TARGET_ARCH64"
- "mov 0,%0\;movrz %1,1,%0"
+ "#"
[(set_attr "type" "cmove")
(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (eq:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:DI (eq:DI (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (match_dup 0)))]
+ "")
+
(define_insn "*neg_seqdi_zero"
[(set (match_operand:DI 0 "register_operand" "=&r")
(neg:DI (eq:DI (match_operand:DI 1 "register_operand" "r")
(const_int 0))))]
"TARGET_ARCH64"
- "mov 0,%0\;movrz %1,-1,%0"
+ "#"
[(set_attr "type" "cmove")
(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (neg:DI (eq:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 0))))]
+ "TARGET_ARCH64"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:DI (eq:DI (match_dup 1)
+ (const_int 0))
+ (const_int -1)
+ (match_dup 0)))]
+ "")
+
(define_insn "*seqdi_zero_trunc"
[(set (match_operand:SI 0 "register_operand" "=&r")
- (eq:DI (match_operand:DI 1 "register_operand" "r")
+ (eq:SI (match_operand:DI 1 "register_operand" "r")
(const_int 0)))]
"TARGET_ARCH64"
- "mov 0,%0\;movrz %1,1,%0"
+ "#"
[(set_attr "type" "cmove")
(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (eq:SI (match_operand:DI 1 "register_operand" "")
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:SI (eq:DI (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (match_dup 0)))]
+ "")
+
;; We can also do (x + (i == 0)) and related, so put them in.
;; ??? The addx/subx insns use the 32 bit carry flag so there are no DImode
;; versions for v9.
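For reference, the same carry folded into a second operand is what the patterns and splits below compute; a hedged C rendering (function names mimic the insn names, the asm is in the comments):

/* x op (i != 0) / (i == 0), with the flag coming from
   "subcc %g0, i, %g0" and folded in by a single addx/subx.  */
static unsigned x_plus_i_ne_0  (unsigned x, unsigned i) { return x + (i != 0); } /* addx %2,  0, %0 */
static unsigned x_minus_i_ne_0 (unsigned x, unsigned i) { return x - (i != 0); } /* subx %2,  0, %0 */
static unsigned x_plus_i_eq_0  (unsigned x, unsigned i) { return x + (i == 0); } /* subx %2, -1, %0 */
static unsigned x_minus_i_eq_0 (unsigned x, unsigned i) { return x - (i == 0); } /* addx %2, -1, %0 */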
@@ -1149,9 +1356,22 @@
(match_operand:SI 2 "register_operand" "r")))
(clobber (reg:CC 100))]
"! TARGET_LIVE_G0"
- "subcc %%g0,%1,%%g0\;addx %2,0,%0"
+ "#"
[(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (ne:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 0))
+ (match_operand:SI 2 "register_operand" "")))
+ (clobber (reg:CC 100))]
+ "! TARGET_LIVE_G0"
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (plus:SI (ltu:SI (reg:CC 100) (const_int 0))
+ (match_dup 2)))]
+ "")
+
(define_insn "*x_minus_i_ne_0"
[(set (match_operand:SI 0 "register_operand" "=r")
(minus:SI (match_operand:SI 2 "register_operand" "r")
@@ -1159,9 +1379,22 @@
(const_int 0))))
(clobber (reg:CC 100))]
"! TARGET_LIVE_G0"
- "subcc %%g0,%1,%%g0\;subx %2,0,%0"
+ "#"
[(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (match_operand:SI 2 "register_operand" "")
+ (ne:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 0))))
+ (clobber (reg:CC 100))]
+ "! TARGET_LIVE_G0"
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (minus:SI (match_dup 2)
+ (ltu:SI (reg:CC 100) (const_int 0))))]
+ "")
+
(define_insn "*x_plus_i_eq_0"
[(set (match_operand:SI 0 "register_operand" "=r")
(plus:SI (eq:SI (match_operand:SI 1 "register_operand" "r")
@@ -1169,9 +1402,22 @@
(match_operand:SI 2 "register_operand" "r")))
(clobber (reg:CC 100))]
"! TARGET_LIVE_G0"
- "subcc %%g0,%1,%%g0\;subx %2,-1,%0"
+ "#"
[(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (eq:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 0))
+ (match_operand:SI 2 "register_operand" "")))
+ (clobber (reg:CC 100))]
+ "! TARGET_LIVE_G0"
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (plus:SI (geu:SI (reg:CC 100) (const_int 0))
+ (match_dup 2)))]
+ "")
+
(define_insn "*x_minus_i_eq_0"
[(set (match_operand:SI 0 "register_operand" "=r")
(minus:SI (match_operand:SI 2 "register_operand" "r")
@@ -1179,9 +1425,22 @@
(const_int 0))))
(clobber (reg:CC 100))]
"! TARGET_LIVE_G0"
- "subcc %%g0,%1,%%g0\;addx %2,-1,%0"
+ "#"
[(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (match_operand:SI 2 "register_operand" "")
+ (eq:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 0))))
+ (clobber (reg:CC 100))]
+ "! TARGET_LIVE_G0"
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (minus:SI (match_dup 2)
+ (geu:SI (reg:CC 100) (const_int 0))))]
+ "")
+
;; We can also do GEU and LTU directly, but these operate after a compare.
;; ??? The addx/subx insns use the 32 bit carry flag so there are no DImode
;; versions for v9.
@@ -1190,15 +1449,17 @@
[(set (match_operand:SI 0 "register_operand" "=r")
(ltu:SI (reg:CC 100) (const_int 0)))]
"! TARGET_LIVE_G0"
- "addx %%g0,0,%0"
- [(set_attr "type" "misc")])
+ "addx\\t%%g0, 0, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn "*neg_sltu_insn"
[(set (match_operand:SI 0 "register_operand" "=r")
(neg:SI (ltu:SI (reg:CC 100) (const_int 0))))]
"! TARGET_LIVE_G0"
- "subx %%g0,0,%0"
- [(set_attr "type" "misc")])
+ "subx\\t%%g0, 0, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
;; ??? Combine should canonicalize these next two to the same pattern.
(define_insn "*neg_sltu_minus_x"
@@ -1206,30 +1467,34 @@
(minus:SI (neg:SI (ltu:SI (reg:CC 100) (const_int 0)))
(match_operand:SI 1 "arith_operand" "rI")))]
"! TARGET_LIVE_G0"
- "subx %%g0,%1,%0"
- [(set_attr "type" "unary")])
+ "subx\\t%%g0, %1, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn "*neg_sltu_plus_x"
[(set (match_operand:SI 0 "register_operand" "=r")
(neg:SI (plus:SI (ltu:SI (reg:CC 100) (const_int 0))
(match_operand:SI 1 "arith_operand" "rI"))))]
"! TARGET_LIVE_G0"
- "subx %%g0,%1,%0"
- [(set_attr "type" "unary")])
+ "subx\\t%%g0, %1, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn "*sgeu_insn"
[(set (match_operand:SI 0 "register_operand" "=r")
(geu:SI (reg:CC 100) (const_int 0)))]
"! TARGET_LIVE_G0"
- "subx %%g0,-1,%0"
- [(set_attr "type" "misc")])
+ "subx\\t%%g0, -1, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn "*neg_sgeu_insn"
[(set (match_operand:SI 0 "register_operand" "=r")
(neg:SI (geu:SI (reg:CC 100) (const_int 0))))]
"! TARGET_LIVE_G0"
- "addx %%g0,-1,%0"
- [(set_attr "type" "misc")])
+ "addx\\t%%g0, -1, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
;; We can also do (x + ((unsigned) i >= 0)) and related, so put them in.
;; ??? The addx/subx insns use the 32 bit carry flag so there are no DImode
@@ -1240,8 +1505,9 @@
(plus:SI (ltu:SI (reg:CC 100) (const_int 0))
(match_operand:SI 1 "arith_operand" "rI")))]
"! TARGET_LIVE_G0"
- "addx %%g0,%1,%0"
- [(set_attr "type" "unary")])
+ "addx\\t%%g0, %1, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn "*sltu_plus_x_plus_y"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -1249,64 +1515,57 @@
(plus:SI (match_operand:SI 1 "arith_operand" "%r")
(match_operand:SI 2 "arith_operand" "rI"))))]
""
- "addx %1,%2,%0")
+ "addx\\t%1, %2, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn "*x_minus_sltu"
[(set (match_operand:SI 0 "register_operand" "=r")
(minus:SI (match_operand:SI 1 "register_operand" "r")
(ltu:SI (reg:CC 100) (const_int 0))))]
""
- "subx %1,0,%0"
- [(set_attr "type" "unary")])
+ "subx\\t%1, 0, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
;; ??? Combine should canonicalize these next two to the same pattern.
(define_insn "*x_minus_y_minus_sltu"
[(set (match_operand:SI 0 "register_operand" "=r")
- (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:SI (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
(match_operand:SI 2 "arith_operand" "rI"))
(ltu:SI (reg:CC 100) (const_int 0))))]
""
- "subx %1,%2,%0")
+ "subx\\t%r1, %2, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn "*x_minus_sltu_plus_y"
[(set (match_operand:SI 0 "register_operand" "=r")
- (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
(plus:SI (ltu:SI (reg:CC 100) (const_int 0))
(match_operand:SI 2 "arith_operand" "rI"))))]
""
- "subx %1,%2,%0")
+ "subx\\t%r1, %2, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn "*sgeu_plus_x"
[(set (match_operand:SI 0 "register_operand" "=r")
(plus:SI (geu:SI (reg:CC 100) (const_int 0))
(match_operand:SI 1 "register_operand" "r")))]
""
- "subx %1,-1,%0"
- [(set_attr "type" "unary")])
+ "subx\\t%1, -1, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn "*x_minus_sgeu"
[(set (match_operand:SI 0 "register_operand" "=r")
(minus:SI (match_operand:SI 1 "register_operand" "r")
(geu:SI (reg:CC 100) (const_int 0))))]
""
- "addx %1,-1,%0"
- [(set_attr "type" "unary")])
-
-;; Now we have the generic scc insns.
-;; !v9: These will be done using a jump.
-;; v9: Use conditional moves which are defined elsewhere.
-;; We have to exclude the cases above, since we will not want combine to
-;; turn something that does not require a jump into something that does.
-
-(define_insn "*scc_si"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (match_operator:SI 2 "noov_compare_op"
- [(match_operand 1 "icc_or_fcc_reg_operand" "")
- (const_int 0)]))]
- ""
- "* return output_scc_insn (operands, insn); "
- [(set_attr "type" "multi")
- (set_attr "length" "3")])
+ "addx\\t%1, -1, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_split
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -1324,15 +1583,6 @@
(match_dup 0)))]
"")
-(define_insn "*scc_di"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (match_operator:DI 2 "noov_compare_op"
- [(match_operand 1 "icc_or_fcc_reg_operand" "")
- (const_int 0)]))]
- "TARGET_ARCH64"
- "* return output_scc_insn (operands, insn); "
- [(set_attr "type" "multi")
- (set_attr "length" "3")])
;; These control RTL generation for conditional jump insns
@@ -1526,6 +1776,7 @@
;; Now match both normal and inverted jump.
+;; XXX fpcmp nop braindamage
(define_insn "*normal_branch"
[(set (pc)
(if_then_else (match_operator 0 "noov_compare_op"
@@ -1541,6 +1792,7 @@
}"
[(set_attr "type" "branch")])
+;; XXX fpcmp nop braindamage
(define_insn "*inverted_branch"
[(set (pc)
(if_then_else (match_operator 0 "noov_compare_op"
@@ -1556,6 +1808,7 @@
}"
[(set_attr "type" "branch")])
+;; XXX fpcmp nop braindamage
(define_insn "*normal_fp_branch"
[(set (pc)
(if_then_else (match_operator 1 "comparison_operator"
@@ -1572,6 +1825,7 @@
}"
[(set_attr "type" "branch")])
+;; XXX fpcmp nop braindamage
(define_insn "*inverted_fp_branch"
[(set (pc)
(if_then_else (match_operator 1 "comparison_operator"
@@ -1588,6 +1842,7 @@
}"
[(set_attr "type" "branch")])
+;; XXX fpcmp nop braindamage
(define_insn "*normal_fpe_branch"
[(set (pc)
(if_then_else (match_operator 1 "comparison_operator"
@@ -1604,6 +1859,7 @@
}"
[(set_attr "type" "branch")])
+;; XXX fpcmp nop braindamage
(define_insn "*inverted_fpe_branch"
[(set (pc)
(if_then_else (match_operator 1 "comparison_operator"
@@ -1625,6 +1881,7 @@
;; There are no 32 bit brreg insns.
+;; XXX
(define_insn "*normal_int_branch_sp64"
[(set (pc)
(if_then_else (match_operator 0 "v9_regcmp_op"
@@ -1637,10 +1894,11 @@
{
return output_v9branch (operands[0], 1, 2, 0,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
+;; XXX
(define_insn "*inverted_int_branch_sp64"
[(set (pc)
(if_then_else (match_operator 0 "v9_regcmp_op"
@@ -1653,673 +1911,972 @@
{
return output_v9branch (operands[0], 1, 2, 1,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
-;; Esoteric move insns (lo_sum, high, pic).
-
-(define_insn "*lo_sum_si"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
- (match_operand:SI 2 "immediate_operand" "in")))]
- ""
- ;; V9 needs "add" because of the code models. We still use "or" for v8
- ;; so we can compare the old compiler with the new.
- "* return TARGET_ARCH64 ? \"add %1,%%lo(%a2),%0\" : \"or %1,%%lo(%a2),%0\";"
- ;; Need to set length for this arith insn because operand2
- ;; is not an "arith_operand".
- [(set_attr "length" "1")])
-
-;; For PIC, symbol_refs are put inside unspec so that the optimizer will not
-;; confuse them with real addresses.
-(define_insn "pic_lo_sum_si"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
- (unspec:SI [(match_operand:SI 2 "immediate_operand" "in")] 0)))]
- "flag_pic"
- ;; V9 needs "add" because of the code models. We still use "or" for v8
- ;; so we can compare the old compiler with the new.
- "* return TARGET_ARCH64 ? \"add %1,%%lo(%a2),%0\" : \"or %1,%%lo(%a2),%0\";"
- ;; Need to set length for this arith insn because operand2
- ;; is not an "arith_operand".
- [(set_attr "length" "1")])
-
-;; The PIC version of sethi must appear before the non-pic case so that
-;; the unspec will not be matched as part of the operand.
-;; For PIC, symbol_refs are put inside unspec so that the optimizer will not
-;; confuse them with real addresses.
-(define_insn "pic_sethi_si"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (high:SI (unspec:SI [(match_operand 1 "" "")] 0)))]
- "flag_pic && check_pic (1)"
- "sethi %%hi(%a1),%0"
- [(set_attr "type" "move")
- (set_attr "length" "1")])
-
-(define_insn "pic_lo_sum_di"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (lo_sum:SI (match_operand:DI 1 "register_operand" "r")
- (unspec:SI [(match_operand:DI 2 "immediate_operand" "in")] 0)))]
- "TARGET_ARCH64 && flag_pic"
- "add %1,%%lo(%a2),%0"
- [(set_attr "length" "1")])
-
-(define_insn "pic_sethi_di"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (high:SI (unspec:SI [(match_operand 1 "" "")] 0)))]
- "TARGET_ARCH64 && flag_pic && check_pic (1)"
- "sethi %%hi(%a1),%0"
- [(set_attr "type" "move")
- (set_attr "length" "1")])
+;; Load program counter insns.
(define_insn "get_pc"
[(clobber (reg:SI 15))
(set (match_operand 0 "register_operand" "=r")
(unspec [(match_operand 1 "" "") (match_operand 2 "" "")] 2))]
"flag_pic && REGNO (operands[0]) == 23"
- "sethi %%hi(%a1-4),%0\;call %a2\;add %0,%%lo(%a1+4),%0"
+ "sethi\\t%%hi(%a1-4), %0\\n\\tcall\\t%a2\\n\\tadd\\t%0, %%lo(%a1+4), %0"
[(set_attr "length" "3")])
-(define_insn "get_pc_via_rdpc"
- [(set (match_operand 0 "register_operand" "=r") (pc))]
- "TARGET_V9"
- "rd %%pc,%0"
- [(set_attr "type" "move")])
-
-(define_insn "*sethi_hi"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (high:HI (match_operand 1 "" "")))]
- "check_pic (1)"
- "sethi %%hi(%a1),%0"
- [(set_attr "type" "move")
- (set_attr "length" "1")])
+;; Currently unused...
+;; (define_insn "get_pc_via_rdpc"
+;; [(set (match_operand 0 "register_operand" "=r") (pc))]
+;; "TARGET_V9"
+;; "rd\\t%%pc, %0"
+;; [(set_attr "type" "move")])
-;; This must appear after the PIC sethi so that the PIC unspec will not
-;; be matched as part of the operand.
-(define_insn "*sethi_si"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (high:SI (match_operand 1 "" "")))]
- "check_pic (1)"
- "sethi %%hi(%a1),%0"
- [(set_attr "type" "move")
- (set_attr "length" "1")])
+
+;; Move instructions
-(define_insn "*lo_sum_di_sp32"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (lo_sum:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "immediate_operand" "in")))]
- "! TARGET_ARCH64"
- "*
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
{
- /* Don't output a 64 bit constant, since we can't trust the assembler to
- handle it correctly. */
- if (GET_CODE (operands[2]) == CONST_DOUBLE)
- operands[2] = GEN_INT (CONST_DOUBLE_LOW (operands[2]));
- else if (GET_CODE (operands[2]) == CONST_INT
- && HOST_BITS_PER_WIDE_INT > 32
- && INTVAL (operands[2]) > 0xffffffff)
- operands[2] = GEN_INT (INTVAL (operands[2]) & 0xffffffff);
+ /* Working with CONST_INTs is easier, so convert
+ a double if needed. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ operands[1] = GEN_INT (CONST_DOUBLE_LOW (operands[1]) & 0xff);
+ }
+ else if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ /* And further, we know for all QI cases that only the
+ low byte is significant, which we can always process
+ in a single insn. So mask it now. */
+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
+ }
- return \"or %L1,%%lo(%a2),%L0\";
-}"
- ;; Need to set length for this arith insn because operand2
- ;; is not an "arith_operand".
- [(set_attr "length" "1")])
+ /* Handle sets of MEM first. */
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ /* This checks TARGET_LIVE_G0 for us. */
+ if (reg_or_0_operand (operands[1], QImode))
+ goto movqi_is_ok;
-;; ??? Optimizer does not handle "or %o1,%lo(0),%o1". How about add?
+ if (! reload_in_progress)
+ {
+ operands[0] = validize_mem (operands[0]);
+ operands[1] = force_reg (QImode, operands[1]);
+ }
+ }
-(define_insn "*lo_sum_di_sp64"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (lo_sum:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "immediate_operand" "in")))]
- "TARGET_ARCH64"
- "*
-{
- /* Don't output a 64 bit constant, since we can't trust the assembler to
- handle it correctly. */
- if (GET_CODE (operands[2]) == CONST_DOUBLE)
- operands[2] = GEN_INT (CONST_DOUBLE_LOW (operands[2]));
- else if (GET_CODE (operands[2]) == CONST_INT
- && HOST_BITS_PER_WIDE_INT > 32
- && INTVAL (operands[2]) > 0xffffffff)
- operands[2] = GEN_INT (INTVAL (operands[2]) & 0xffffffff);
-
- /* Note that we use add here. This is important because Medium/Anywhere
- code model support depends on it. */
- return \"add %1,%%lo(%a2),%0\";
-}"
- ;; Need to set length for this arith insn because operand2
- ;; is not an "arith_operand".
- [(set_attr "length" "1")])
+ /* Fixup PIC cases. */
+ if (flag_pic)
+ {
+ if (CONSTANT_P (operands[1])
+ && pic_address_needs_scratch (operands[1]))
+ operands[1] = legitimize_pic_address (operands[1], QImode, 0);
-(define_insn "*sethi_di_sp32"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (high:DI (match_operand 1 "" "")))]
- "! TARGET_ARCH64 && check_pic (1)"
- "*
+ if (symbolic_operand (operands[1], QImode))
+ {
+ operands[1] = legitimize_pic_address (operands[1],
+ QImode,
+ (reload_in_progress ?
+ operands[0] :
+ NULL_RTX));
+ goto movqi_is_ok;
+ }
+ }
+
+ /* All QI constants require only one insn, so proceed. */
+
+ movqi_is_ok:
+ ;
+}")
+
+(define_insn "*movqi_insn"
+ [(set (match_operand:QI 0 "general_operand" "=r,r,m")
+ (match_operand:QI 1 "input_operand" "rI,m,rJ"))]
+ "(register_operand (operands[0], QImode)
+ || reg_or_0_operand (operands[1], QImode))"
+ "@
+ mov\\t%1, %0
+ ldub\\t%1, %0
+ stb\\t%r1, %0"
+ [(set_attr "type" "move,load,store")
+ (set_attr "length" "1")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
{
- rtx op0 = operands[0];
- rtx op1 = operands[1];
+ /* Working with CONST_INTs is easier, so convert
+ a double if needed. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ operands[1] = GEN_INT (CONST_DOUBLE_LOW (operands[1]));
- if (GET_CODE (op1) == CONST_INT)
+ /* Handle sets of MEM first. */
+ if (GET_CODE (operands[0]) == MEM)
{
- operands[0] = operand_subword (op0, 1, 0, DImode);
- output_asm_insn (\"sethi %%hi(%a1),%0\", operands);
+ /* This checks TARGET_LIVE_G0 for us. */
+ if (reg_or_0_operand (operands[1], HImode))
+ goto movhi_is_ok;
- operands[0] = operand_subword (op0, 0, 0, DImode);
- if (INTVAL (op1) < 0)
- return \"mov -1,%0\";
- else
- return \"mov 0,%0\";
+ if (! reload_in_progress)
+ {
+ operands[0] = validize_mem (operands[0]);
+ operands[1] = force_reg (HImode, operands[1]);
+ }
}
- else if (GET_CODE (op1) == CONST_DOUBLE)
+
+ /* Fixup PIC cases. */
+ if (flag_pic)
{
- operands[0] = operand_subword (op0, 1, 0, DImode);
- operands[1] = GEN_INT (CONST_DOUBLE_LOW (op1));
- output_asm_insn (\"sethi %%hi(%a1),%0\", operands);
+ if (CONSTANT_P (operands[1])
+ && pic_address_needs_scratch (operands[1]))
+ operands[1] = legitimize_pic_address (operands[1], HImode, 0);
- operands[0] = operand_subword (op0, 0, 0, DImode);
- operands[1] = GEN_INT (CONST_DOUBLE_HIGH (op1));
- return singlemove_string (operands);
+ if (symbolic_operand (operands[1], HImode))
+ {
+ operands[1] = legitimize_pic_address (operands[1],
+ HImode,
+ (reload_in_progress ?
+ operands[0] :
+ NULL_RTX));
+ goto movhi_is_ok;
+ }
}
- else
- abort ();
- return \"\";
-}"
- [(set_attr "type" "move")
- (set_attr "length" "2")])
-;;; ??? This pattern originally clobbered a scratch register. However, this
-;;; is invalid, the movdi pattern may not use a temp register because it
-;;; may be called from reload to reload a DImode value. In that case, we
-;;; end up with a scratch register that never gets allocated. To avoid this,
-;;; we use global register 1 which is never otherwise used by gcc as a temp.
-;;; The correct solution here might be to force DImode constants to memory,
-;;; e.g. by using a toc like the romp and rs6000 ports do for addresses, reg
-;;; 1 will then no longer need to be considered a fixed reg.
-
-(define_expand "sethi_di_sp64"
- [(parallel
- [(set (match_operand:DI 0 "register_operand" "")
- (high:DI (match_operand 1 "general_operand" "")))
- (clobber (reg:DI 1))])]
+ /* This makes sure we will not get re-matched due to splitting. */
+ if (! CONSTANT_P (operands[1]) || input_operand (operands[1], HImode))
+ ;
+ else if (CONSTANT_P (operands[1])
+ && GET_CODE (operands[1]) != HIGH
+ && GET_CODE (operands[1]) != LO_SUM)
+ {
+ sparc_emit_set_const32 (operands[0], operands[1]);
+ DONE;
+ }
+ movhi_is_ok:
+ ;
+}")
+
+(define_insn "*movhi_const64_special"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (match_operand:HI 1 "const64_high_operand" ""))]
"TARGET_ARCH64"
- "")
+ "sethi\\t%%hi(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
-(define_insn "*sethi_di_sp64_const"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (high:DI (match_operand 1 "const_double_operand" "")))
- (clobber (reg:DI 1))]
- "TARGET_ARCH64 && check_pic (1)"
- "*
+(define_insn "*movhi_insn"
+ [(set (match_operand:HI 0 "general_operand" "=r,r,r,m")
+ (match_operand:HI 1 "input_operand" "rI,K,m,rJ"))]
+ "(register_operand (operands[0], HImode)
+ || reg_or_0_operand (operands[1], HImode))"
+ "@
+ mov\\t%1, %0
+ sethi\\t%%hi(%a1), %0
+ lduh\\t%1, %0
+ sth\\t%r1, %0"
+ [(set_attr "type" "move,move,load,store")
+ (set_attr "length" "1")])
+
+;; We always work with constants here.
+(define_insn "*movhi_lo_sum"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ior:HI (match_operand:HI 1 "arith_operand" "%r")
+ (match_operand:HI 2 "arith_operand" "I")))]
+ ""
+ "or\\t%1, %2, %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
{
-#if HOST_BITS_PER_WIDE_INT == 32
- rtx high, low;
-
- split_double (operands[1], &high, &low);
+ /* Working with CONST_INTs is easier, so convert
+ a double if needed. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ operands[1] = GEN_INT (CONST_DOUBLE_LOW (operands[1]));
- if (high == const0_rtx)
- {
- operands[1] = low;
- output_asm_insn (\"sethi %%hi(%a1),%0\", operands);
- }
- else
+ /* Handle sets of MEM first. */
+ if (GET_CODE (operands[0]) == MEM)
{
- operands[1] = high;
- output_asm_insn (singlemove_string (operands), operands);
+ /* This checks TARGET_LIVE_G0 for us. */
+ if (reg_or_0_operand (operands[1], SImode))
+ goto movsi_is_ok;
- operands[1] = low;
- output_asm_insn (\"sllx %0,32,%0\", operands);
- if (low != const0_rtx)
- output_asm_insn (\"sethi %%hi(%a1),%%g1; or %0,%%g1,%0\", operands);
+ if (! reload_in_progress)
+ {
+ operands[0] = validize_mem (operands[0]);
+ operands[1] = force_reg (SImode, operands[1]);
+ }
}
-#else
- rtx op = operands[1];
- if (! SPARC_SETHI_P (INTVAL(op)))
+ /* Fixup PIC cases. */
+ if (flag_pic)
{
- operands[1] = GEN_INT (INTVAL (op) >> 32);
- output_asm_insn (singlemove_string (operands), operands);
+ if (CONSTANT_P (operands[1])
+ && pic_address_needs_scratch (operands[1]))
+ operands[1] = legitimize_pic_address (operands[1], SImode, 0);
+
+ if (GET_CODE (operands[1]) == LABEL_REF)
+ {
+ /* Label refs must go through the dedicated PIC sequence. */
+ emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
+ DONE;
+ }
- output_asm_insn (\"sllx %0,32,%0\", operands);
- if (INTVAL (op) & 0xffffffff)
+ if (symbolic_operand (operands[1], SImode))
{
- operands[1] = GEN_INT (INTVAL (op) & 0xffffffff);
- output_asm_insn (\"sethi %%hi(%a1),%%g1; or %0,%%g1,%0\", operands);
+ operands[1] = legitimize_pic_address (operands[1],
+ SImode,
+ (reload_in_progress ?
+ operands[0] :
+ NULL_RTX));
+ goto movsi_is_ok;
}
}
- else
+
+ /* If we are trying to toss an integer constant into the
+ FPU registers, force it into memory. */
+ if (GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) >= SPARC_FIRST_FP_REG
+ && REGNO (operands[0]) <= SPARC_LAST_V9_FP_REG
+ && CONSTANT_P (operands[1]))
+ operands[1] = validize_mem (force_const_mem (GET_MODE (operands[0]),
+ operands[1]));
+
+ /* This makes sure we will not get re-matched due to splitting. */
+ if (! CONSTANT_P (operands[1]) || input_operand (operands[1], SImode))
+ ;
+ else if (CONSTANT_P (operands[1])
+ && GET_CODE (operands[1]) != HIGH
+ && GET_CODE (operands[1]) != LO_SUM)
{
- output_asm_insn (\"sethi %%hi(%a1),%0\", operands);
+ sparc_emit_set_const32 (operands[0], operands[1]);
+ DONE;
}
-#endif
+ movsi_is_ok:
+ ;
+}")
- return \"\";
-}"
- [(set_attr "type" "move")
- (set_attr "length" "5")])
-
-;; Most of the required support for the various code models is here.
-;; We can do this because sparcs need the high insn to load the address. We
-;; just need to get high to do the right thing for each code model. Then each
-;; uses the same "%X+%lo(...)" in the load/store insn, though in the case of
-;; the medium/middle code model "%lo" is written "%l44".
-
-;; When TARGET_CM_MEDLOW, assume that the upper 32 bits of symbol addresses are
-;; always 0.
-;; When TARGET_CM_MEDMID, the executable must be in the low 16 TB of memory.
-;; This corresponds to the low 44 bits, and the %[hml]44 relocs are used.
-;; ??? Not implemented yet.
-;; When TARGET_CM_EMBMEDANY, the text and data segments have a maximum size of
-;; 31 bits and may be located anywhere. EMBMEDANY_BASE_REG contains the start
-;; address of the data segment, currently %g4.
-;; When TARGET_CM_MEDANY, the text and data segments have a maximum size of 31
-;; bits and may be located anywhere. The maximum offset from any instruction
-;; to the label _GLOBAL_OFFSET_TABLE_ is 31 bits.
+;; Special LIVE_G0 pattern to obtain zero in a register.
+(define_insn "*movsi_zero_liveg0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "zero_operand" "J"))]
+ "TARGET_LIVE_G0"
+ "and\\t%0, 0, %0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "1")])
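With TARGET_LIVE_G0, %g0 is an ordinary register rather than hardwired zero, so it cannot serve as a zero source; the pattern above therefore clears the destination in place. The whole trick in one line of C (illustrative only):

/* rd & 0 == 0 for any rd, so "and %0, 0, %0" needs no zero register.  */
static unsigned materialize_zero (unsigned rd) { return rd & 0u; }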
-(define_insn "*sethi_di_medlow"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (high:DI (match_operand 1 "" "")))
- ;; The clobber is here because emit_move_sequence assumes the worst case.
- (clobber (reg:DI 1))]
- "TARGET_CM_MEDLOW && check_pic (1)"
- "sethi %%hi(%a1),%0"
+;; This is needed to show CSE exactly which bits are set
+;; in a 64-bit register by sethi instructions.
+(define_insn "*movsi_const64_special"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "const64_high_operand" ""))]
+ "TARGET_ARCH64"
+ "sethi\\t%%hi(%a1), %0"
[(set_attr "type" "move")
(set_attr "length" "1")])
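What these *_const64_special patterns expose to CSE is sethi's exact effect on a 64-bit register; a hedged C model of that effect:

/* v9 sethi: the 22-bit immediate lands in bits 31..10; bits 9..0 and
   the upper 32 bits of the destination are cleared.  */
static unsigned long long sethi64 (unsigned imm22)
{
  return (unsigned long long) (imm22 & 0x3fffffu) << 10;
}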
-(define_insn "*sethi_di_medium_pic"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (high:DI (match_operand 1 "sp64_medium_pic_operand" "")))]
- "(TARGET_CM_MEDLOW || TARGET_CM_EMBMEDANY) && check_pic (1)"
- "sethi %%hi(%a1),%0"
- [(set_attr "type" "move")
+(define_insn "*movsi_insn"
+ [(set (match_operand:SI 0 "general_operand" "=r,f,r,r,r,f,m,m,d")
+ (match_operand:SI 1 "input_operand" "rI,!f,K,J,m,!m,rJ,!f,J"))]
+ "(register_operand (operands[0], SImode)
+ || reg_or_0_operand (operands[1], SImode))"
+ "@
+ mov\\t%1, %0
+ fmovs\\t%1, %0
+ sethi\\t%%hi(%a1), %0
+ clr\\t%0
+ ld\\t%1, %0
+ ld\\t%1, %0
+ st\\t%r1, %0
+ st\\t%1, %0
+ fzeros\\t%0"
+ [(set_attr "type" "move,fpmove,move,move,load,fpload,store,fpstore,fpmove")
(set_attr "length" "1")])
-;; WARNING: %0 gets %hi(%1)+%g4.
-;; You cannot OR in %lo(%1), it must be added in.
+(define_insn "*movsi_lo_sum"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "in")))]
+ ""
+ "or\\t%1, %%lo(%a2), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
-(define_insn "*sethi_di_embmedany_data"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (high:DI (match_operand 1 "data_segment_operand" "")))
- ;; The clobber is here because emit_move_sequence assumes the worst case.
- (clobber (reg:DI 1))]
- "TARGET_CM_EMBMEDANY && check_pic (1)"
- "sethi %%hi(%a1),%0; add %0,%_,%0"
+(define_insn "*movsi_high"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (match_operand:SI 1 "immediate_operand" "in")))]
+ ""
+ "sethi\\t%%hi(%a1), %0"
[(set_attr "type" "move")
- (set_attr "length" "2")])
+ (set_attr "length" "1")])
-(define_insn "*sethi_di_embmedany_text"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (high:DI (match_operand 1 "text_segment_operand" "")))
- ;; The clobber is here because emit_move_sequence assumes the worst case.
- (clobber (reg:DI 1))]
- "TARGET_CM_EMBMEDANY && check_pic (1)"
- "sethi %%uhi(%a1),%%g1; or %%g1,%%ulo(%a1),%%g1; sllx %%g1,32,%%g1; sethi %%hi(%a1),%0; or %0,%%g1,%0"
+;; The next two patterns must wrap the SYMBOL_REF in an UNSPEC
+;; so that CSE won't optimize the address computation away.
+(define_insn "movsi_lo_sum_pic"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand:SI 2 "immediate_operand" "in")] 0)))]
+ "flag_pic"
+ "or\\t%1, %%lo(%a2), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
+
+(define_insn "movsi_high_pic"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (unspec:SI [(match_operand 1 "" "")] 0)))]
+ "flag_pic && check_pic (1)"
+ "sethi\\t%%hi(%a1), %0"
[(set_attr "type" "move")
- (set_attr "length" "5")])
-
-;; Move instructions
+ (set_attr "length" "1")])
-(define_expand "movqi"
- [(set (match_operand:QI 0 "general_operand" "")
- (match_operand:QI 1 "general_operand" ""))]
- ""
+(define_expand "movsi_pic_label_ref"
+ [(set (match_dup 3) (high:SI
+ (unspec:SI [(match_operand:SI 1 "label_ref_operand" "")
+ (match_dup 2)] 5)))
+ (set (match_dup 4) (lo_sum:SI (match_dup 3)
+ (unspec:SI [(match_dup 1) (match_dup 2)] 5)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_dup 5) (match_dup 4)))]
+ "flag_pic"
"
{
- if (emit_move_sequence (operands, QImode))
- DONE;
+ current_function_uses_pic_offset_table = 1;
+ operands[2] = gen_rtx_SYMBOL_REF (Pmode, \"_GLOBAL_OFFSET_TABLE_\");
+ operands[3] = gen_reg_rtx (SImode);
+ operands[4] = gen_reg_rtx (SImode);
+ operands[5] = pic_offset_table_rtx;
}")
-(define_insn "*movqi_insn"
- [(set (match_operand:QI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q")
- (match_operand:QI 1 "move_operand" "rI,K,Q,rJ"))]
- "! TARGET_LIVE_G0
- && (register_operand (operands[0], QImode)
- || register_operand (operands[1], QImode)
- || operands[1] == const0_rtx)"
- "@
- mov %1,%0
- sethi %%hi(%a1),%0
- ldub %1,%0
- stb %r1,%0"
- [(set_attr "type" "move,move,load,store")
+(define_insn "*movsi_high_pic_label_ref"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI
+ (unspec:SI [(match_operand:SI 1 "label_ref_operand" "")
+ (match_operand:SI 2 "" "")] 5)))]
+ "flag_pic"
+ "sethi\\t%%hi(%a2-(%a1-.)), %0"
+ [(set_attr "type" "move")
(set_attr "length" "1")])
-(define_insn "*movqi_insn_liveg0"
- [(set (match_operand:QI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,r,r,Q")
- (match_operand:QI 1 "move_operand" "r,J,I,K,Q,r"))]
- "TARGET_LIVE_G0
- && (register_operand (operands[0], QImode)
- || register_operand (operands[1], QImode))"
- "@
- mov %1,%0
- and %0,0,%0
- and %0,0,%0\;or %0,%1,%0
- sethi %%hi(%a1),%0
- ldub %1,%0
- stb %1,%0"
- [(set_attr "type" "move,move,move,move,load,store")
- (set_attr "length" "1,1,2,1,1,1")])
-
-(define_insn "*lo_sum_qi"
- [(set (match_operand:QI 0 "register_operand" "=r")
- (subreg:QI (lo_sum:SI (match_operand:QI 1 "register_operand" "r")
- (match_operand 2 "immediate_operand" "in")) 0))]
- ""
- "or %1,%%lo(%a2),%0"
- [(set_attr "length" "1")])
-
-(define_insn "*store_qi"
- [(set (mem:QI (match_operand:SI 0 "symbolic_operand" ""))
- (match_operand:QI 1 "reg_or_0_operand" "rJ"))
- (clobber (match_scratch:SI 2 "=&r"))]
- "(reload_completed || reload_in_progress)
- && ! TARGET_PTR64"
- "sethi %%hi(%a0),%2\;stb %r1,[%2+%%lo(%a0)]"
- [(set_attr "type" "store")
- (set_attr "length" "2")])
+(define_insn "*movsi_lo_sum_pic_label_ref"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand:SI 2 "label_ref_operand" "")
+ (match_operand:SI 3 "" "")] 5)))]
+ "flag_pic"
+ "or\\t%1, %%lo(%a3-(%a2-.)), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
-(define_expand "movhi"
- [(set (match_operand:HI 0 "general_operand" "")
- (match_operand:HI 1 "general_operand" ""))]
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
""
"
{
- if (emit_move_sequence (operands, HImode))
- DONE;
+ /* Where possible, convert CONST_DOUBLE into a CONST_INT. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE
+#if HOST_BITS_PER_WIDE_INT == 32
+ && ((CONST_DOUBLE_HIGH (operands[1]) == 0
+ && (CONST_DOUBLE_LOW (operands[1]) & 0x80000000) == 0)
+ || (CONST_DOUBLE_HIGH (operands[1]) == (HOST_WIDE_INT) 0xffffffff
+ && (CONST_DOUBLE_LOW (operands[1]) & 0x80000000) != 0))
+#endif
+ )
+ operands[1] = GEN_INT (CONST_DOUBLE_LOW (operands[1]));
+
+ /* Handle MEM cases first. */
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ /* If it's a REG, we can always do it.
+ Storing constant zero directly only works on v9,
+ where stx %g0 supplies a 64-bit zero. */
+ if (register_operand (operands[1], DImode)
+ || (TARGET_ARCH64
+ && (operands[1] == const0_rtx)))
+ goto movdi_is_ok;
+
+ if (! reload_in_progress)
+ {
+ operands[0] = validize_mem (operands[0]);
+ operands[1] = force_reg (DImode, operands[1]);
+ }
+ }
+
+ if (flag_pic)
+ {
+ if (CONSTANT_P (operands[1])
+ && pic_address_needs_scratch (operands[1]))
+ operands[1] = legitimize_pic_address (operands[1], DImode, 0);
+
+ if (GET_CODE (operands[1]) == LABEL_REF)
+ {
+ if (! TARGET_ARCH64)
+ abort ();
+ emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
+ DONE;
+ }
+
+ if (symbolic_operand (operands[1], DImode))
+ {
+ operands[1] = legitimize_pic_address (operands[1],
+ DImode,
+ (reload_in_progress ?
+ operands[0] :
+ NULL_RTX));
+ goto movdi_is_ok;
+ }
+ }
+
+ /* If we are trying to toss an integer constant into the
+ FPU registers, force it into memory. */
+ if (GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) >= SPARC_FIRST_FP_REG
+ && REGNO (operands[0]) <= SPARC_LAST_V9_FP_REG
+ && CONSTANT_P (operands[1]))
+ operands[1] = validize_mem (force_const_mem (GET_MODE (operands[0]),
+ operands[1]));
+
+ /* This makes sure we will not get re-matched due to splitting. */
+ if (! CONSTANT_P (operands[1]) || input_operand (operands[1], DImode))
+ ;
+ else if (TARGET_ARCH64
+ && CONSTANT_P (operands[1])
+ && GET_CODE (operands[1]) != HIGH
+ && GET_CODE (operands[1]) != LO_SUM)
+ {
+ sparc_emit_set_const64 (operands[0], operands[1]);
+ DONE;
+ }
+
+ movdi_is_ok:
+ ;
}")
-(define_insn "*movhi_insn"
- [(set (match_operand:HI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q")
- (match_operand:HI 1 "move_operand" "rI,K,Q,rJ"))]
- "! TARGET_LIVE_G0
- && (register_operand (operands[0], HImode)
- || register_operand (operands[1], HImode)
- || operands[1] == const0_rtx)"
+;; Be careful, fmovd does not exist when !arch64.
+;; We match MEM moves directly when we have correct even
+;; numbered registers, but fall into splits otherwise.
+;; The constraint ordering here is really important to
+;; avoid insane problems in reload, especially for patterns
+;; of the form:
+;;
+;; (set (mem:DI (plus:SI (reg:SI 30 %fp)
+;; (const_int -5016)))
+;; (reg:DI 2 %g2))
+;;
+(define_insn "*movdi_insn_sp32"
+ [(set (match_operand:DI 0 "general_operand" "=T,U,o,r,r,r,?T,?f,?f,?o,?f")
+ (match_operand:DI 1 "input_operand" "U,T,r,o,i,r,f,T,o,f,f"))]
+ "! TARGET_ARCH64 &&
+ (register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode))"
"@
- mov %1,%0
- sethi %%hi(%a1),%0
- lduh %1,%0
- sth %r1,%0"
- [(set_attr "type" "move,move,load,store")
+ std\\t%1, %0
+ ldd\\t%1, %0
+ #
+ #
+ #
+ #
+ std\\t%1, %0
+ ldd\\t%1, %0
+ #
+ #
+ #"
+ [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,*,*,*")
+ (set_attr "length" "1,1,2,2,2,2,1,1,2,2,2")])
+
+;; The following are generated by sparc_emit_set_const64
+(define_insn "*movdi_sp64_dbl"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "const64_operand" ""))]
+ "(TARGET_ARCH64
+ && HOST_BITS_PER_WIDE_INT != 64)"
+ "mov\\t%1, %0"
+ [(set_attr "type" "move")
(set_attr "length" "1")])
-(define_insn "*movhi_insn_liveg0"
- [(set (match_operand:HI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,r,r,Q")
- (match_operand:HI 1 "move_operand" "r,J,I,K,Q,r"))]
- "TARGET_LIVE_G0
- && (register_operand (operands[0], HImode)
- || register_operand (operands[1], HImode))"
- "@
- mov %1,%0
- and %0,0,%0
- and %0,0,%0\;or %0,%1,%0
- sethi %%hi(%a1),%0
- lduh %1,%0
- sth %1,%0"
- [(set_attr "type" "move,move,move,move,load,store")
- (set_attr "length" "1,1,2,1,1,1")])
-
-(define_insn "*lo_sum_hi"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (lo_sum:HI (match_operand:HI 1 "register_operand" "r")
- (match_operand 2 "immediate_operand" "in")))]
- ""
- "or %1,%%lo(%a2),%0"
- [(set_attr "length" "1")])
+;; This is needed to show CSE exactly which bits are set
+;; in a 64-bit register by sethi instructions.
+(define_insn "*movdi_const64_special"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "const64_high_operand" ""))]
+ "TARGET_ARCH64"
+ "sethi\\t%%hi(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
-(define_insn "*store_hi"
- [(set (mem:HI (match_operand:SI 0 "symbolic_operand" ""))
- (match_operand:HI 1 "reg_or_0_operand" "rJ"))
- (clobber (match_scratch:SI 2 "=&r"))]
- "(reload_completed || reload_in_progress)
- && ! TARGET_PTR64"
- "sethi %%hi(%a0),%2\;sth %r1,[%2+%%lo(%a0)]"
- [(set_attr "type" "store")
- (set_attr "length" "2")])
+(define_insn "*movdi_insn_sp64"
+ [(set (match_operand:DI 0 "general_operand" "=r,r,r,r,m,?e,?e,?m,b")
+ (match_operand:DI 1 "input_operand" "rI,K,J,m,rJ,e,m,e,J"))]
+ "TARGET_ARCH64 &&
+ (register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))"
+ "@
+ mov\\t%1, %0
+ sethi\\t%%hi(%a1), %0
+ clr\\t%0
+ ldx\\t%1, %0
+ stx\\t%r1, %0
+ fmovd\\t%1, %0
+ ldd\\t%1, %0
+ std\\t%1, %0
+ fzero\\t%0"
+ [(set_attr "type" "move,move,move,load,store,fpmove,fpload,fpstore,fpmove")
+ (set_attr "length" "1")])
-(define_expand "movsi"
- [(set (match_operand:SI 0 "general_operand" "")
- (match_operand:SI 1 "general_operand" ""))]
- ""
+(define_expand "movdi_pic_label_ref"
+ [(set (match_dup 3) (high:DI
+ (unspec:DI [(match_operand:DI 1 "label_ref_operand" "")
+ (match_dup 2)] 5)))
+ (set (match_dup 4) (lo_sum:DI (match_dup 3)
+ (unspec:DI [(match_dup 1) (match_dup 2)] 5)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_dup 5) (match_dup 4)))]
+ "TARGET_ARCH64 && flag_pic"
"
{
- if (emit_move_sequence (operands, SImode))
- DONE;
+ current_function_uses_pic_offset_table = 1;
+ operands[2] = gen_rtx_SYMBOL_REF (Pmode, \"_GLOBAL_OFFSET_TABLE_\");
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = pic_offset_table_rtx;
}")
-;; We must support both 'r' and 'f' registers here, because combine may
-;; convert SFmode hard registers to SImode hard registers when simplifying
-;; subreg sets.
+(define_insn "*movdi_high_pic_label_ref"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI
+ (unspec:DI [(match_operand:DI 1 "label_ref_operand" "")
+ (match_operand:DI 2 "" "")] 5)))]
+ "TARGET_ARCH64 && flag_pic"
+ "sethi\\t%%hi(%a2-(%a1-.)), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
-;; We cannot combine the similar 'r' and 'f' constraints, because it causes
-;; problems with register allocation. Reload might try to put an integer
-;; in an fp register, or an fp number is an integer register.
+(define_insn "*movdi_lo_sum_pic_label_ref"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "label_ref_operand" "")
+ (match_operand:DI 3 "" "")] 5)))]
+ "TARGET_ARCH64 && flag_pic"
+ "or\\t%1, %%lo(%a3-(%a2-.)), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
-(define_insn "*movsi_insn"
- [(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand" "=r,f,r,r,f,Q,Q,d")
- (match_operand:SI 1 "move_operand" "rI,!f,K,Q,!Q,rJ,!f,J"))]
- "! TARGET_LIVE_G0
- && (register_operand (operands[0], SImode)
- || register_operand (operands[1], SImode)
- || operands[1] == const0_rtx)
- && (GET_CODE (operands[0]) != REG || ! CONSTANT_P (operands[1])
- || REGNO (operands[0]) < 32
- || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)"
- "@
- mov %1,%0
- fmovs %1,%0
- sethi %%hi(%a1),%0
- ld %1,%0
- ld %1,%0
- st %r1,%0
- st %1,%0
- fzeros %0"
- [(set_attr "type" "move,fpmove,move,load,fpload,store,fpstore,fpmove")
- (set_attr "length" "1")])
-
-(define_insn "*movsi_insn_liveg0"
- [(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,f,r,r,f,Q,Q")
- (match_operand:SI 1 "move_operand" "r,J,I,!f,K,Q,!Q,r,!f"))]
- "TARGET_LIVE_G0
- && (register_operand (operands[0], SImode)
- || register_operand (operands[1], SImode))"
- "@
- mov %1,%0
- and %0,0,%0
- and %0,0,%0\;or %0,%1,%0
- fmovs %1,%0
- sethi %%hi(%a1),%0
- ld %1,%0
- ld %1,%0
- st %1,%0
- st %1,%0"
- [(set_attr "type" "move,move,move,fpmove,move,load,fpload,store,fpstore")
- (set_attr "length" "1,1,2,1,1,1,1,1,1")])
-
-(define_insn "*store_si"
- [(set (mem:SI (match_operand:SI 0 "symbolic_operand" ""))
- (match_operand:SI 1 "reg_or_0_operand" "rJ"))
- (clobber (match_scratch:SI 2 "=&r"))]
- "(reload_completed || reload_in_progress)
- && ! TARGET_PTR64"
- "sethi %%hi(%a0),%2\;st %r1,[%2+%%lo(%a0)]"
- [(set_attr "type" "store")
- (set_attr "length" "2")])
+;; Sparc-v9 code model support insns.  See sparc_emit_set_symbolic_const64
+;; in sparc.c for what is going on here...  PIC stuff comes first.
-(define_expand "movdi"
- [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "")
- (match_operand:DI 1 "general_operand" ""))]
- ""
+(define_insn "movdi_lo_sum_pic"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "immediate_operand" "in")] 0)))]
+ "TARGET_ARCH64 && flag_pic"
+ "or\\t%1, %%lo(%a2), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
+
+(define_insn "movdi_high_pic"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand 1 "" "")] 0)))]
+ "TARGET_ARCH64 && flag_pic && check_pic (1)"
+ "sethi\\t%%hi(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "*sethi_di_medlow_embmedany_pic"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (match_operand:DI 1 "sp64_medium_pic_operand" "")))]
+ "(TARGET_CM_MEDLOW || TARGET_CM_EMBMEDANY) && check_pic (1)"
+ "sethi\\t%%lo(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "*sethi_di_medlow"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (match_operand:DI 1 "symbolic_operand" "")))]
+ "TARGET_CM_MEDLOW && check_pic (1)"
+ "sethi\\t%%hi(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "*losum_di_medlow"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand" "")))]
+ "TARGET_CM_MEDLOW"
+ "or\\t%1, %%lo(%a2), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
+
+(define_insn "seth44"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] 6)))]
+ "TARGET_CM_MEDMID"
+ "sethi\\t%%h44(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "setm44"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")] 7)))]
+ "TARGET_CM_MEDMID"
+ "or\\t%1, %%m44(%a2), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "setl44"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand" "")))]
+ "TARGET_CM_MEDMID"
+ "or\\t%1, %%l44(%a2), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
+
+(define_insn "sethh"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] 9)))]
+ "TARGET_CM_MEDANY"
+ "sethi\\t%%hh(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "setlm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] 10)))]
+ "TARGET_CM_MEDANY"
+ "sethi\\t%%lm(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "sethm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")] 18)))]
+ "TARGET_CM_MEDANY"
+ "or\\t%1, %%hm(%a2), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
+
+(define_insn "setlo"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand" "")))]
+ "TARGET_CM_MEDANY"
+ "or\\t%1, %%lo(%a2), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
+
+(define_insn "embmedany_sethi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "data_segment_operand" "")] 11)))]
+ "TARGET_CM_EMBMEDANY && check_pic (1)"
+ "sethi\\t%%hi(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "embmedany_losum"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "data_segment_operand" "")))]
+ "TARGET_CM_EMBMEDANY"
+ "add\\t%1, %%lo(%a2), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
+
+(define_insn "embmedany_brsum"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")] 11))]
+ "TARGET_CM_EMBMEDANY"
+ "add\\t%1, %_, %0"
+ [(set_attr "length" "1")])
+
+(define_insn "embmedany_textuhi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "text_segment_operand" "")] 13)))]
+ "TARGET_CM_EMBMEDANY && check_pic (1)"
+ "sethi\\t%%uhi(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "embmedany_texthi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "text_segment_operand" "")] 14)))]
+ "TARGET_CM_EMBMEDANY && check_pic (1)"
+ "sethi\\t%%hi(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "embmedany_textulo"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "text_segment_operand" "")] 15)))]
+ "TARGET_CM_EMBMEDANY"
+ "or\\t%1, %%ulo(%a2), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
+
+(define_insn "embmedany_textlo"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "text_segment_operand" "")))]
+ "TARGET_CM_EMBMEDANY"
+ "or\\t%1, %%lo(%a2), %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
+
+;; Now some patterns to help reload out a bit.
+(define_expand "reload_indi"
+ [(parallel [(match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "immediate_operand" "")
+ (match_operand:TI 2 "register_operand" "=&r")])]
+ "(TARGET_CM_MEDANY
+ || TARGET_CM_EMBMEDANY)
+ && ! flag_pic"
"
{
- if (emit_move_sequence (operands, DImode))
- DONE;
+ sparc_emit_set_symbolic_const64 (operands[0], operands[1],
+ gen_rtx_REG (DImode, REGNO (operands[2])));
+ DONE;
}")
-;; 32 bit V9 movdi is like regular 32 bit except: a 64 bit zero can be stored
-;; to aligned memory with a single instruction, the ldd/std instructions
-;; are not used, and constants can not be moved to floating point registers.
-
-(define_insn "*movdi_sp32_v9"
- [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,?e,?e,?Q,?b")
- (match_operand:DI 1 "general_operand" "r,J,r,Q,i,e,Q,e,J"))]
- "TARGET_V9 && ! TARGET_ARCH64
- && (register_operand (operands[0], DImode)
- || register_operand (operands[1], DImode)
- || operands[1] == const0_rtx)
- && (GET_CODE (operands[0]) != REG || ! CONSTANT_P (operands[1])
- || REGNO (operands[0]) < 32
- || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)"
- "*
+(define_expand "reload_outdi"
+ [(parallel [(match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "immediate_operand" "")
+ (match_operand:TI 2 "register_operand" "=&r")])]
+ "(TARGET_CM_MEDANY
+ || TARGET_CM_EMBMEDANY)
+ && ! flag_pic"
+ "
{
- if (which_alternative == 1)
- return \"stx %%g0,%0\";
- if (which_alternative == 8)
- return \"fzero %0\";
- if (FP_REG_P (operands[0]) || FP_REG_P (operands[1]))
- return output_fp_move_double (operands);
- return output_move_double (operands);
-}"
- [(set_attr "type" "move,store,store,load,multi,fp,fpload,fpstore,fpmove")
- (set_attr "length" "2,1,3,3,3,2,3,3,1")])
+ sparc_emit_set_symbolic_const64 (operands[0], operands[1],
+ gen_rtx_REG (DImode, REGNO (operands[2])));
+ DONE;
+}")
-;; SPARC V9 deprecates std. Split it here.
+;; Split up putting CONSTs and REGs into DI regs when !arch64
(define_split
- [(set (match_operand:DI 0 "memory_operand" "=m")
- (match_operand:DI 1 "register_operand" "r"))]
- "TARGET_V9 && ! TARGET_ARCH64 && reload_completed
- && REGNO (operands[1]) < 32 && ! MEM_VOLATILE_P (operands[0])
- && offsettable_memref_p (operands[0])"
- [(set (match_dup 2) (match_dup 3))
- (set (match_dup 4) (match_dup 5))]
- "operands[3] = gen_highpart (SImode, operands[1]);
- operands[5] = gen_lowpart (SImode, operands[1]);
- operands[4] = adj_offsettable_operand (operands[0], 4);
- PUT_MODE (operands[4], SImode);
- operands[2] = copy_rtx (operands[0]);
- PUT_MODE (operands[2], SImode);")
-
-;; Split register to register moves.
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "const_int_operand" ""))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(clobber (const_int 0))]
+ "
+{
+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]),
+ (INTVAL (operands[1]) < 0) ?
+ constm1_rtx :
+ const0_rtx));
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
+ operands[1]));
+ DONE;
+}")
+
(define_split
- [(set (match_operand:DI 0 "register_operand" "=r")
- (match_operand:DI 1 "arith_double_operand" "rIN"))]
- "! TARGET_ARCH64
- && REGNO (operands[0]) < 32
- && GET_CODE (operands[1]) == REG && REGNO (operands[1]) < 32
- && ! reg_overlap_mentioned_p (operands[0], operands[1])"
- [(set (match_dup 2) (match_dup 4))
- (set (match_dup 3) (match_dup 5))]
- "operands[2] = gen_highpart (SImode, operands[0]);
- operands[3] = gen_lowpart (SImode, operands[0]);
- operands[4] = gen_highpart (SImode, operands[1]);
- operands[5] = gen_lowpart (SImode, operands[1]);")
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "const_double_operand" ""))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(clobber (const_int 0))]
+ "
+{
+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]),
+ GEN_INT (CONST_DOUBLE_HIGH (operands[1]))));
-(define_insn "*movdi_sp32"
- [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,U,Q,r,r,?f,?f,?Q")
- (match_operand:DI 1 "general_operand" "r,U,T,r,Q,i,f,Q,f"))]
- "! TARGET_V9
- && (register_operand (operands[0], DImode)
- || register_operand (operands[1], DImode)
- || operands[1] == const0_rtx)"
- "*
+ /* Slick... but this trick loses if this half of the constant
+ can be done in one insn. */
+ if (CONST_DOUBLE_LOW (operands[1]) == CONST_DOUBLE_HIGH (operands[1])
+ && !(SPARC_SETHI_P (CONST_DOUBLE_HIGH (operands[1]))
+ || SPARC_SIMM13_P (CONST_DOUBLE_HIGH (operands[1]))))
+ {
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[0])));
+ }
+ else
+ {
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
+ GEN_INT (CONST_DOUBLE_LOW (operands[1]))));
+ }
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" ""))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(clobber (const_int 0))]
+ "
{
- if (FP_REG_P (operands[0]) || FP_REG_P (operands[1]))
- return output_fp_move_double (operands);
- return output_move_double (operands);
-}"
- [(set_attr "type" "move,store,load,store,load,multi,fp,fpload,fpstore")
- (set_attr "length" "2,1,1,3,3,3,2,3,3")])
-
-;;; ??? The trick used below can be extended to load any negative 32 bit
-;;; constant in two instructions. Currently the compiler will use HIGH/LO_SUM
-;;; for anything not matching the HIK constraints, which results in 5
-;;; instructions. Positive 32 bit constants can be loaded in the obvious way
-;;; with sethi/ori. To extend the trick, in the xor instruction, use
-;;; xor %o0, ((op1 & 0x3ff) | -0x400), %o0
-;;; This needs the original value of operands[1], not the inverted value.
-
-(define_insn "*movdi_sp64_insn"
- [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q,?e,?e,?Q")
- (match_operand:DI 1 "move_operand" "rI,K,Q,rJ,e,Q,e"))]
- "TARGET_ARCH64
- && (register_operand (operands[0], DImode)
- || register_operand (operands[1], DImode)
- || operands[1] == const0_rtx)"
- "*
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ if (GET_CODE (set_dest) == SUBREG)
+ set_dest = alter_subreg (set_dest);
+ if (GET_CODE (set_src) == SUBREG)
+ set_src = alter_subreg (set_src);
+
+ dest1 = gen_highpart (SImode, set_dest);
+ dest2 = gen_lowpart (SImode, set_dest);
+ src1 = gen_highpart (SImode, set_src);
+ src2 = gen_lowpart (SImode, set_src);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movsi (dest2, src2));
+ emit_insn (gen_movsi (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_movsi (dest1, src1));
+ emit_insn (gen_movsi (dest2, src2));
+ }
+ DONE;
+}")
+
+;; Now handle the cases of memory moves from/to non-even
+;; DI mode register pairs.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "memory_operand" ""))]
+ "(! TARGET_ARCH64
+ && reload_completed
+ && sparc_splitdi_legitimate (operands[0], operands[1]))"
+ [(clobber (const_int 0))]
+ "
{
- switch (which_alternative)
+ rtx word0 = change_address (operands[1], SImode, NULL_RTX);
+ rtx word1 = change_address (operands[1], SImode,
+ plus_constant_for_output (XEXP (word0, 0), 4));
+ rtx high_part = gen_highpart (SImode, operands[0]);
+ rtx low_part = gen_lowpart (SImode, operands[0]);
+
+ if (reg_overlap_mentioned_p (high_part, word1))
{
- case 0:
- return \"mov %1,%0\";
- case 1:
- /* Sethi does not sign extend, so we must use a little trickery
- to use it for negative numbers. Invert the constant before
- loading it in, then use a xor immediate to invert the loaded bits
- (along with the upper 32 bits) to the desired constant. This
- works because the sethi and immediate fields overlap. */
-
- if ((INTVAL (operands[1]) & 0x80000000) == 0)
- return \"sethi %%hi(%a1),%0\";
- else
- {
- operands[1] = GEN_INT (~INTVAL (operands[1]));
- output_asm_insn (\"sethi %%hi(%a1),%0\", operands);
- /* The low 10 bits are already zero, but invert the rest.
- Assemblers don't accept 0x1c00, so use -0x400 instead. */
- return \"xor %0,-0x400,%0\";
- }
- case 2:
- return \"ldx %1,%0\";
- case 3:
- return \"stx %r1,%0\";
- case 4:
- return \"fmovd %1,%0\";
- case 5:
- return \"ldd %1,%0\";
- case 6:
- return \"std %1,%0\";
- default:
- abort ();
+ emit_insn (gen_movsi (low_part, word1));
+ emit_insn (gen_movsi (high_part, word0));
}
-}"
- [(set_attr "type" "move,move,load,store,fp,fpload,fpstore")
- (set_attr "length" "1,2,1,1,1,1,1")])
+ else
+ {
+ emit_insn (gen_movsi (high_part, word0));
+ emit_insn (gen_movsi (low_part, word1));
+ }
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "register_operand" ""))]
+ "(! TARGET_ARCH64
+ && reload_completed
+ && sparc_splitdi_legitimate (operands[1], operands[0]))"
+ [(clobber (const_int 0))]
+ "
+{
+ rtx word0 = change_address (operands[0], SImode, NULL_RTX);
+ rtx word1 = change_address (operands[0], SImode,
+ plus_constant_for_output (XEXP (word0, 0), 4));
+ rtx high_part = gen_highpart (SImode, operands[1]);
+ rtx low_part = gen_lowpart (SImode, operands[1]);
+
+ emit_insn (gen_movsi (word0, high_part));
+ emit_insn (gen_movsi (word1, low_part));
+ DONE;
+}")
-;; ??? There's no symbolic (set (mem:DI ...) ...).
-;; Experimentation with v9 suggested one isn't needed.
;; Floating point move insns
-;; This pattern forces (set (reg:SF ...) (const_double ...))
-;; to be reloaded by putting the constant into memory.
-;; It must come before the more general movsf pattern.
-(define_insn "*movsf_const_insn"
- [(set (match_operand:SF 0 "general_operand" "=f,d,m,?r")
- (match_operand:SF 1 "" "m,G,G,?F"))]
+(define_insn "*clear_sf"
+ [(set (match_operand:SF 0 "general_operand" "=f")
+ (match_operand:SF 1 "" ""))]
+ "TARGET_VIS
+ && GET_CODE (operands[1]) == CONST_DOUBLE
+ && GET_CODE (operands[0]) == REG
+ && fp_zero_operand (operands[1])"
+ "fzeros\\t%0"
+ [(set_attr "type" "fpmove")
+ (set_attr "length" "1")])
+
+(define_insn "*movsf_const_intreg"
+ [(set (match_operand:SF 0 "general_operand" "=f,r")
+ (match_operand:SF 1 "" "m,F"))]
"TARGET_FPU
&& GET_CODE (operands[1]) == CONST_DOUBLE
- && (GET_CODE (operands[0]) == REG
- || fp_zero_operand (operands[1]))"
+ && GET_CODE (operands[0]) == REG"
"*
{
- switch (which_alternative)
+ REAL_VALUE_TYPE r;
+ long i;
+
+ if (which_alternative == 0)
+ return \"ld\\t%1, %0\";
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ if (SPARC_SIMM13_P (i) || SPARC_SETHI_P (i))
{
- case 0:
- return \"ld %1,%0\";
- case 1:
- return \"fzeros %0\";
- case 2:
- return \"st %%g0,%0\";
- case 3:
- return singlemove_string (operands);
- default:
- abort ();
+ operands[1] = GEN_INT (i);
+ if (SPARC_SIMM13_P (INTVAL (operands[1])))
+ return \"mov\\t%1, %0\";
+ else if (SPARC_SETHI_P (INTVAL (operands[1])))
+ return \"sethi\\t%%hi(%a1), %0\";
+ else
+ abort ();
}
+ else
+ return \"#\";
}"
- [(set_attr "type" "fpload,fpmove,store,load")
- (set_attr "length" "1,1,1,2")])
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+;; There isn't much I can do about this; if I change the
+;; mode, then flow info gets really confused because the
+;; destination no longer looks the same. Ho hum...
+(define_insn "*movsf_const_high"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unspec:SF [(match_operand 1 "const_int_operand" "")] 12))]
+ ""
+ "sethi\\t%%hi(%a1), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_insn "*movsf_const_lo"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unspec:SF [(match_operand 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "")] 17))]
+ ""
+ "or\\t%1, %%lo(%a2), %0"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "const_double_operand" ""))]
+ "TARGET_FPU
+ && (GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)"
+ [(set (match_dup 0) (unspec:SF [(match_dup 1)] 12))
+ (set (match_dup 0) (unspec:SF [(match_dup 0) (match_dup 1)] 17))]
+ "
+{
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ operands[1] = GEN_INT (i);
+}")
(define_expand "movsf"
[(set (match_operand:SF 0 "general_operand" "")
@@ -2327,88 +2884,193 @@
""
"
{
- if (emit_move_sequence (operands, SFmode))
- DONE;
+ /* Force SFmode constants into memory. */
+ if (GET_CODE (operands[0]) == REG
+ && CONSTANT_P (operands[1]))
+ {
+ if (TARGET_VIS
+ && GET_CODE (operands[1]) == CONST_DOUBLE
+ && fp_zero_operand (operands[1]))
+ goto movsf_is_ok;
+
+ /* emit_group_store will send such bogosity to us when it is
+ not storing directly into memory. So fix this up to avoid
+ crashes in output_constant_pool. */
+ if (operands [1] == const0_rtx)
+ operands[1] = CONST0_RTX (SFmode);
+ operands[1] = validize_mem (force_const_mem (GET_MODE (operands[0]),
+ operands[1]));
+ }
+
+ /* Handle sets of MEM first. */
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ if (register_operand (operands[1], SFmode))
+ goto movsf_is_ok;
+
+ if (! reload_in_progress)
+ {
+ operands[0] = validize_mem (operands[0]);
+ operands[1] = force_reg (SFmode, operands[1]);
+ }
+ }
+
+ /* Fixup PIC cases. */
+ if (flag_pic)
+ {
+ if (CONSTANT_P (operands[1])
+ && pic_address_needs_scratch (operands[1]))
+ operands[1] = legitimize_pic_address (operands[1], SFmode, 0);
+
+ if (symbolic_operand (operands[1], SFmode))
+ {
+ operands[1] = legitimize_pic_address (operands[1],
+ SFmode,
+ (reload_in_progress ?
+ operands[0] :
+ NULL_RTX));
+ }
+ }
+
+ movsf_is_ok:
+ ;
}")
(define_insn "*movsf_insn"
- [(set (match_operand:SF 0 "reg_or_nonsymb_mem_operand" "=f,f,Q,r,r,Q")
- (match_operand:SF 1 "reg_or_nonsymb_mem_operand" "f,Q,f,r,Q,r"))]
+ [(set (match_operand:SF 0 "general_operand" "=f,f,m,r,r,m")
+ (match_operand:SF 1 "input_operand" "f,m,f,r,m,r"))]
"TARGET_FPU
&& (register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))"
"@
- fmovs %1,%0
- ld %1,%0
- st %1,%0
- mov %1,%0
- ld %1,%0
- st %1,%0"
- [(set_attr "type" "fpmove,fpload,fpstore,move,load,store")])
+ fmovs\\t%1, %0
+ ld\\t%1, %0
+ st\\t%1, %0
+ mov\\t%1, %0
+ ld\\t%1, %0
+ st\\t%1, %0"
+ [(set_attr "type" "fpmove,fpload,fpstore,move,load,store")
+ (set_attr "length" "1")])
;; Exactly the same as above, except that all `f' cases are deleted.
;; This is necessary to prevent reload from ever trying to use a `f' reg
;; when -mno-fpu.
(define_insn "*movsf_no_f_insn"
- [(set (match_operand:SF 0 "reg_or_nonsymb_mem_operand" "=r,r,Q")
- (match_operand:SF 1 "reg_or_nonsymb_mem_operand" "r,Q,r"))]
+ [(set (match_operand:SF 0 "general_operand" "=r,r,m")
+ (match_operand:SF 1 "input_operand" "r,m,r"))]
"! TARGET_FPU
&& (register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))"
"@
- mov %1,%0
- ld %1,%0
- st %1,%0"
- [(set_attr "type" "move,load,store")])
-
-(define_insn "*store_sf"
- [(set (mem:SF (match_operand:SI 0 "symbolic_operand" "i"))
- (match_operand:SF 1 "reg_or_0_operand" "rfG"))
- (clobber (match_scratch:SI 2 "=&r"))]
- "(reload_completed || reload_in_progress)
- && ! TARGET_PTR64"
- "sethi %%hi(%a0),%2\;st %r1,[%2+%%lo(%a0)]"
- [(set_attr "type" "store")
- (set_attr "length" "2")])
+ mov\\t%1, %0
+ ld\\t%1, %0
+ st\\t%1, %0"
+ [(set_attr "type" "move,load,store")
+ (set_attr "length" "1")])
-;; This pattern forces (set (reg:DF ...) (const_double ...))
-;; to be reloaded by putting the constant into memory.
-;; It must come before the more general movdf pattern.
+(define_insn "*clear_df"
+ [(set (match_operand:DF 0 "general_operand" "=e")
+ (match_operand:DF 1 "" ""))]
+ "TARGET_VIS
+ && GET_CODE (operands[1]) == CONST_DOUBLE
+ && GET_CODE (operands[0]) == REG
+ && fp_zero_operand (operands[1])"
+ "fzero\\t%0"
+ [(set_attr "type" "fpmove")
+ (set_attr "length" "1")])
+
+(define_insn "*movdf_const_intreg_sp32"
+ [(set (match_operand:DF 0 "general_operand" "=e,e,r")
+ (match_operand:DF 1 "" "T,o,F"))]
+ "TARGET_FPU && ! TARGET_ARCH64
+ && GET_CODE (operands[1]) == CONST_DOUBLE
+ && GET_CODE (operands[0]) == REG"
+ "*
+{
+ if (which_alternative == 0)
+ return \"ldd\\t%1, %0\";
+ else
+ return \"#\";
+}"
+ [(set_attr "type" "move")
+ (set_attr "length" "1,2,2")])
-(define_insn "*movdf_const_insn"
- [(set (match_operand:DF 0 "general_operand" "=?r,e,o,d")
- (match_operand:DF 1 "" "?F,m,G,G"))]
+;; Now that we redo life analysis with a clean slate after
+;; instruction splitting for sched2, this can work.
+(define_insn "*movdf_const_intreg_sp64"
+ [(set (match_operand:DF 0 "general_operand" "=e,e,r")
+ (match_operand:DF 1 "" "m,o,F"))]
"TARGET_FPU
+ && TARGET_ARCH64
&& GET_CODE (operands[1]) == CONST_DOUBLE
- && (GET_CODE (operands[0]) == REG
- || fp_zero_operand (operands[1]))"
+ && GET_CODE (operands[0]) == REG"
"*
{
- switch (which_alternative)
+ if (which_alternative == 0)
+ return \"ldd\\t%1, %0\";
+ else
+ return \"#\";
+}"
+ [(set_attr "type" "move")
+ (set_attr "length" "1")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "const_double_operand" ""))]
+ "TARGET_FPU
+ && GET_CODE (operands[1]) == CONST_DOUBLE
+ && (GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ && reload_completed"
+ [(clobber (const_int 0))]
+ "
+{
+ REAL_VALUE_TYPE r;
+ long l[2];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+ if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ operands[0] = gen_rtx_raw_REG (DImode, REGNO (operands[0]));
+
+ if (TARGET_ARCH64)
{
- case 0:
- return output_move_double (operands);
- case 1:
- return output_fp_move_double (operands);
- case 2:
- if (TARGET_ARCH64 || (TARGET_V9 && mem_aligned_8 (operands[0])))
- {
- return \"stx %%g0,%0\";
- }
+#if HOST_BITS_PER_WIDE_INT == 64
+ HOST_WIDE_INT val;
+
+ val = ((HOST_WIDE_INT)l[1] |
+ ((HOST_WIDE_INT)l[0] << 32));
+ emit_insn (gen_movdi (operands[0], GEN_INT (val)));
+#else
+ emit_insn (gen_movdi (operands[0],
+ gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx,
+ l[1], l[0])));
+#endif
+ }
+ else
+ {
+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]),
+ GEN_INT (l[0])));
+
+ /* Slick... but this trick loses if this half of the constant
+ can be done in one insn. */
+ if (l[1] == l[0]
+ && !(SPARC_SETHI_P (l[0])
+ || SPARC_SIMM13_P (l[0])))
+ {
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[0])));
+ }
else
- {
- operands[1] = adj_offsettable_operand (operands[0], 4);
- return \"st %%g0,%0\;st %%g0,%1\";
- }
- case 3:
- return \"fzero %0\";
- default:
- abort ();
+ {
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
+ GEN_INT (l[1])));
+ }
}
-}"
- [(set_attr "type" "load,fpload,store,fpmove")
- (set_attr "length" "3,3,3,1")])
+ DONE;
+}")
(define_expand "movdf"
[(set (match_operand:DF 0 "general_operand" "")
@@ -2416,126 +3078,271 @@
""
"
{
- if (emit_move_sequence (operands, DFmode))
- DONE;
+ /* Force DFmode constants into memory. */
+ if (GET_CODE (operands[0]) == REG
+ && CONSTANT_P (operands[1]))
+ {
+ if (TARGET_VIS
+ && GET_CODE (operands[1]) == CONST_DOUBLE
+ && fp_zero_operand (operands[1]))
+ goto movdf_is_ok;
+
+ /* emit_group_store will send such bogosity to us when it is
+ not storing directly into memory. So fix this up to avoid
+ crashes in output_constant_pool. */
+ if (operands [1] == const0_rtx)
+ operands[1] = CONST0_RTX (DFmode);
+ operands[1] = validize_mem (force_const_mem (GET_MODE (operands[0]),
+ operands[1]));
+ }
+
+ /* Handle MEM cases first. */
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ if (register_operand (operands[1], DFmode))
+ goto movdf_is_ok;
+
+ if (! reload_in_progress)
+ {
+ operands[0] = validize_mem (operands[0]);
+ operands[1] = force_reg (DFmode, operands[1]);
+ }
+ }
+
+ /* Fixup PIC cases. */
+ if (flag_pic)
+ {
+ if (CONSTANT_P (operands[1])
+ && pic_address_needs_scratch (operands[1]))
+ operands[1] = legitimize_pic_address (operands[1], DFmode, 0);
+
+ if (symbolic_operand (operands[1], DFmode))
+ {
+ operands[1] = legitimize_pic_address (operands[1],
+ DFmode,
+ (reload_in_progress ?
+ operands[0] :
+ NULL_RTX));
+ }
+ }
+
+ movdf_is_ok:
+ ;
}")
-(define_insn "*movdf_insn"
- [(set (match_operand:DF 0 "reg_or_nonsymb_mem_operand" "=e,Q,e,T,U,r,Q,r")
- (match_operand:DF 1 "reg_or_nonsymb_mem_operand" "e,e,Q,U,T,r,r,Q"))]
+;; Be careful, fmovd does not exist when !v9.
+(define_insn "*movdf_insn_sp32"
+ [(set (match_operand:DF 0 "general_operand" "=e,T,U,T,e,r,r,o,e,o")
+ (match_operand:DF 1 "input_operand" "T,e,T,U,e,r,o,r,o,e"))]
"TARGET_FPU
+ && ! TARGET_V9
&& (register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
- "*
-{
- if (FP_REG_P (operands[0]) || FP_REG_P (operands[1]))
- return output_fp_move_double (operands);
- return output_move_double (operands);
-}"
- [(set_attr "type" "fp,fpstore,fpload,fpstore,fpload,move,store,load")
- (set_attr "length" "2,3,3,1,1,2,2,2")])
-
-;; Exactly the same as above, except that all `e' cases are deleted.
-;; This is necessary to prevent reload from ever trying to use a `e' reg
-;; when -mno-fpu.
-
-(define_insn "*movdf_no_e_insn"
- [(set (match_operand:DF 0 "reg_or_nonsymb_mem_operand" "=T,U,r,Q,&r")
- (match_operand:DF 1 "reg_or_nonsymb_mem_operand" "U,T,r,r,Q"))]
+ "@
+ ldd\\t%1, %0
+ std\\t%1, %0
+ ldd\\t%1, %0
+ std\\t%1, %0
+ #
+ #
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "fpload,fpstore,load,store,*,*,*,*,*,*")
+ (set_attr "length" "1,1,1,1,2,2,2,2,2,2")])
+
+(define_insn "*movdf_no_e_insn_sp32"
+ [(set (match_operand:DF 0 "general_operand" "=U,T,r,r,o")
+ (match_operand:DF 1 "input_operand" "T,U,r,o,r"))]
"! TARGET_FPU
+ && ! TARGET_ARCH64
+ && (register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode))"
+ "@
+ ldd\\t%1, %0
+ std\\t%1, %0
+ #
+ #
+ #"
+ [(set_attr "type" "load,store,*,*,*")
+ (set_attr "length" "1,1,2,2,2")])
+
+;; We have available v9 double floats but not 64-bit
+;; integer registers.
+(define_insn "*movdf_insn_v9only"
+ [(set (match_operand:DF 0 "general_operand" "=e,e,m,U,T,r,r,o")
+ (match_operand:DF 1 "input_operand" "e,m,e,T,U,r,o,r"))]
+ "TARGET_FPU
+ && TARGET_V9
+ && ! TARGET_ARCH64
&& (register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
- "* return output_move_double (operands);"
- [(set_attr "type" "store,load,move,store,load")
- (set_attr "length" "1,1,2,3,3")])
+ "@
+ fmovd\\t%1, %0
+ ldd\\t%1, %0
+ std\\t%1, %0
+ ldd\\t%1, %0
+ std\\t%1, %0
+ #
+ #
+ #"
+ [(set_attr "type" "fpmove,load,store,load,store,*,*,*")
+ (set_attr "length" "1,1,1,1,1,2,2,2")])
+
+;; We have available both v9 double floats and 64-bit
+;; integer registers.
+(define_insn "*movdf_insn_sp64"
+ [(set (match_operand:DF 0 "general_operand" "=e,e,m,r,r,m")
+ (match_operand:DF 1 "input_operand" "e,m,e,r,m,r"))]
+ "TARGET_FPU
+ && TARGET_V9
+ && TARGET_ARCH64
+ && (register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode))"
+ "@
+ fmovd\\t%1, %0
+ ldd\\t%1, %0
+ std\\t%1, %0
+ mov\\t%1, %0
+ ldx\\t%1, %0
+ stx\\t%1, %0"
+ [(set_attr "type" "fpmove,load,store,move,load,store")
+ (set_attr "length" "1")])
-;; Must handle overlapping registers here, since parameters can be unaligned
-;; in registers.
+(define_insn "*movdf_no_e_insn_sp64"
+ [(set (match_operand:DF 0 "general_operand" "=r,r,m")
+ (match_operand:DF 1 "input_operand" "r,m,r"))]
+ "! TARGET_FPU
+ && TARGET_ARCH64
+ && (register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode))"
+ "@
+ mov\\t%1, %0
+ ldx\\t%1, %0
+ stx\\t%1, %0"
+ [(set_attr "type" "move,load,store")
+ (set_attr "length" "1")])
+;; Ok, now the splits to handle all the multi-insn and
+;; misaligned memory address cases.
+;; In these splits, take note that we must be careful
+;; when V9 but not ARCH64, because the integer register
+;; DFmode cases must still be handled.
(define_split
[(set (match_operand:DF 0 "register_operand" "")
- (match_operand:DF 1 "register_operand" ""))]
- "! TARGET_ARCH64 && reload_completed
- && REGNO (operands[0]) < SPARC_FIRST_V9_FP_REG
- && REGNO (operands[1]) < SPARC_FIRST_V9_FP_REG"
- [(set (match_dup 2) (match_dup 3))
- (set (match_dup 4) (match_dup 5))]
+ (match_operand:DF 1 "register_operand" ""))]
+ "(! TARGET_V9
+ || (! TARGET_ARCH64
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))))
+ && reload_completed"
+ [(clobber (const_int 0))]
"
{
- rtx first_set = operand_subword (operands[0], 0, 0, DFmode);
- rtx second_use = operand_subword (operands[1], 1, 0, DFmode);
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
- if (REGNO (first_set) == REGNO (second_use))
+ if (GET_CODE (set_dest) == SUBREG)
+ set_dest = alter_subreg (set_dest);
+ if (GET_CODE (set_src) == SUBREG)
+ set_src = alter_subreg (set_src);
+
+ dest1 = gen_highpart (SFmode, set_dest);
+ dest2 = gen_lowpart (SFmode, set_dest);
+ src1 = gen_highpart (SFmode, set_src);
+ src2 = gen_lowpart (SFmode, set_src);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
{
- operands[2] = operand_subword (operands[0], 1, 0, DFmode);
- operands[3] = second_use;
- operands[4] = first_set;
- operands[5] = operand_subword (operands[1], 0, 0, DFmode);
+ emit_insn (gen_movsf (dest2, src2));
+ emit_insn (gen_movsf (dest1, src1));
}
else
{
- operands[2] = first_set;
- operands[3] = operand_subword (operands[1], 0, 0, DFmode);
- operands[4] = operand_subword (operands[0], 1, 0, DFmode);
- operands[5] = second_use;
+ emit_insn (gen_movsf (dest1, src1));
+ emit_insn (gen_movsf (dest2, src2));
}
+ DONE;
}")
-(define_insn "*store_df"
- [(set (mem:DF (match_operand:SI 0 "symbolic_operand" "i,i"))
- (match_operand:DF 1 "reg_or_0_operand" "re,G"))
- (clobber (match_scratch:SI 2 "=&r,&r"))]
- "(reload_completed || reload_in_progress)
- && ! TARGET_PTR64"
- "*
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" ""))]
+ "((! TARGET_V9
+ || (! TARGET_ARCH64
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))))
+ && (reload_completed
+ && (((REGNO (operands[0])) % 2) != 0
+ || ! mem_min_alignment (operands[1], 8))
+ && offsettable_memref_p (operands[1])))"
+ [(clobber (const_int 0))]
+ "
{
- output_asm_insn (\"sethi %%hi(%a0),%2\", operands);
- if (which_alternative == 0)
- return \"std %1,[%2+%%lo(%a0)]\";
+ rtx word0 = change_address (operands[1], SFmode, NULL_RTX);
+ rtx word1 = change_address (operands[1], SFmode,
+ plus_constant_for_output (XEXP (word0, 0), 4));
+
+ if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+
+ if (reg_overlap_mentioned_p (gen_highpart (SFmode, operands[0]), word1))
+ {
+ emit_insn (gen_movsf (gen_lowpart (SFmode, operands[0]),
+ word1));
+ emit_insn (gen_movsf (gen_highpart (SFmode, operands[0]),
+ word0));
+ }
else
- return \"st %%g0,[%2+%%lo(%a0)]\;st %%g0,[%2+%%lo(%a0+4)]\";
-}"
- [(set_attr "type" "store")
- (set_attr "length" "3")])
-
-;; This pattern forces (set (reg:TF ...) (const_double ...))
-;; to be reloaded by putting the constant into memory.
-;; It must come before the more general movtf pattern.
-(define_insn "*movtf_const_insn"
- [(set (match_operand:TF 0 "general_operand" "=?r,e,o")
- (match_operand:TF 1 "" "?F,m,G"))]
- "TARGET_FPU
- && GET_CODE (operands[1]) == CONST_DOUBLE
- && (GET_CODE (operands[0]) == REG
- || fp_zero_operand (operands[1]))"
- "*
-{
- switch (which_alternative)
{
- case 0:
- return output_move_quad (operands);
- case 1:
- return output_fp_move_quad (operands);
- case 2:
- if (TARGET_ARCH64 || (TARGET_V9 && mem_aligned_8 (operands[0])))
- {
- operands[1] = adj_offsettable_operand (operands[0], 8);
- return \"stx %%g0,%0\;stx %%g0,%1\";
- }
- else
- {
- /* ??? Do we run off the end of the array here? */
- operands[1] = adj_offsettable_operand (operands[0], 4);
- operands[2] = adj_offsettable_operand (operands[0], 8);
- operands[3] = adj_offsettable_operand (operands[0], 12);
- return \"st %%g0,%0\;st %%g0,%1\;st %%g0,%2\;st %%g0,%3\";
- }
- default:
- abort ();
+ emit_insn (gen_movsf (gen_highpart (SFmode, operands[0]),
+ word0));
+ emit_insn (gen_movsf (gen_lowpart (SFmode, operands[0]),
+ word1));
}
-}"
- [(set_attr "type" "load,fpload,store")
- (set_attr "length" "5,5,5")])
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:DF 0 "memory_operand" "")
+ (match_operand:DF 1 "register_operand" ""))]
+ "((! TARGET_V9
+ || (! TARGET_ARCH64
+ && ((GET_CODE (operands[1]) == REG
+ && REGNO (operands[1]) < 32)
+ || (GET_CODE (operands[1]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[1])) == REG
+ && REGNO (SUBREG_REG (operands[1])) < 32))))
+ && (reload_completed
+ && (((REGNO (operands[1])) % 2) != 0
+ || ! mem_min_alignment (operands[0], 8))
+ && offsettable_memref_p (operands[0])))"
+ [(clobber (const_int 0))]
+ "
+{
+ rtx word0 = change_address (operands[0], SFmode, NULL_RTX);
+ rtx word1 = change_address (operands[0], SFmode,
+ plus_constant_for_output (XEXP (word0, 0), 4));
+
+ if (GET_CODE (operands[1]) == SUBREG)
+ operands[1] = alter_subreg (operands[1]);
+ emit_insn (gen_movsf (word0,
+ gen_highpart (SFmode, operands[1])));
+ emit_insn (gen_movsf (word1,
+ gen_lowpart (SFmode, operands[1])));
+ DONE;
+}")
(define_expand "movtf"
[(set (match_operand:TF 0 "general_operand" "")
@@ -2543,63 +3350,219 @@
""
"
{
- if (emit_move_sequence (operands, TFmode))
- DONE;
+ /* Force TFmode constants into memory. */
+ if (GET_CODE (operands[0]) == REG
+ && CONSTANT_P (operands[1]))
+ {
+ /* emit_group_store will send such bogosity to us when it is
+ not storing directly into memory. So fix this up to avoid
+ crashes in output_constant_pool. */
+ if (operands [1] == const0_rtx)
+ operands[1] = CONST0_RTX (TFmode);
+ operands[1] = validize_mem (force_const_mem (GET_MODE (operands[0]),
+ operands[1]));
+ }
+
+ /* Handle MEM cases first; note that only v9 guarantees
+ full 16-byte alignment for quads. */
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ if (register_operand (operands[1], TFmode))
+ goto movtf_is_ok;
+
+ if (! reload_in_progress)
+ {
+ operands[0] = validize_mem (operands[0]);
+ operands[1] = force_reg (TFmode, operands[1]);
+ }
+ }
+
+ /* Fixup PIC cases. */
+ if (flag_pic)
+ {
+ if (CONSTANT_P (operands[1])
+ && pic_address_needs_scratch (operands[1]))
+ operands[1] = legitimize_pic_address (operands[1], TFmode, 0);
+
+ if (symbolic_operand (operands[1], TFmode))
+ {
+ operands[1] = legitimize_pic_address (operands[1],
+ TFmode,
+ (reload_in_progress ?
+ operands[0] :
+ NULL_RTX));
+ }
+ }
+
+ movtf_is_ok:
+ ;
}")
-(define_insn "*movtf_insn"
- [(set (match_operand:TF 0 "reg_or_nonsymb_mem_operand" "=e,Q,e,r,Q,r")
- (match_operand:TF 1 "reg_or_nonsymb_mem_operand" "e,e,Q,r,r,Q"))]
+;; Be careful, fmovq and {st,ld}{x,q} do not exist when !arch64 so
+;; we must split them all. :-(
+(define_insn "*movtf_insn_sp32"
+ [(set (match_operand:TF 0 "general_operand" "=e,o,U,o,e,r,r,o")
+ (match_operand:TF 1 "input_operand" "o,e,o,U,e,r,o,r"))]
"TARGET_FPU
+ && ! TARGET_ARCH64
&& (register_operand (operands[0], TFmode)
|| register_operand (operands[1], TFmode))"
- "*
-{
- if (FP_REG_P (operands[0]) || FP_REG_P (operands[1]))
- return output_fp_move_quad (operands);
- return output_move_quad (operands);
-}"
- [(set_attr "type" "fp,fpstore,fpload,move,store,load")
- (set_attr "length" "5,4,4,5,4,4")])
+ "#"
+ [(set_attr "length" "4")])
;; Exactly the same as above, except that all `e' cases are deleted.
;; This is necessary to prevent reload from ever trying to use a `e' reg
;; when -mno-fpu.
-(define_insn "*movtf_no_e_insn"
- [(set (match_operand:TF 0 "reg_or_nonsymb_mem_operand" "=r,Q,&r")
- (match_operand:TF 1 "reg_or_nonsymb_mem_operand" "r,r,Q"))]
+(define_insn "*movtf_no_e_insn_sp32"
+ [(set (match_operand:TF 0 "general_operand" "=U,o,r,r,o")
+ (match_operand:TF 1 "input_operand" "o,U,r,o,r"))]
"! TARGET_FPU
+ && ! TARGET_ARCH64
&& (register_operand (operands[0], TFmode)
|| register_operand (operands[1], TFmode))"
- "*
+ "#"
+ [(set_attr "length" "4")])
+
+;; Now handle the float reg cases directly when arch64,
+;; hard_quad, and proper reg number alignment are all true.
+(define_insn "*movtf_insn_hq_sp64"
+ [(set (match_operand:TF 0 "general_operand" "=e,e,m,r,r,o")
+ (match_operand:TF 1 "input_operand" "e,m,e,r,o,r"))]
+ "TARGET_FPU
+ && TARGET_ARCH64
+ && TARGET_V9
+ && TARGET_HARD_QUAD
+ && (register_operand (operands[0], TFmode)
+ || register_operand (operands[1], TFmode))"
+ "@
+ fmovq\\t%1, %0
+ ldq\\t%1, %0
+ stq\\t%1, %0
+ #
+ #
+ #"
+ [(set_attr "type" "fpmove,fpload,fpstore,*,*,*")
+ (set_attr "length" "1,1,1,2,2,2")])
+
+;; Now we allow the integer register cases even when
+;; only arch64 is true.
+(define_insn "*movtf_insn_sp64"
+ [(set (match_operand:TF 0 "general_operand" "=e,o,r,o,e,r")
+ (match_operand:TF 1 "input_operand" "o,e,o,r,e,r"))]
+ "TARGET_FPU
+ && TARGET_ARCH64
+ && ! TARGET_HARD_QUAD
+ && (register_operand (operands[0], TFmode)
+ || register_operand (operands[1], TFmode))"
+ "#"
+ [(set_attr "length" "2")])
+
+(define_insn "*movtf_no_e_insn_sp64"
+ [(set (match_operand:TF 0 "general_operand" "=r,o,r")
+ (match_operand:TF 1 "input_operand" "o,r,r"))]
+ "! TARGET_FPU
+ && TARGET_ARCH64
+ && (register_operand (operands[0], TFmode)
+ || register_operand (operands[1], TFmode))"
+ "#"
+ [(set_attr "length" "2")])
+
+;; Now all the splits to handle multi-insn TF mode moves.
+(define_split
+ [(set (match_operand:TF 0 "register_operand" "")
+ (match_operand:TF 1 "register_operand" ""))]
+ "reload_completed
+ && (! TARGET_ARCH64
+ || (TARGET_FPU
+ && ! TARGET_HARD_QUAD))"
+ [(clobber (const_int 0))]
+ "
{
- if (FP_REG_P (operands[0]) || FP_REG_P (operands[1]))
- return output_fp_move_quad (operands);
- return output_move_quad (operands);
-}"
- [(set_attr "type" "move,store,load")
- (set_attr "length" "4,5,5")])
-
-;; This is disabled because it does not work. Long doubles have only 8
-;; byte alignment. Adding an offset of 8 or 12 to an 8 byte aligned %lo may
-;; cause it to overflow. See also GO_IF_LEGITIMATE_ADDRESS.
-(define_insn "*store_tf"
- [(set (mem:TF (match_operand:SI 0 "symbolic_operand" "i,i"))
- (match_operand:TF 1 "reg_or_0_operand" "re,G"))
- (clobber (match_scratch:SI 2 "=&r,&r"))]
- "0 && (reload_completed || reload_in_progress)
- && ! TARGET_PTR64"
- "*
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ if (GET_CODE (set_dest) == SUBREG)
+ set_dest = alter_subreg (set_dest);
+ if (GET_CODE (set_src) == SUBREG)
+ set_src = alter_subreg (set_src);
+
+ /* Ugly, but gen_highpart will crap out here for 32-bit targets. */
+ dest1 = gen_rtx_SUBREG (DFmode, set_dest, WORDS_BIG_ENDIAN == 0);
+ dest2 = gen_rtx_SUBREG (DFmode, set_dest, WORDS_BIG_ENDIAN != 0);
+ src1 = gen_rtx_SUBREG (DFmode, set_src, WORDS_BIG_ENDIAN == 0);
+ src2 = gen_rtx_SUBREG (DFmode, set_src, WORDS_BIG_ENDIAN != 0);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movdf (dest2, src2));
+ emit_insn (gen_movdf (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_movdf (dest1, src1));
+ emit_insn (gen_movdf (dest2, src2));
+ }
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:TF 0 "register_operand" "")
+ (match_operand:TF 1 "memory_operand" ""))]
+ "(reload_completed
+ && offsettable_memref_p (operands[1]))"
+ [(clobber (const_int 0))]
+ "
{
- output_asm_insn (\"sethi %%hi(%a0),%2\", operands);
- if (which_alternative == 0)
- return \"std %1,[%2+%%lo(%a0)]\;std %S1,[%2+%%lo(%a0+8)]\";
+ rtx word0 = change_address (operands[1], DFmode, NULL_RTX);
+ rtx word1 = change_address (operands[1], DFmode,
+ plus_constant_for_output (XEXP (word0, 0), 8));
+ rtx dest1, dest2;
+
+ /* Ugly, but gen_highpart will crap out here for 32-bit targets. */
+ dest1 = gen_rtx_SUBREG (DFmode, operands[0], WORDS_BIG_ENDIAN == 0);
+ dest2 = gen_rtx_SUBREG (DFmode, operands[0], WORDS_BIG_ENDIAN != 0);
+
+ /* Now output, ordering such that we don't clobber any registers
+ mentioned in the address. */
+ if (reg_overlap_mentioned_p (dest1, word1))
+ {
+ emit_insn (gen_movdf (dest2, word1));
+ emit_insn (gen_movdf (dest1, word0));
+ }
else
- return \"st %%g0,[%2+%%lo(%a0)]\;st %%g0,[%2+%%lo(%a0+4)]\; st %%g0,[%2+%%lo(%a0+8)]\;st %%g0,[%2+%%lo(%a0+12)]\";
-}"
- [(set_attr "type" "store")
- (set_attr "length" "5")])
+ {
+ emit_insn (gen_movdf (dest1, word0));
+ emit_insn (gen_movdf (dest2, word1));
+ }
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:TF 0 "memory_operand" "")
+ (match_operand:TF 1 "register_operand" ""))]
+ "(reload_completed
+ && offsettable_memref_p (operands[0]))"
+ [(clobber (const_int 0))]
+ "
+{
+ rtx word0 = change_address (operands[0], DFmode, NULL_RTX);
+ rtx word1 = change_address (operands[0], DFmode,
+ plus_constant_for_output (XEXP (word0, 0), 8));
+ rtx src1, src2;
+
+ /* Ugly, but gen_highpart will crap out here for 32-bit targets. */
+ src1 = gen_rtx_SUBREG (DFmode, operands[1], WORDS_BIG_ENDIAN == 0);
+ src2 = gen_rtx_SUBREG (DFmode, operands[1], WORDS_BIG_ENDIAN != 0);
+ emit_insn (gen_movdf (word0, src1));
+ emit_insn (gen_movdf (word1, src2));
+ DONE;
+}")
;; Sparc V9 conditional move instructions.
@@ -2682,8 +3645,7 @@
if (sparc_compare_op1 == const0_rtx
&& GET_CODE (sparc_compare_op0) == REG
- && ((TARGET_ARCH64 && op0_mode == DImode && v9_regcmp_p (code))
- || (op0_mode == SImode && v8plus_regcmp_p (code))))
+ && (TARGET_ARCH64 && op0_mode == DImode && v9_regcmp_p (code)))
{
operands[1] = gen_rtx_fmt_ee (code, op0_mode,
sparc_compare_op0, sparc_compare_op1);
@@ -2821,39 +3783,42 @@
(if_then_else:QI (match_operator 1 "comparison_operator"
[(match_operand 2 "icc_or_fcc_reg_operand" "X,X")
(const_int 0)])
- (match_operand:QI 3 "arith11_operand" "rL,0")
- (match_operand:QI 4 "arith11_operand" "0,rL")))]
+ (match_operand:QI 3 "arith11_operand" "rL,0")
+ (match_operand:QI 4 "arith11_operand" "0,rL")))]
"TARGET_V9"
"@
- mov%C1 %x2,%3,%0
- mov%c1 %x2,%4,%0"
- [(set_attr "type" "cmove")])
+ mov%C1\\t%x2, %3, %0
+ mov%c1\\t%x2, %4, %0"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "1")])
(define_insn "*movhi_cc_sp64"
[(set (match_operand:HI 0 "register_operand" "=r,r")
(if_then_else:HI (match_operator 1 "comparison_operator"
[(match_operand 2 "icc_or_fcc_reg_operand" "X,X")
(const_int 0)])
- (match_operand:HI 3 "arith11_operand" "rL,0")
- (match_operand:HI 4 "arith11_operand" "0,rL")))]
+ (match_operand:HI 3 "arith11_operand" "rL,0")
+ (match_operand:HI 4 "arith11_operand" "0,rL")))]
"TARGET_V9"
"@
- mov%C1 %x2,%3,%0
- mov%c1 %x2,%4,%0"
- [(set_attr "type" "cmove")])
+ mov%C1\\t%x2, %3, %0
+ mov%c1\\t%x2, %4, %0"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "1")])
(define_insn "*movsi_cc_sp64"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(if_then_else:SI (match_operator 1 "comparison_operator"
[(match_operand 2 "icc_or_fcc_reg_operand" "X,X")
(const_int 0)])
- (match_operand:SI 3 "arith11_operand" "rL,0")
- (match_operand:SI 4 "arith11_operand" "0,rL")))]
+ (match_operand:SI 3 "arith11_operand" "rL,0")
+ (match_operand:SI 4 "arith11_operand" "0,rL")))]
"TARGET_V9"
"@
- mov%C1 %x2,%3,%0
- mov%c1 %x2,%4,%0"
- [(set_attr "type" "cmove")])
+ mov%C1\\t%x2, %3, %0
+ mov%c1\\t%x2, %4, %0"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "1")])
;; ??? The constraints of operands 3,4 need work.
(define_insn "*movdi_cc_sp64"
@@ -2861,142 +3826,112 @@
(if_then_else:DI (match_operator 1 "comparison_operator"
[(match_operand 2 "icc_or_fcc_reg_operand" "X,X")
(const_int 0)])
- (match_operand:DI 3 "arith11_double_operand" "rLH,0")
- (match_operand:DI 4 "arith11_double_operand" "0,rLH")))]
+ (match_operand:DI 3 "arith11_double_operand" "rLH,0")
+ (match_operand:DI 4 "arith11_double_operand" "0,rLH")))]
"TARGET_ARCH64"
"@
- mov%C1 %x2,%3,%0
- mov%c1 %x2,%4,%0"
- [(set_attr "type" "cmove")])
+ mov%C1\\t%x2, %3, %0
+ mov%c1\\t%x2, %4, %0"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "1")])
+
+(define_insn "*movdi_cc_sp64_trunc"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (if_then_else:SI (match_operator 1 "comparison_operator"
+ [(match_operand 2 "icc_or_fcc_reg_operand" "X,X")
+ (const_int 0)])
+ (match_operand:SI 3 "arith11_double_operand" "rLH,0")
+ (match_operand:SI 4 "arith11_double_operand" "0,rLH")))]
+ "TARGET_ARCH64"
+ "@
+ mov%C1\\t%x2, %3, %0
+ mov%c1\\t%x2, %4, %0"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "1")])
(define_insn "*movsf_cc_sp64"
[(set (match_operand:SF 0 "register_operand" "=f,f")
(if_then_else:SF (match_operator 1 "comparison_operator"
[(match_operand 2 "icc_or_fcc_reg_operand" "X,X")
(const_int 0)])
- (match_operand:SF 3 "register_operand" "f,0")
- (match_operand:SF 4 "register_operand" "0,f")))]
+ (match_operand:SF 3 "register_operand" "f,0")
+ (match_operand:SF 4 "register_operand" "0,f")))]
"TARGET_V9 && TARGET_FPU"
"@
- fmovs%C1 %x2,%3,%0
- fmovs%c1 %x2,%4,%0"
- [(set_attr "type" "fpcmove")])
+ fmovs%C1\\t%x2, %3, %0
+ fmovs%c1\\t%x2, %4, %0"
+ [(set_attr "type" "fpcmove")
+ (set_attr "length" "1")])
(define_insn "*movdf_cc_sp64"
[(set (match_operand:DF 0 "register_operand" "=e,e")
(if_then_else:DF (match_operator 1 "comparison_operator"
[(match_operand 2 "icc_or_fcc_reg_operand" "X,X")
(const_int 0)])
- (match_operand:DF 3 "register_operand" "e,0")
- (match_operand:DF 4 "register_operand" "0,e")))]
+ (match_operand:DF 3 "register_operand" "e,0")
+ (match_operand:DF 4 "register_operand" "0,e")))]
"TARGET_V9 && TARGET_FPU"
"@
- fmovd%C1 %x2,%3,%0
- fmovd%c1 %x2,%4,%0"
- [(set_attr "type" "fpcmove")])
+ fmovd%C1\\t%x2, %3, %0
+ fmovd%c1\\t%x2, %4, %0"
+ [(set_attr "type" "fpcmove")
+ (set_attr "length" "1")])
(define_insn "*movtf_cc_sp64"
[(set (match_operand:TF 0 "register_operand" "=e,e")
(if_then_else:TF (match_operator 1 "comparison_operator"
[(match_operand 2 "icc_or_fcc_reg_operand" "X,X")
(const_int 0)])
- (match_operand:TF 3 "register_operand" "e,0")
- (match_operand:TF 4 "register_operand" "0,e")))]
+ (match_operand:TF 3 "register_operand" "e,0")
+ (match_operand:TF 4 "register_operand" "0,e")))]
"TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
"@
- fmovq%C1 %x2,%3,%0
- fmovq%c1 %x2,%4,%0"
- [(set_attr "type" "fpcmove")])
+ fmovq%C1\\t%x2, %3, %0
+ fmovq%c1\\t%x2, %4, %0"
+ [(set_attr "type" "fpcmove")
+ (set_attr "length" "1")])
(define_insn "*movqi_cc_reg_sp64"
[(set (match_operand:QI 0 "register_operand" "=r,r")
(if_then_else:QI (match_operator 1 "v9_regcmp_op"
[(match_operand:DI 2 "register_operand" "r,r")
(const_int 0)])
- (match_operand:QI 3 "arith10_operand" "rM,0")
- (match_operand:QI 4 "arith10_operand" "0,rM")))]
+ (match_operand:QI 3 "arith10_operand" "rM,0")
+ (match_operand:QI 4 "arith10_operand" "0,rM")))]
"TARGET_ARCH64"
"@
- movr%D1 %2,%r3,%0
- movr%d1 %2,%r4,%0"
- [(set_attr "type" "cmove")])
+ movr%D1\\t%2, %r3, %0
+ movr%d1\\t%2, %r4, %0"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "1")])
(define_insn "*movhi_cc_reg_sp64"
[(set (match_operand:HI 0 "register_operand" "=r,r")
(if_then_else:HI (match_operator 1 "v9_regcmp_op"
[(match_operand:DI 2 "register_operand" "r,r")
(const_int 0)])
- (match_operand:HI 3 "arith10_operand" "rM,0")
- (match_operand:HI 4 "arith10_operand" "0,rM")))]
+ (match_operand:HI 3 "arith10_operand" "rM,0")
+ (match_operand:HI 4 "arith10_operand" "0,rM")))]
"TARGET_ARCH64"
"@
- movr%D1 %2,%r3,%0
- movr%d1 %2,%r4,%0"
- [(set_attr "type" "cmove")])
+ movr%D1\\t%2, %r3, %0
+ movr%d1\\t%2, %r4, %0"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "1")])
(define_insn "*movsi_cc_reg_sp64"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(if_then_else:SI (match_operator 1 "v9_regcmp_op"
[(match_operand:DI 2 "register_operand" "r,r")
(const_int 0)])
- (match_operand:SI 3 "arith10_operand" "rM,0")
- (match_operand:SI 4 "arith10_operand" "0,rM")))]
+ (match_operand:SI 3 "arith10_operand" "rM,0")
+ (match_operand:SI 4 "arith10_operand" "0,rM")))]
"TARGET_ARCH64"
"@
- movr%D1 %2,%r3,%0
- movr%d1 %2,%r4,%0"
- [(set_attr "type" "cmove")])
-
-;; On UltraSPARC this is slightly worse than cmp/mov %icc if the register
-;; needs to be zero extended but better on average.
-(define_insn "*movsi_cc_reg_v8plus"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (if_then_else:SI (match_operator 1 "v8plus_regcmp_op"
- [(match_operand:SI 2 "register_operand" "r,r")
- (const_int 0)])
- (match_operand:SI 3 "arith10_operand" "rM,0")
- (match_operand:SI 4 "arith10_operand" "0,rM")))]
- "TARGET_V9"
- "*
-{
- if (! sparc_check_64 (operands[2], insn))
- output_asm_insn (\"srl %2,0,%2\", operands);
- if (which_alternative == 0)
- return \"movr%D1 %2,%r3,%0\";
- return \"movr%d1 %2,%r4,%0\";
-}"
+ movr%D1\\t%2, %r3, %0
+ movr%d1\\t%2, %r4, %0"
[(set_attr "type" "cmove")
- (set_attr "length" "2")])
-
-;; To work well this needs to know the current insn, but that is not an
-;; argument to gen_split_*.
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (if_then_else:SI (match_operator 1 "v8plus_regcmp_op"
- [(match_operand:SI 2 "register_operand" "r,r")
- (const_int 0)])
- (match_operand:SI 3 "arith10_operand" "rM,0")
- (match_operand:SI 4 "arith10_operand" "0,rM")))]
- "reload_completed"
- [(set (match_dup 0)
- (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 4)] 9))]
- "if (! sparc_check_64 (operands[2], NULL_RTX))
- emit_insn (gen_v8plus_clear_high (operands[2], operands[2]));")
-
-;; A conditional move with the condition argument known to be zero extended
-(define_insn ""
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (unspec:SI [(match_operator 1 "v8plus_regcmp_op"
- [(match_operand:SI 2 "register_operand" "r,r")
- (const_int 0)])
- (match_operand:SI 3 "arith10_operand" "rM,0")
- (match_operand:SI 4 "arith10_operand" "0,rM")] 9))]
- "TARGET_V9"
- "@
- movr%D1 %2,%r3,%0
- movr%d1 %2,%r4,%0"
- [(set_attr "type" "cmove")])
+ (set_attr "length" "1")])
;; ??? The constraints of operands 3,4 need work.
(define_insn "*movdi_cc_reg_sp64"
@@ -3004,52 +3939,70 @@
(if_then_else:DI (match_operator 1 "v9_regcmp_op"
[(match_operand:DI 2 "register_operand" "r,r")
(const_int 0)])
- (match_operand:DI 3 "arith10_double_operand" "rMH,0")
- (match_operand:DI 4 "arith10_double_operand" "0,rMH")))]
+ (match_operand:DI 3 "arith10_double_operand" "rMH,0")
+ (match_operand:DI 4 "arith10_double_operand" "0,rMH")))]
"TARGET_ARCH64"
"@
- movr%D1 %2,%r3,%0
- movr%d1 %2,%r4,%0"
- [(set_attr "type" "cmove")])
+ movr%D1\\t%2, %r3, %0
+ movr%d1\\t%2, %r4, %0"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "1")])
+
+(define_insn "*movdi_cc_reg_sp64_trunc"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (if_then_else:SI (match_operator 1 "v9_regcmp_op"
+ [(match_operand:DI 2 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:SI 3 "arith10_double_operand" "rMH,0")
+ (match_operand:SI 4 "arith10_double_operand" "0,rMH")))]
+ "TARGET_ARCH64"
+ "@
+ movr%D1\\t%2, %r3, %0
+ movr%d1\\t%2, %r4, %0"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "1")])
(define_insn "*movsf_cc_reg_sp64"
[(set (match_operand:SF 0 "register_operand" "=f,f")
(if_then_else:SF (match_operator 1 "v9_regcmp_op"
[(match_operand:DI 2 "register_operand" "r,r")
(const_int 0)])
- (match_operand:SF 3 "register_operand" "f,0")
- (match_operand:SF 4 "register_operand" "0,f")))]
+ (match_operand:SF 3 "register_operand" "f,0")
+ (match_operand:SF 4 "register_operand" "0,f")))]
"TARGET_ARCH64 && TARGET_FPU"
"@
- fmovrs%D1 %2,%3,%0
- fmovrs%d1 %2,%4,%0"
- [(set_attr "type" "fpcmove")])
+ fmovrs%D1\\t%2, %3, %0
+ fmovrs%d1\\t%2, %4, %0"
+ [(set_attr "type" "fpcmove")
+ (set_attr "length" "1")])
(define_insn "*movdf_cc_reg_sp64"
[(set (match_operand:DF 0 "register_operand" "=e,e")
(if_then_else:DF (match_operator 1 "v9_regcmp_op"
[(match_operand:DI 2 "register_operand" "r,r")
(const_int 0)])
- (match_operand:DF 3 "register_operand" "e,0")
- (match_operand:DF 4 "register_operand" "0,e")))]
+ (match_operand:DF 3 "register_operand" "e,0")
+ (match_operand:DF 4 "register_operand" "0,e")))]
"TARGET_ARCH64 && TARGET_FPU"
"@
- fmovrd%D1 %2,%3,%0
- fmovrd%d1 %2,%4,%0"
- [(set_attr "type" "fpcmove")])
+ fmovrd%D1\\t%2, %3, %0
+ fmovrd%d1\\t%2, %4, %0"
+ [(set_attr "type" "fpcmove")
+ (set_attr "length" "1")])
(define_insn "*movtf_cc_reg_sp64"
[(set (match_operand:TF 0 "register_operand" "=e,e")
(if_then_else:TF (match_operator 1 "v9_regcmp_op"
[(match_operand:DI 2 "register_operand" "r,r")
(const_int 0)])
- (match_operand:TF 3 "register_operand" "e,0")
- (match_operand:TF 4 "register_operand" "0,e")))]
+ (match_operand:TF 3 "register_operand" "e,0")
+ (match_operand:TF 4 "register_operand" "0,e")))]
"TARGET_ARCH64 && TARGET_FPU"
"@
- fmovrq%D1 %2,%3,%0
- fmovrq%d1 %2,%4,%0"
- [(set_attr "type" "fpcmove")])
+ fmovrq%D1\\t%2, %3, %0
+ fmovrq%d1\\t%2, %4, %0"
+ [(set_attr "type" "fpcmove")
+ (set_attr "length" "1")])
;;- zero extension instructions
@@ -3084,8 +4037,9 @@
[(set (match_operand:SI 0 "register_operand" "=r")
(zero_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
""
- "lduh %1,%0"
- [(set_attr "type" "load")])
+ "lduh\\t%1, %0"
+ [(set_attr "type" "load")
+ (set_attr "length" "1")])
(define_expand "zero_extendqihi2"
[(set (match_operand:HI 0 "register_operand" "")
@@ -3095,11 +4049,11 @@
(define_insn "*zero_extendqihi2_insn"
[(set (match_operand:HI 0 "register_operand" "=r,r")
- (zero_extend:HI (match_operand:QI 1 "sparc_operand" "r,Q")))]
+ (zero_extend:HI (match_operand:QI 1 "input_operand" "r,m")))]
"GET_CODE (operands[1]) != CONST_INT"
"@
- and %1,0xff,%0
- ldub %1,%0"
+ and\\t%1, 0xff, %0
+ ldub\\t%1, %0"
[(set_attr "type" "unary,load")
(set_attr "length" "1")])
@@ -3111,11 +4065,11 @@
(define_insn "*zero_extendqisi2_insn"
[(set (match_operand:SI 0 "register_operand" "=r,r")
- (zero_extend:SI (match_operand:QI 1 "sparc_operand" "r,Q")))]
+ (zero_extend:SI (match_operand:QI 1 "input_operand" "r,m")))]
"GET_CODE (operands[1]) != CONST_INT"
"@
- and %1,0xff,%0
- ldub %1,%0"
+ and\\t%1, 0xff, %0
+ ldub\\t%1, %0"
[(set_attr "type" "unary,load")
(set_attr "length" "1")])
@@ -3127,11 +4081,11 @@
(define_insn "*zero_extendqidi2_insn"
[(set (match_operand:DI 0 "register_operand" "=r,r")
- (zero_extend:DI (match_operand:QI 1 "sparc_operand" "r,Q")))]
+ (zero_extend:DI (match_operand:QI 1 "input_operand" "r,m")))]
"TARGET_ARCH64 && GET_CODE (operands[1]) != CONST_INT"
"@
- and %1,0xff,%0
- ldub %1,%0"
+ and\\t%1, 0xff, %0
+ ldub\\t%1, %0"
[(set_attr "type" "unary,load")
(set_attr "length" "1")])
@@ -3162,8 +4116,9 @@
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
"TARGET_ARCH64"
- "lduh %1,%0"
- [(set_attr "type" "load")])
+ "lduh\\t%1, %0"
+ [(set_attr "type" "load")
+ (set_attr "length" "1")])
;; ??? Write truncdisi pattern using sra?
@@ -3171,32 +4126,59 @@
(define_expand "zero_extendsidi2"
[(set (match_operand:DI 0 "register_operand" "")
(zero_extend:DI (match_operand:SI 1 "register_operand" "")))]
- "TARGET_ARCH64"
+ ""
"")
-(define_insn "*zero_extendsidi2_insn"
+(define_insn "*zero_extendsidi2_insn_sp64"
[(set (match_operand:DI 0 "register_operand" "=r,r")
- (zero_extend:DI (match_operand:SI 1 "sparc_operand" "r,Q")))]
+ (zero_extend:DI (match_operand:SI 1 "input_operand" "r,m")))]
"TARGET_ARCH64 && GET_CODE (operands[1]) != CONST_INT"
"@
- srl %1,0,%0
- lduw %1,%0"
- [(set_attr "type" "unary,load")
+ srl\\t%1, 0, %0
+ lduw\\t%1, %0"
+ [(set_attr "type" "shift,load")
(set_attr "length" "1")])
-;; Zero extend a 32 bit value in a 64 bit register.
-(define_insn "v8plus_clear_high"
- [(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand" "=r,Q")
- (unspec:SI [(match_operand:SI 1 "register_operand" "r,r")] 10))]
- "TARGET_V9"
- "*
-if (which_alternative == 1)
- return \"st %1,%0\";
-if (sparc_check_64 (operands[1], insn) > 0)
- return final_sequence ? \"nop\" : \"\";
-return \"srl %1,0,%0\";
-"
- [(set_attr "type" "shift,store")])
+(define_insn "*zero_extendsidi2_insn_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))]
+ "! TARGET_ARCH64"
+ "#"
+ [(set_attr "type" "unary")
+ (set_attr "length" "2")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "
+{
+ rtx dest1, dest2;
+
+ if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+
+ dest1 = gen_highpart (SImode, operands[0]);
+ dest2 = gen_lowpart (SImode, operands[0]);
+
+ /* Swap the order in case of overlap. */
+ if (REGNO (dest1) == REGNO (operands[1]))
+ {
+ operands[2] = dest2;
+ operands[3] = operands[1];
+ operands[4] = dest1;
+ operands[5] = const0_rtx;
+ }
+ else
+ {
+ operands[2] = dest1;
+ operands[3] = const0_rtx;
+ operands[4] = dest2;
+ operands[5] = operands[1];
+ }
+}")
;; Simplify comparisons of extended values.
@@ -3204,9 +4186,10 @@ return \"srl %1,0,%0\";
[(set (reg:CC 100)
(compare:CC (zero_extend:SI (match_operand:QI 0 "register_operand" "r"))
(const_int 0)))]
- ""
- "andcc %0,0xff,%%g0"
- [(set_attr "type" "compare")])
+ "! TARGET_LIVE_G0"
+ "andcc\\t%0, 0xff, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_zero_extendqisi2_set"
[(set (reg:CC 100)
@@ -3215,28 +4198,71 @@ return \"srl %1,0,%0\";
(set (match_operand:SI 0 "register_operand" "=r")
(zero_extend:SI (match_dup 1)))]
""
- "andcc %1,0xff,%0"
- [(set_attr "type" "unary")])
+ "andcc\\t%1, 0xff, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
+
+(define_insn "*cmp_zero_extendqidi2"
+ [(set (reg:CCX 100)
+ (compare:CCX (zero_extend:DI (match_operand:QI 0 "register_operand" "r"))
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "andcc\\t%0, 0xff, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
+
+(define_insn "*cmp_zero_extendqidi2_set"
+ [(set (reg:CCX 100)
+ (compare:CCX (zero_extend:DI (match_operand:QI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_dup 1)))]
+ "TARGET_ARCH64"
+ "andcc\\t%1, 0xff, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
-;; Similarly, handle SI->QI mode truncation followed by a compare.
+;; Similarly, handle {SI,DI}->QI mode truncation followed by a compare.
(define_insn "*cmp_siqi_trunc"
[(set (reg:CC 100)
(compare:CC (subreg:QI (match_operand:SI 0 "register_operand" "r") 0)
(const_int 0)))]
- ""
- "andcc %0,0xff,%%g0"
- [(set_attr "type" "compare")])
+ "! TARGET_LIVE_G0"
+ "andcc\\t%0, 0xff, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_siqi_trunc_set"
[(set (reg:CC 100)
(compare:CC (subreg:QI (match_operand:SI 1 "register_operand" "r") 0)
(const_int 0)))
(set (match_operand:QI 0 "register_operand" "=r")
- (match_dup 1))]
+ (subreg:QI (match_dup 1) 0))]
""
- "andcc %1,0xff,%0"
- [(set_attr "type" "unary")])
+ "andcc\\t%1, 0xff, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
+
+(define_insn "*cmp_diqi_trunc"
+ [(set (reg:CC 100)
+ (compare:CC (subreg:QI (match_operand:DI 0 "register_operand" "r") 0)
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "andcc\\t%0, 0xff, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
+
+(define_insn "*cmp_diqi_trunc_set"
+ [(set (reg:CC 100)
+ (compare:CC (subreg:QI (match_operand:DI 1 "register_operand" "r") 0)
+ (const_int 0)))
+ (set (match_operand:QI 0 "register_operand" "=r")
+ (subreg:QI (match_dup 1) 0))]
+ "TARGET_ARCH64"
+ "andcc\\t%1, 0xff, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
;;- sign extension instructions
@@ -3271,8 +4297,9 @@ return \"srl %1,0,%0\";
[(set (match_operand:SI 0 "register_operand" "=r")
(sign_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
""
- "ldsh %1,%0"
- [(set_attr "type" "sload")])
+ "ldsh\\t%1, %0"
+ [(set_attr "type" "sload")
+ (set_attr "length" "1")])
(define_expand "extendqihi2"
[(set (match_operand:HI 0 "register_operand" "")
@@ -3308,8 +4335,9 @@ return \"srl %1,0,%0\";
[(set (match_operand:HI 0 "register_operand" "=r")
(sign_extend:HI (match_operand:QI 1 "memory_operand" "m")))]
""
- "ldsb %1,%0"
- [(set_attr "type" "sload")])
+ "ldsb\\t%1, %0"
+ [(set_attr "type" "sload")
+ (set_attr "length" "1")])
(define_expand "extendqisi2"
[(set (match_operand:SI 0 "register_operand" "")
@@ -3338,8 +4366,9 @@ return \"srl %1,0,%0\";
[(set (match_operand:SI 0 "register_operand" "=r")
(sign_extend:SI (match_operand:QI 1 "memory_operand" "m")))]
""
- "ldsb %1,%0"
- [(set_attr "type" "sload")])
+ "ldsb\\t%1, %0"
+ [(set_attr "type" "sload")
+ (set_attr "length" "1")])
(define_expand "extendqidi2"
[(set (match_operand:DI 0 "register_operand" "")
@@ -3368,8 +4397,9 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI (match_operand:QI 1 "memory_operand" "m")))]
"TARGET_ARCH64"
- "ldsb %1,%0"
- [(set_attr "type" "sload")])
+ "ldsb\\t%1, %0"
+ [(set_attr "type" "sload")
+ (set_attr "length" "1")])
(define_expand "extendhidi2"
[(set (match_operand:DI 0 "register_operand" "")
@@ -3398,8 +4428,9 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
"TARGET_ARCH64"
- "ldsh %1,%0"
- [(set_attr "type" "load")])
+ "ldsh\\t%1, %0"
+ [(set_attr "type" "sload")
+ (set_attr "length" "1")])
(define_expand "extendsidi2"
[(set (match_operand:DI 0 "register_operand" "")
@@ -3409,12 +4440,12 @@ return \"srl %1,0,%0\";
(define_insn "*sign_extendsidi2_insn"
[(set (match_operand:DI 0 "register_operand" "=r,r")
- (sign_extend:DI (match_operand:SI 1 "sparc_operand" "r,Q")))]
+ (sign_extend:DI (match_operand:SI 1 "input_operand" "r,m")))]
"TARGET_ARCH64"
"@
- sra %1,0,%0
- ldsw %1,%0"
- [(set_attr "type" "unary,sload")
+ sra\\t%1, 0, %0
+ ldsw\\t%1, %0"
+ [(set_attr "type" "shift,sload")
(set_attr "length" "1")])
;; Special pattern for optimizing bit-field compares. This is needed
@@ -3424,37 +4455,59 @@ return \"srl %1,0,%0\";
[(set (reg:CC 100)
(compare:CC
(zero_extract:SI (match_operand:SI 0 "register_operand" "r")
- (match_operand:SI 1 "small_int" "n")
- (match_operand:SI 2 "small_int" "n"))
+ (match_operand:SI 1 "small_int_or_double" "n")
+ (match_operand:SI 2 "small_int_or_double" "n"))
(const_int 0)))]
- "INTVAL (operands[2]) > 19"
+ "! TARGET_LIVE_G0
+ && ((GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) > 19)
+ || (GET_CODE (operands[2]) == CONST_DOUBLE
+ && CONST_DOUBLE_LOW (operands[2]) > 19))"
"*
{
- int len = INTVAL (operands[1]);
- int pos = 32 - INTVAL (operands[2]) - len;
- unsigned mask = ((1 << len) - 1) << pos;
+ int len = (GET_CODE (operands[1]) == CONST_INT
+ ? INTVAL (operands[1])
+ : CONST_DOUBLE_LOW (operands[1]));
+ int pos = 32 -
+ (GET_CODE (operands[2]) == CONST_INT
+ ? INTVAL (operands[2])
+ : CONST_DOUBLE_LOW (operands[2])) - len;
+ HOST_WIDE_INT mask = ((1 << len) - 1) << pos;
operands[1] = GEN_INT (mask);
- return \"andcc %0,%1,%%g0\";
-}")
+ return \"andcc\\t%0, %1, %%g0\";
+}"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_zero_extract_sp64"
[(set (reg:CCX 100)
(compare:CCX
(zero_extract:DI (match_operand:DI 0 "register_operand" "r")
- (match_operand:SI 1 "small_int" "n")
- (match_operand:SI 2 "small_int" "n"))
+ (match_operand:SI 1 "small_int_or_double" "n")
+ (match_operand:SI 2 "small_int_or_double" "n"))
(const_int 0)))]
- "TARGET_ARCH64 && INTVAL (operands[2]) > 51"
+ "TARGET_ARCH64
+ && ((GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) > 51)
+ || (GET_CODE (operands[2]) == CONST_DOUBLE
+ && CONST_DOUBLE_LOW (operands[2]) > 51))"
"*
{
- int len = INTVAL (operands[1]);
- int pos = 64 - INTVAL (operands[2]) - len;
- unsigned HOST_WIDE_INT mask = (((unsigned HOST_WIDE_INT) 1 << len) - 1) << pos;
+ int len = (GET_CODE (operands[1]) == CONST_INT
+ ? INTVAL (operands[1])
+ : CONST_DOUBLE_LOW (operands[1]));
+ int pos = 64 -
+ (GET_CODE (operands[2]) == CONST_INT
+ ? INTVAL (operands[2])
+ : CONST_DOUBLE_LOW (operands[2])) - len;
+ HOST_WIDE_INT mask = (((unsigned HOST_WIDE_INT) 1 << len) - 1) << pos;
operands[1] = GEN_INT (mask);
- return \"andcc %0,%1,%%g0\";
-}")
+ return \"andcc\\t%0, %1, %%g0\";
+}"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
;; Conversions between float, double and long double.
@@ -3463,48 +4516,54 @@ return \"srl %1,0,%0\";
(float_extend:DF
(match_operand:SF 1 "register_operand" "f")))]
"TARGET_FPU"
- "fstod %1,%0"
- [(set_attr "type" "fp")])
+ "fstod\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "extendsftf2"
[(set (match_operand:TF 0 "register_operand" "=e")
(float_extend:TF
(match_operand:SF 1 "register_operand" "f")))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "fstoq %1,%0"
- [(set_attr "type" "fp")])
+ "fstoq\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "extenddftf2"
[(set (match_operand:TF 0 "register_operand" "=e")
(float_extend:TF
(match_operand:DF 1 "register_operand" "e")))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "fdtoq %1,%0"
- [(set_attr "type" "fp")])
+ "fdtoq\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "truncdfsf2"
[(set (match_operand:SF 0 "register_operand" "=f")
(float_truncate:SF
(match_operand:DF 1 "register_operand" "e")))]
"TARGET_FPU"
- "fdtos %1,%0"
- [(set_attr "type" "fp")])
+ "fdtos\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "trunctfsf2"
[(set (match_operand:SF 0 "register_operand" "=f")
(float_truncate:SF
(match_operand:TF 1 "register_operand" "e")))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "fqtos %1,%0"
- [(set_attr "type" "fp")])
+ "fqtos\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "trunctfdf2"
[(set (match_operand:DF 0 "register_operand" "=e")
(float_truncate:DF
(match_operand:TF 1 "register_operand" "e")))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "fqtod %1,%0"
- [(set_attr "type" "fp")])
+ "fqtod\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
;; Conversion between fixed point and floating point.
@@ -3512,22 +4571,25 @@ return \"srl %1,0,%0\";
[(set (match_operand:SF 0 "register_operand" "=f")
(float:SF (match_operand:SI 1 "register_operand" "f")))]
"TARGET_FPU"
- "fitos %1,%0"
- [(set_attr "type" "fp")])
+ "fitos\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "floatsidf2"
[(set (match_operand:DF 0 "register_operand" "=e")
(float:DF (match_operand:SI 1 "register_operand" "f")))]
"TARGET_FPU"
- "fitod %1,%0"
- [(set_attr "type" "fp")])
+ "fitod\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "floatsitf2"
[(set (match_operand:TF 0 "register_operand" "=e")
(float:TF (match_operand:SI 1 "register_operand" "f")))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "fitoq %1,%0"
- [(set_attr "type" "fp")])
+ "fitoq\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
;; Now the same for 64 bit sources.
@@ -3535,22 +4597,25 @@ return \"srl %1,0,%0\";
[(set (match_operand:SF 0 "register_operand" "=f")
(float:SF (match_operand:DI 1 "register_operand" "e")))]
"TARGET_V9 && TARGET_FPU"
- "fxtos %1,%0"
- [(set_attr "type" "fp")])
+ "fxtos\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "floatdidf2"
[(set (match_operand:DF 0 "register_operand" "=e")
(float:DF (match_operand:DI 1 "register_operand" "e")))]
"TARGET_V9 && TARGET_FPU"
- "fxtod %1,%0"
- [(set_attr "type" "fp")])
+ "fxtod\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "floatditf2"
[(set (match_operand:TF 0 "register_operand" "=e")
(float:TF (match_operand:DI 1 "register_operand" "e")))]
"TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
- "fxtoq %1,%0"
- [(set_attr "type" "fp")])
+ "fxtoq\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
;; Convert a float to an actual integer.
;; Truncation is performed as part of the conversion.
@@ -3559,22 +4624,25 @@ return \"srl %1,0,%0\";
[(set (match_operand:SI 0 "register_operand" "=f")
(fix:SI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
"TARGET_FPU"
- "fstoi %1,%0"
- [(set_attr "type" "fp")])
+ "fstoi\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "fix_truncdfsi2"
[(set (match_operand:SI 0 "register_operand" "=f")
(fix:SI (fix:DF (match_operand:DF 1 "register_operand" "e"))))]
"TARGET_FPU"
- "fdtoi %1,%0"
- [(set_attr "type" "fp")])
+ "fdtoi\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "fix_trunctfsi2"
[(set (match_operand:SI 0 "register_operand" "=f")
(fix:SI (fix:TF (match_operand:TF 1 "register_operand" "e"))))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "fqtoi %1,%0"
- [(set_attr "type" "fp")])
+ "fqtoi\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
;; Now the same, for V9 targets
@@ -3582,29 +4650,32 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=e")
(fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
"TARGET_V9 && TARGET_FPU"
- "fstox %1,%0"
- [(set_attr "type" "fp")])
+ "fstox\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "fix_truncdfdi2"
[(set (match_operand:DI 0 "register_operand" "=e")
(fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))]
"TARGET_V9 && TARGET_FPU"
- "fdtox %1,%0"
- [(set_attr "type" "fp")])
+ "fdtox\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "fix_trunctfdi2"
[(set (match_operand:DI 0 "register_operand" "=e")
(fix:DI (fix:TF (match_operand:TF 1 "register_operand" "e"))))]
"TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
- "fqtox %1,%0"
- [(set_attr "type" "fp")])
+ "fqtox\\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
;;- arithmetic instructions
(define_expand "adddi3"
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
- (match_operand:DI 2 "arith_double_operand" "rHI")))]
+ (match_operand:DI 2 "arith_double_add_operand" "rHI")))]
""
"
{
@@ -3618,41 +4689,24 @@ return \"srl %1,0,%0\";
gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
+  if (arith_double_4096_operand (operands[2], DImode))
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+			      gen_rtx_MINUS (DImode, operands[1],
+					     GEN_INT (-4096))));
+ DONE;
+ }
}")
-(define_insn "*adddi3_sp32"
+(define_insn "adddi3_insn_sp32"
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
(clobber (reg:CC 100))]
"! TARGET_ARCH64"
- "*
-{
- rtx op2 = operands[2];
-
- if (GET_CODE (op2) == CONST_INT
- || GET_CODE (op2) == CONST_DOUBLE)
- {
- rtx xoperands[4];
- xoperands[0] = operands[0];
- xoperands[1] = operands[1];
- if (WORDS_BIG_ENDIAN)
- split_double (op2, &xoperands[2], &xoperands[3]);
- else
- split_double (op2, &xoperands[3], &xoperands[2]);
- if (xoperands[3] == const0_rtx && xoperands[0] == xoperands[1])
- output_asm_insn (\"add %H1,%2,%H0\", xoperands);
- else
- output_asm_insn (\"addcc %L1,%3,%L0\;addx %H1,%2,%H0\", xoperands);
- return \"\";
- }
- return \"addcc %L1,%L2,%L0\;addx %H1,%H2,%H0\";
-}"
+ "#"
[(set_attr "length" "2")])
-
-;; Split DImode arithmetic
-
(define_split
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
@@ -3722,58 +4776,163 @@ return \"srl %1,0,%0\";
}")
;; LTU here means "carry set"
-(define_insn "*addx"
+(define_insn "addx"
[(set (match_operand:SI 0 "register_operand" "=r")
(plus:SI (plus:SI (match_operand:SI 1 "arith_operand" "%r")
(match_operand:SI 2 "arith_operand" "rI"))
(ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
""
- "addx %1,%2,%0"
- [(set_attr "type" "unary")])
+ "addx\\t%1, %2, %0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "1")])
-(define_insn "*subx"
+(define_insn "*addx_extend_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
+ "! TARGET_ARCH64"
+ "#"
+ [(set_attr "type" "unary")
+ (set_attr "length" "2")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (plus:SI (plus:SI (match_operand:SI 1 "reg_or_0_operand" "")
+ (match_operand:SI 2 "arith_operand" ""))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(set (match_dup 3) (plus:SI (plus:SI (match_dup 1) (match_dup 2))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))
+ (set (match_dup 4) (const_int 0))]
+ "operands[3] = gen_lowpart (SImode, operands[0]);
+   operands[4] = gen_highpart (SImode, operands[0]);")
+
+(define_insn "*addx_extend_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
+ "TARGET_ARCH64"
+ "addx\\t%r1, %2, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
+
+(define_insn "subx"
[(set (match_operand:SI 0 "register_operand" "=r")
- (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:SI (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
(match_operand:SI 2 "arith_operand" "rI"))
(ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
""
- "subx %1,%2,%0"
- [(set_attr "type" "unary")])
+ "subx\\t%r1, %2, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
+
+(define_insn "*subx_extend_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (minus:SI (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
+ "TARGET_ARCH64"
+ "subx\\t%r1, %2, %0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
+
+(define_insn "*subx_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (minus:SI (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
+ "! TARGET_ARCH64"
+ "#"
+ [(set_attr "type" "unary")
+ (set_attr "length" "2")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (minus:SI (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 2))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))
+ (set (match_dup 4) (const_int 0))]
+ "operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[0]);")
(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r")
- (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
- (match_operand:DI 2 "register_operand" "r")))
+ (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:DI 2 "register_operand" "r")))
(clobber (reg:CC 100))]
"! TARGET_ARCH64"
- "addcc %L2,%1,%L0\;addx %H2,0,%H0"
- [(set_attr "type" "multi")])
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (match_operand:DI 2 "register_operand" "")))
+ (clobber (reg:CC 100))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(parallel [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (plus:SI (match_dup 3) (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 5) (plus:SI (match_dup 3) (match_dup 1)))])
+ (set (match_dup 6)
+ (plus:SI (plus:SI (match_dup 4) (const_int 0))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+ "operands[3] = gen_lowpart (SImode, operands[2]);
+ operands[4] = gen_highpart (SImode, operands[2]);
+ operands[5] = gen_lowpart (SImode, operands[0]);
+ operands[6] = gen_highpart (SImode, operands[0]);")
(define_insn "*adddi3_sp64"
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))]
"TARGET_ARCH64"
- "add %1,%2,%0")
+ "add\\t%1, %2, %0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "1")])
-(define_insn "addsi3"
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,d")
+ (plus:SI (match_operand:SI 1 "arith_operand" "%r,d")
+ (match_operand:SI 2 "arith_add_operand" "rI,d")))]
+ ""
+ "
+{
+  if (arith_4096_operand (operands[2], SImode))
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+			      gen_rtx_MINUS (SImode, operands[1],
+					     GEN_INT (-4096))));
+ DONE;
+ }
+}")
+
+(define_insn "*addsi3"
[(set (match_operand:SI 0 "register_operand" "=r,d")
(plus:SI (match_operand:SI 1 "arith_operand" "%r,d")
(match_operand:SI 2 "arith_operand" "rI,d")))]
""
"@
- add %1,%2,%0
- fpadd32s %1,%2,%0"
- [(set_attr "type" "ialu,fp")])
+ add\\t%1, %2, %0
+ fpadd32s\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_plus"
[(set (reg:CC_NOOV 100)
(compare:CC_NOOV (plus:SI (match_operand:SI 0 "arith_operand" "%r")
(match_operand:SI 1 "arith_operand" "rI"))
(const_int 0)))]
- ""
- "addcc %0,%1,%%g0"
- [(set_attr "type" "compare")])
+ "! TARGET_LIVE_G0"
+ "addcc\\t%0, %1, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_plus"
[(set (reg:CCX_NOOV 100)
@@ -3781,8 +4940,9 @@ return \"srl %1,0,%0\";
(match_operand:DI 1 "arith_double_operand" "rHI"))
(const_int 0)))]
"TARGET_ARCH64"
- "addcc %0,%1,%%g0"
- [(set_attr "type" "compare")])
+ "addcc\\t%0, %1, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_plus_set"
[(set (reg:CC_NOOV 100)
@@ -3792,7 +4952,9 @@ return \"srl %1,0,%0\";
(set (match_operand:SI 0 "register_operand" "=r")
(plus:SI (match_dup 1) (match_dup 2)))]
""
- "addcc %1,%2,%0")
+ "addcc\\t%1, %2, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_plus_set"
[(set (reg:CCX_NOOV 100)
@@ -3802,12 +4964,14 @@ return \"srl %1,0,%0\";
(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_dup 1) (match_dup 2)))]
"TARGET_ARCH64"
- "addcc %1,%2,%0")
+ "addcc\\t%1, %2, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_expand "subdi3"
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "register_operand" "r")
- (match_operand:DI 2 "arith_double_operand" "rHI")))]
+ (match_operand:DI 2 "arith_double_add_operand" "rHI")))]
""
"
{
@@ -3821,6 +4985,13 @@ return \"srl %1,0,%0\";
gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
+  if (arith_double_4096_operand (operands[2], DImode))
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+			       gen_rtx_PLUS (DImode, operands[1],
+					     GEN_INT (-4096))));
+ DONE;
+ }
}")
(define_insn "*subdi3_sp32"
@@ -3829,29 +5000,64 @@ return \"srl %1,0,%0\";
(match_operand:DI 2 "arith_double_operand" "rHI")))
(clobber (reg:CC 100))]
"! TARGET_ARCH64"
- "*
+ "#"
+ [(set_attr "length" "2")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "arith_double_operand" "")))
+ (clobber (reg:CC 100))]
+ "! TARGET_ARCH64
+ && reload_completed
+ && (GET_CODE (operands[2]) == CONST_INT
+ || GET_CODE (operands[2]) == CONST_DOUBLE)"
+ [(clobber (const_int 0))]
+ "
{
- rtx op2 = operands[2];
+ rtx highp, lowp;
- if (GET_CODE (op2) == CONST_INT
- || GET_CODE (op2) == CONST_DOUBLE)
+ highp = gen_highpart (SImode, operands[2]);
+ lowp = gen_lowpart (SImode, operands[2]);
+ if ((lowp == const0_rtx)
+ && (operands[0] == operands[1]))
{
- rtx xoperands[4];
- xoperands[0] = operands[0];
- xoperands[1] = operands[1];
- if (WORDS_BIG_ENDIAN)
- split_double (op2, &xoperands[2], &xoperands[3]);
- else
- split_double (op2, &xoperands[3], &xoperands[2]);
- if (xoperands[3] == const0_rtx && xoperands[0] == xoperands[1])
- output_asm_insn (\"sub %H1,%2,%H0\", xoperands);
- else
- output_asm_insn (\"subcc %L1,%3,%L0\;subx %H1,%2,%H0\", xoperands);
- return \"\";
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_highpart (SImode, operands[0]),
+ gen_rtx_MINUS (SImode,
+ gen_highpart (SImode, operands[1]),
+ highp)));
}
- return \"subcc %L1,%L2,%L0\;subx %H1,%H2,%H0\";
-}"
- [(set_attr "length" "2")])
+ else
+ {
+ emit_insn (gen_cmp_minus_cc_set (gen_lowpart (SImode, operands[0]),
+ gen_lowpart (SImode, operands[1]),
+ lowp));
+ emit_insn (gen_subx (gen_highpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[1]),
+ highp));
+ }
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (clobber (reg:CC 100))]
+ "! TARGET_ARCH64
+ && reload_completed"
+ [(clobber (const_int 0))]
+ "
+{
+ emit_insn (gen_cmp_minus_cc_set (gen_lowpart (SImode, operands[0]),
+ gen_lowpart (SImode, operands[1]),
+ gen_lowpart (SImode, operands[2])));
+ emit_insn (gen_subx (gen_highpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[1]),
+ gen_highpart (SImode, operands[2])));
+ DONE;
+}")
(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -3859,34 +5065,73 @@ return \"srl %1,0,%0\";
(zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))
(clobber (reg:CC 100))]
"! TARGET_ARCH64"
- "subcc %L1,%2,%L0\;addx %H1,0,%H0"
- [(set_attr "type" "multi")])
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "register_operand" "")
+ (zero_extend:DI (match_operand:SI 2 "register_operand" ""))))
+ (clobber (reg:CC 100))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(parallel [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (minus:SI (match_dup 3) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 5) (minus:SI (match_dup 3) (match_dup 2)))])
+ (set (match_dup 6)
+ (minus:SI (minus:SI (match_dup 4) (const_int 0))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+ "operands[3] = gen_lowpart (SImode, operands[1]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_lowpart (SImode, operands[0]);
+ operands[6] = gen_highpart (SImode, operands[0]);")
(define_insn "*subdi3_sp64"
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "arith_double_operand" "rHI")))]
"TARGET_ARCH64"
- "sub %1,%2,%0")
+ "sub\\t%1, %2, %0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "1")])
-(define_insn "subsi3"
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,d")
+ (minus:SI (match_operand:SI 1 "register_operand" "r,d")
+ (match_operand:SI 2 "arith_add_operand" "rI,d")))]
+ ""
+ "
+{
+  if (arith_4096_operand (operands[2], SImode))
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+			       gen_rtx_PLUS (SImode, operands[1],
+					     GEN_INT (-4096))));
+ DONE;
+ }
+}")
+
+(define_insn "*subsi3"
[(set (match_operand:SI 0 "register_operand" "=r,d")
(minus:SI (match_operand:SI 1 "register_operand" "r,d")
(match_operand:SI 2 "arith_operand" "rI,d")))]
""
"@
- sub %1,%2,%0
- fpsub32s %1,%2,%0"
- [(set_attr "type" "ialu,fp")])
+ sub\\t%1, %2, %0
+ fpsub32s\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1")])
(define_insn "*cmp_minus_cc"
[(set (reg:CC_NOOV 100)
- (compare:CC_NOOV (minus:SI (match_operand:SI 0 "register_operand" "r")
+ (compare:CC_NOOV (minus:SI (match_operand:SI 0 "reg_or_0_operand" "rJ")
(match_operand:SI 1 "arith_operand" "rI"))
(const_int 0)))]
- ""
- "subcc %0,%1,%%g0"
- [(set_attr "type" "compare")])
+ "! TARGET_LIVE_G0"
+ "subcc\\t%r0, %1, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_minus_ccx"
[(set (reg:CCX_NOOV 100)
@@ -3894,18 +5139,21 @@ return \"srl %1,0,%0\";
(match_operand:DI 1 "arith_double_operand" "rHI"))
(const_int 0)))]
"TARGET_ARCH64"
- "subcc %0,%1,%%g0"
- [(set_attr "type" "compare")])
+ "subcc\\t%0, %1, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
-(define_insn "*cmp_minus_cc_set"
+(define_insn "cmp_minus_cc_set"
[(set (reg:CC_NOOV 100)
- (compare:CC_NOOV (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (compare:CC_NOOV (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
(match_operand:SI 2 "arith_operand" "rI"))
(const_int 0)))
(set (match_operand:SI 0 "register_operand" "=r")
(minus:SI (match_dup 1) (match_dup 2)))]
""
- "subcc %1,%2,%0")
+ "subcc\\t%r1, %2, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_minus_ccx_set"
[(set (reg:CCX_NOOV 100)
@@ -3915,7 +5163,9 @@ return \"srl %1,0,%0\";
(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_dup 1) (match_dup 2)))]
"TARGET_ARCH64"
- "subcc %1,%2,%0")
+ "subcc\\t%1, %2, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
;; Integer Multiply/Divide.
@@ -3928,8 +5178,9 @@ return \"srl %1,0,%0\";
(mult:SI (match_operand:SI 1 "arith_operand" "%r")
(match_operand:SI 2 "arith_operand" "rI")))]
"TARGET_HARD_MUL"
- "smul %1,%2,%0"
- [(set_attr "type" "imul")])
+ "smul\\t%1, %2, %0"
+ [(set_attr "type" "imul")
+ (set_attr "length" "1")])
(define_expand "muldi3"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -3950,9 +5201,12 @@ return \"srl %1,0,%0\";
(mult:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))]
"TARGET_ARCH64"
- "mulx %1,%2,%0")
+ "mulx\\t%1, %2, %0"
+ [(set_attr "type" "imul")
+ (set_attr "length" "1")])
;; V8plus wide multiply.
+;; XXX
(define_insn "muldi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=r,h")
(mult:DI (match_operand:DI 1 "arith_double_operand" "%r,0")
@@ -3963,15 +5217,15 @@ return \"srl %1,0,%0\";
"*
{
if (sparc_check_64 (operands[1], insn) <= 0)
- output_asm_insn (\"srl %L1,0,%L1\", operands);
+ output_asm_insn (\"srl\\t%L1, 0, %L1\", operands);
if (which_alternative == 1)
- output_asm_insn (\"sllx %H1,32,%H1\", operands);
+ output_asm_insn (\"sllx\\t%H1, 32, %H1\", operands);
if (sparc_check_64 (operands[2], insn) <= 0)
- output_asm_insn (\"srl %L2,0,%L2\", operands);
+ output_asm_insn (\"srl\\t%L2, 0, %L2\", operands);
if (which_alternative == 1)
- return \"or %L1,%H1,%H1\;sllx %H2,32,%L1\;or %L2,%L1,%L1\;mulx %H1,%L1,%L0\;srlx %L0,32,%H0\";
+ return \"or\\t%L1, %H1, %H1\\n\\tsllx\\t%H2, 32, %L1\\n\\tor\\t%L2, %L1, %L1\\n\\tmulx\\t%H1, %L1, %L0\;srlx\\t%L0, 32, %H0\";
else
- return \"sllx %H1,32,%3\;sllx %H2,32,%4\;or %L1,%3,%3\;or %L2,%4,%4\;mulx %3,%4,%3\;srlx %3,32,%H0\;mov %3,%L0\";
+ return \"sllx\\t%H1, 32, %3\\n\\tsllx\\t%H2, 32, %4\\n\\tor\\t%L1, %3, %3\\n\\tor\\t%L2, %4, %4\\n\\tmulx\\t%3, %4, %3\\n\\tsrlx\\t%3, 32, %H0\\n\\tmov\\t%3, %L0\";
}"
[(set_attr "length" "9,8")])
@@ -3985,8 +5239,9 @@ return \"srl %1,0,%0\";
(compare:CC_NOOV (mult:SI (match_dup 1) (match_dup 2))
(const_int 0)))]
"TARGET_V8 || TARGET_SPARCLITE || TARGET_DEPRECATED_V8_INSNS"
- "smulcc %1,%2,%0"
- [(set_attr "type" "imul")])
+ "smulcc\\t%1, %2, %0"
+ [(set_attr "type" "imul")
+ (set_attr "length" "1")])
(define_expand "mulsidi3"
[(set (match_operand:DI 0 "register_operand" "")
@@ -4015,6 +5270,7 @@ return \"srl %1,0,%0\";
;; V9 puts the 64 bit product in a 64 bit register. Only out or global
;; registers can hold 64 bit values in the V8plus environment.
+;; XXX
(define_insn "mulsidi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=h,r")
(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
@@ -4022,10 +5278,11 @@ return \"srl %1,0,%0\";
(clobber (match_scratch:SI 3 "=X,&h"))]
"TARGET_V8PLUS"
"@
- smul %1,%2,%L0\;srlx %L0,32,%H0
- smul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+ smul\\t%1, %2, %L0\\n\\tsrlx\\t%L0, 32, %H0
+ smul\\t%1, %2, %3\\n\\tsrlx\\t%3, 32, %H0\\n\\tmov\\t%3, %L0"
[(set_attr "length" "2,3")])
+;; XXX
(define_insn "const_mulsidi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=h,r")
(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
@@ -4033,10 +5290,11 @@ return \"srl %1,0,%0\";
(clobber (match_scratch:SI 3 "=X,&h"))]
"TARGET_V8PLUS"
"@
- smul %1,%2,%L0\;srlx %L0,32,%H0
- smul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+ smul\\t%1, %2, %L0\\n\\tsrlx\\t%L0, 32, %H0
+ smul\\t%1, %2, %3\\n\\tsrlx\\t%3, 32, %H0\\n\\tmov\\t%3, %L0"
[(set_attr "length" "2,3")])
+;; XXX
(define_insn "*mulsidi3_sp32"
[(set (match_operand:DI 0 "register_operand" "=r")
(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
@@ -4044,7 +5302,7 @@ return \"srl %1,0,%0\";
"TARGET_HARD_MUL32"
"*
{
- return TARGET_SPARCLET ? \"smuld %1,%2,%L0\" : \"smul %1,%2,%L0\;rd %%y,%H0\";
+ return TARGET_SPARCLET ? \"smuld\\t%1, %2, %L0\" : \"smul\\t%1, %2, %L0\\n\\trd\\t%%y, %H0\";
}"
[(set (attr "length")
(if_then_else (eq_attr "isa" "sparclet")
@@ -4052,6 +5310,7 @@ return \"srl %1,0,%0\";
;; Extra pattern, because sign_extend of a constant isn't valid.
+;; XXX
(define_insn "const_mulsidi3"
[(set (match_operand:DI 0 "register_operand" "=r")
(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
@@ -4059,7 +5318,7 @@ return \"srl %1,0,%0\";
"TARGET_HARD_MUL"
"*
{
- return TARGET_SPARCLET ? \"smuld %1,%2,%L0\" : \"smul %1,%2,%L0\;rd %%y,%H0\";
+ return TARGET_SPARCLET ? \"smuld\\t%1, %2, %L0\" : \"smul\\t%1, %2, %L0\\n\\trd\\t%%y, %H0\";
}"
[(set (attr "length")
(if_then_else (eq_attr "isa" "sparclet")
@@ -4095,6 +5354,7 @@ return \"srl %1,0,%0\";
}
}")
+;; XXX
(define_insn "smulsi3_highpart_v8plus"
[(set (match_operand:SI 0 "register_operand" "=h,r")
(truncate:SI
@@ -4109,6 +5369,7 @@ return \"srl %1,0,%0\";
[(set_attr "length" "2")])
;; The combiner changes TRUNCATE in the previous pattern to SUBREG.
+;; XXX
(define_insn ""
[(set (match_operand:SI 0 "register_operand" "=h,r")
(subreg:SI
@@ -4120,10 +5381,11 @@ return \"srl %1,0,%0\";
(clobber (match_scratch:SI 4 "=X,&h"))]
"TARGET_V8PLUS"
"@
- smul %1,%2,%0\;srlx %0,%3,%0
- smul %1,%2,%4\;srlx %4,%3,%0"
+ smul\\t%1, %2, %0\\n\\tsrlx\\t%0, %3, %0
+ smul\\t%1, %2, %4\\n\\tsrlx\\t%4, %3, %0"
[(set_attr "length" "2")])
+;; XXX
(define_insn "const_smulsi3_highpart_v8plus"
[(set (match_operand:SI 0 "register_operand" "=h,r")
(truncate:SI
@@ -4133,28 +5395,32 @@ return \"srl %1,0,%0\";
(clobber (match_scratch:SI 4 "=X,&h"))]
"TARGET_V8PLUS"
"@
- smul %1,%2,%0\;srlx %0,%3,%0
- smul %1,%2,%4\;srlx %4,%3,%0"
+ smul\\t%1, %2, %0\\n\\tsrlx\\t%0, %3, %0
+ smul\\t%1, %2, %4\\n\\tsrlx\\t%4, %3, %0"
[(set_attr "length" "2")])
+;; XXX
(define_insn "*smulsi3_highpart_sp32"
[(set (match_operand:SI 0 "register_operand" "=r")
(truncate:SI
(lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
(sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
(const_int 32))))]
- "TARGET_HARD_MUL32"
- "smul %1,%2,%%g0\;rd %%y,%0"
+ "TARGET_HARD_MUL32
+ && ! TARGET_LIVE_G0"
+ "smul\\t%1, %2, %%g0\\n\\trd\\t%%y, %0"
[(set_attr "length" "2")])
+;; XXX
(define_insn "const_smulsi3_highpart"
[(set (match_operand:SI 0 "register_operand" "=r")
(truncate:SI
(lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
(match_operand:SI 2 "register_operand" "r"))
(const_int 32))))]
- "TARGET_HARD_MUL32"
- "smul %1,%2,%%g0\;rd %%y,%0"
+ "TARGET_HARD_MUL32
+ && ! TARGET_LIVE_G0"
+ "smul\\t%1, %2, %%g0\\n\\trd\\t%%y, %0"
[(set_attr "length" "2")])
(define_expand "umulsidi3"
@@ -4182,6 +5448,7 @@ return \"srl %1,0,%0\";
}
}")
+;; XXX
(define_insn "umulsidi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=h,r")
(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
@@ -4189,10 +5456,11 @@ return \"srl %1,0,%0\";
(clobber (match_scratch:SI 3 "=X,&h"))]
"TARGET_V8PLUS"
"@
- umul %1,%2,%L0\;srlx %L0,32,%H0
- umul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+ umul\\t%1, %2, %L0\\n\\tsrlx\\t%L0, 32, %H0
+ umul\\t%1, %2, %3\\n\\tsrlx\\t%3, 32, %H0\\n\\tmov\\t%3, %L0"
[(set_attr "length" "2,3")])
+;; XXX
(define_insn "*umulsidi3_sp32"
[(set (match_operand:DI 0 "register_operand" "=r")
(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
@@ -4200,7 +5468,7 @@ return \"srl %1,0,%0\";
"TARGET_HARD_MUL32"
"*
{
- return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\";
+ return TARGET_SPARCLET ? \"umuld\\t%1, %2, %L0\" : \"umul\\t%1, %2, %L0\\n\\trd\\t%%y, %H0\";
}"
[(set (attr "length")
(if_then_else (eq_attr "isa" "sparclet")
@@ -4208,6 +5476,7 @@ return \"srl %1,0,%0\";
;; Extra pattern, because sign_extend of a constant isn't valid.
+;; XXX
(define_insn "const_umulsidi3"
[(set (match_operand:DI 0 "register_operand" "=r")
(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
@@ -4215,12 +5484,13 @@ return \"srl %1,0,%0\";
"TARGET_HARD_MUL32"
"*
{
- return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\";
+ return TARGET_SPARCLET ? \"umuld\\t%1, %2, %L0\" : \"umul\\t%1, %2, %L0\\n\\trd\\t%%y, %H0\";
}"
[(set (attr "length")
(if_then_else (eq_attr "isa" "sparclet")
(const_int 1) (const_int 2)))])
+;; XXX
(define_insn "const_umulsidi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=h,r")
(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
@@ -4228,8 +5498,8 @@ return \"srl %1,0,%0\";
(clobber (match_scratch:SI 3 "=X,h"))]
"TARGET_V8PLUS"
"@
- umul %1,%2,%L0\;srlx %L0,32,%H0
- umul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+ umul\\t%1, %2, %L0\\n\\tsrlx\\t%L0, 32, %H0
+ umul\\t%1, %2, %3\\n\\tsrlx\\t%3, 32, %H0\\n\\tmov\\t%3, %L0"
[(set_attr "length" "2,3")])
(define_expand "umulsi3_highpart"
@@ -4262,6 +5532,7 @@ return \"srl %1,0,%0\";
}
}")
+;; XXX
(define_insn "umulsi3_highpart_v8plus"
[(set (match_operand:SI 0 "register_operand" "=h,r")
(truncate:SI
@@ -4271,10 +5542,11 @@ return \"srl %1,0,%0\";
(clobber (match_scratch:SI 4 "=X,h"))]
"TARGET_V8PLUS"
"@
- umul %1,%2,%0\;srlx %0,%3,%0
- umul %1,%2,%4\;srlx %4,%3,%0"
+ umul\\t%1, %2, %0\\n\\tsrlx\\t%0, %3, %0
+ umul\\t%1, %2, %4\\n\\tsrlx\\t%4, %3, %0"
[(set_attr "length" "2")])
+;; XXX
(define_insn "const_umulsi3_highpart_v8plus"
[(set (match_operand:SI 0 "register_operand" "=h,r")
(truncate:SI
@@ -4284,51 +5556,58 @@ return \"srl %1,0,%0\";
(clobber (match_scratch:SI 4 "=X,h"))]
"TARGET_V8PLUS"
"@
- umul %1,%2,%0\;srlx %0,%3,%0
- umul %1,%2,%4\;srlx %4,%3,%0"
+ umul\\t%1, %2, %0\\n\\tsrlx\\t%0, %3, %0
+ umul\\t%1, %2, %4\\n\\tsrlx\\t%4, %3, %0"
[(set_attr "length" "2")])
+;; XXX
(define_insn "*umulsi3_highpart_sp32"
[(set (match_operand:SI 0 "register_operand" "=r")
(truncate:SI
(lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
(zero_extend:DI (match_operand:SI 2 "register_operand" "r")))
(const_int 32))))]
- "TARGET_HARD_MUL32"
- "umul %1,%2,%%g0\;rd %%y,%0"
+ "TARGET_HARD_MUL32
+ && ! TARGET_LIVE_G0"
+ "umul\\t%1, %2, %%g0\\n\\trd\\t%%y, %0"
[(set_attr "length" "2")])
+;; XXX
(define_insn "const_umulsi3_highpart"
[(set (match_operand:SI 0 "register_operand" "=r")
(truncate:SI
(lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
(match_operand:SI 2 "uns_small_int" ""))
(const_int 32))))]
- "TARGET_HARD_MUL32"
- "umul %1,%2,%%g0\;rd %%y,%0"
+ "TARGET_HARD_MUL32
+ && ! TARGET_LIVE_G0"
+ "umul\\t%1, %2, %%g0\\n\\trd\\t%%y, %0"
[(set_attr "length" "2")])
;; The v8 architecture specifies that there must be 3 instructions between
;; a y register write and a use of it for correct results.
+;; XXX SHEESH
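+;; Concretely, the non-V9 alternative 0 below expands to:
+;;   sra %1, 31, %3        ! sign word of the dividend
+;;   wr  %g0, %3, %y       ! %y supplies the high 32 bits
+;;   nop; nop; nop         ! the required 3-insn separation
+;;   sdiv %1, %2, %0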
(define_insn "divsi3"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(div:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "move_operand" "rI,m")))
+ (match_operand:SI 2 "input_operand" "rI,m")))
(clobber (match_scratch:SI 3 "=&r,&r"))]
- "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
+ "(TARGET_V8
+ || TARGET_DEPRECATED_V8_INSNS)
+ && ! TARGET_LIVE_G0"
"*
{
if (which_alternative == 0)
if (TARGET_V9)
- return \"sra %1,31,%3\;wr %%g0,%3,%%y\;sdiv %1,%2,%0\";
+ return \"sra\\t%1, 31, %3\\n\\twr\\t%%g0, %3, %%y\\n\\tsdiv\\t%1, %2, %0\";
else
- return \"sra %1,31,%3\;wr %%g0,%3,%%y\;nop\;nop\;nop\;sdiv %1,%2,%0\";
+ return \"sra\\t%1, 31, %3\\n\\twr\\t%%g0, %3, %%y\\n\\tnop\\n\\tnop\\n\\tnop\\n\\tsdiv\\t%1, %2, %0\";
else
if (TARGET_V9)
- return \"sra %1,31,%3\;wr %%g0,%3,%%y\;ld %2,%3\;sdiv %1,%3,%0\";
+ return \"sra\\t%1, 31, %3\\n\\twr\\t%%g0, %3, %%y\\n\\tld\\t%2, %3\\n\\tsdiv\\t%1, %3, %0\";
else
- return \"sra %1,31,%3\;wr %%g0,%3,%%y\;ld %2,%3\;nop\;nop\;sdiv %1,%3,%0\";
+ return \"sra\\t%1, 31, %3\\n\\twr\\t%%g0, %3, %%y\\n\\tld\\t%2, %3\\n\\tnop\\n\\tnop\\n\\tsdiv\\t%1, %3, %0\";
}"
[(set (attr "length")
(if_then_else (eq_attr "isa" "v9")
@@ -4339,10 +5618,11 @@ return \"srl %1,0,%0\";
(div:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "arith_double_operand" "rHI")))]
"TARGET_ARCH64"
- "sdivx %1,%2,%0")
+ "sdivx\\t%1, %2, %0")
;; It is not known whether this will match.
+;; XXX I hope it doesn't match...
(define_insn "*cmp_sdiv_cc_set"
[(set (match_operand:SI 0 "register_operand" "=r")
(div:SI (match_operand:SI 1 "register_operand" "r")
@@ -4351,36 +5631,41 @@ return \"srl %1,0,%0\";
(compare:CC (div:SI (match_dup 1) (match_dup 2))
(const_int 0)))
(clobber (match_scratch:SI 3 "=&r"))]
- "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
+ "(TARGET_V8
+ || TARGET_DEPRECATED_V8_INSNS)
+ && ! TARGET_LIVE_G0"
"*
{
if (TARGET_V9)
- return \"sra %1,31,%3\;wr %%g0,%3,%%y\;sdivcc %1,%2,%0\";
+ return \"sra\\t%1, 31, %3\\n\\twr\\t%%g0, %3, %%y\\n\\tsdivcc\\t%1, %2, %0\";
else
- return \"sra %1,31,%3\;wr %%g0,%3,%%y\;nop\;nop\;nop\;sdivcc %1,%2,%0\";
+ return \"sra\\t%1, 31, %3\\n\\twr\\t%%g0, %3, %%y\\n\\tnop\\n\\tnop\\n\\tnop\\n\\tsdivcc\\t%1, %2, %0\";
}"
[(set (attr "length")
(if_then_else (eq_attr "isa" "v9")
(const_int 3) (const_int 6)))])
+;; XXX
(define_insn "udivsi3"
[(set (match_operand:SI 0 "register_operand" "=r,&r,&r")
(udiv:SI (match_operand:SI 1 "reg_or_nonsymb_mem_operand" "r,r,m")
- (match_operand:SI 2 "move_operand" "rI,m,r")))]
- "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
+ (match_operand:SI 2 "input_operand" "rI,m,r")))]
+ "(TARGET_V8
+ || TARGET_DEPRECATED_V8_INSNS)
+ && ! TARGET_LIVE_G0"
"*
{
- output_asm_insn (\"wr %%g0,%%g0,%%y\", operands);
+ output_asm_insn (\"wr\\t%%g0, %%g0, %%y\", operands);
switch (which_alternative)
{
default:
if (TARGET_V9)
- return \"udiv %1,%2,%0\";
- return \"nop\;nop\;nop\;udiv %1,%2,%0\";
+ return \"udiv\\t%1, %2, %0\";
+ return \"nop\\n\\tnop\\n\\tnop\\n\\tudiv\\t%1, %2, %0\";
case 1:
- return \"ld %2,%0\;nop\;nop\;udiv %1,%0,%0\";
+ return \"ld\\t%2, %0\\n\\tnop\\n\\tnop\\n\\tudiv\\t%1, %0, %0\";
case 2:
- return \"ld %1,%0\;nop\;nop\;udiv %0,%2,%0\";
+ return \"ld\\t%1, %0\\n\\tnop\\n\\tnop\\n\\tudiv\\t%0, %2, %0\";
}
}"
[(set (attr "length")
@@ -4393,10 +5678,11 @@ return \"srl %1,0,%0\";
(udiv:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "arith_double_operand" "rHI")))]
"TARGET_ARCH64"
- "udivx %1,%2,%0")
+ "udivx\\t%1, %2, %0")
;; It is not known whether this will match.
+;; XXX I hope it doesn't match...
(define_insn "*cmp_udiv_cc_set"
[(set (match_operand:SI 0 "register_operand" "=r")
(udiv:SI (match_operand:SI 1 "register_operand" "r")
@@ -4404,13 +5690,15 @@ return \"srl %1,0,%0\";
(set (reg:CC 100)
(compare:CC (udiv:SI (match_dup 1) (match_dup 2))
(const_int 0)))]
- "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
+ "(TARGET_V8
+ || TARGET_DEPRECATED_V8_INSNS)
+ && ! TARGET_LIVE_G0"
"*
{
if (TARGET_V9)
- return \"wr %%g0,%%g0,%%y\;udivcc %1,%2,%0\";
+ return \"wr\\t%%g0, %%g0, %%y\\n\\tudivcc\\t%1, %2, %0\";
else
- return \"wr %%g0,%%g0,%%y\;nop\;nop\;nop\;udivcc %1,%2,%0\";
+ return \"wr\\t%%g0, %%g0, %%y\\n\\tnop\\n\\tnop\\n\\tnop\\n\\tudivcc\\t%1, %2, %0\";
}"
[(set (attr "length")
(if_then_else (eq_attr "isa" "v9")
@@ -4424,8 +5712,9 @@ return \"srl %1,0,%0\";
(match_operand:SI 2 "arith_operand" "rI"))
(match_operand:SI 3 "register_operand" "0")))]
"TARGET_SPARCLET"
- "smac %1,%2,%0"
- [(set_attr "type" "imul")])
+ "smac\\t%1, %2, %0"
+ [(set_attr "type" "imul")
+ (set_attr "length" "1")])
(define_insn "*smacdi"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -4435,8 +5724,9 @@ return \"srl %1,0,%0\";
(match_operand:SI 2 "register_operand" "r")))
(match_operand:DI 3 "register_operand" "0")))]
"TARGET_SPARCLET"
- "smacd %1,%2,%L0"
- [(set_attr "type" "imul")])
+ "smacd\\t%1, %2, %L0"
+ [(set_attr "type" "imul")
+ (set_attr "length" "1")])
(define_insn "*umacdi"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -4446,8 +5736,9 @@ return \"srl %1,0,%0\";
(match_operand:SI 2 "register_operand" "r")))
(match_operand:DI 3 "register_operand" "0")))]
"TARGET_SPARCLET"
- "umacd %1,%2,%L0"
- [(set_attr "type" "imul")])
+ "umacd\\t%1, %2, %L0"
+ [(set_attr "type" "imul")
+ (set_attr "length" "1")])
;;- Boolean instructions
;; We define DImode `and' so with DImode `not' we can get
@@ -4465,36 +5756,22 @@ return \"srl %1,0,%0\";
(and:DI (match_operand:DI 1 "arith_double_operand" "%r,b")
(match_operand:DI 2 "arith_double_operand" "rHI,b")))]
"! TARGET_ARCH64"
- "*
-{
- rtx op2 = operands[2];
-
- if (which_alternative == 1)
- return \"fand %1,%2,%0\";
-
- if (GET_CODE (op2) == CONST_INT
- || GET_CODE (op2) == CONST_DOUBLE)
- {
- rtx xoperands[4];
- xoperands[0] = operands[0];
- xoperands[1] = operands[1];
- if (WORDS_BIG_ENDIAN)
- split_double (op2, &xoperands[2], &xoperands[3]);
- else
- split_double (op2, &xoperands[3], &xoperands[2]);
- output_asm_insn (\"and %L1,%3,%L0\;and %H1,%2,%H0\", xoperands);
- return \"\";
- }
- return \"and %1,%2,%0\;and %R1,%R2,%R0\";
-}"
- [(set_attr "length" "2,1")])
+ "@
+ #
+ fand\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "2,1")])
(define_insn "*anddi3_sp64"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (and:DI (match_operand:DI 1 "arith_double_operand" "%r")
- (match_operand:DI 2 "arith_double_operand" "rHI")))]
+ [(set (match_operand:DI 0 "register_operand" "=r,b")
+ (and:DI (match_operand:DI 1 "arith_double_operand" "%r,b")
+ (match_operand:DI 2 "arith_double_operand" "rHI,b")))]
"TARGET_ARCH64"
- "and %1,%2,%0")
+ "@
+ and\\t%1, %2, %0
+ fand\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
(define_insn "andsi3"
[(set (match_operand:SI 0 "register_operand" "=r,d")
@@ -4502,9 +5779,10 @@ return \"srl %1,0,%0\";
(match_operand:SI 2 "arith_operand" "rI,d")))]
""
"@
- and %1,%2,%0
- fands %1,%2,%0"
- [(set_attr "type" "ialu,fp")])
+ and\\t%1, %2, %0
+ fands\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
(define_split
[(set (match_operand:SI 0 "register_operand" "")
@@ -4527,12 +5805,19 @@ return \"srl %1,0,%0\";
(match_operator:DI 1 "cc_arithop" ; AND, IOR, XOR
[(match_operand:DI 2 "register_operand" "")
(match_operand:DI 3 "arith_double_operand" "")]))]
- "! TARGET_ARCH64 && reload_completed
- && GET_CODE (operands[0]) == REG && REGNO (operands[0]) < 32"
+ "! TARGET_ARCH64
+ && reload_completed
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
[(set (match_dup 4) (match_op_dup:SI 1 [(match_dup 6) (match_dup 8)]))
(set (match_dup 5) (match_op_dup:SI 1 [(match_dup 7) (match_dup 9)]))]
"
{
+ if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
operands[4] = gen_highpart (SImode, operands[0]);
operands[5] = gen_lowpart (SImode, operands[0]);
operands[6] = gen_highpart (SImode, operands[2]);
@@ -4555,16 +5840,43 @@ return \"srl %1,0,%0\";
(match_operand:DI 2 "register_operand" "r,b")))]
"! TARGET_ARCH64"
"@
- andn %2,%1,%0\;andn %R2,%R1,%R0
- fandnot1 %1,%2,%0"
- [(set_attr "length" "2,1")])
+ #
+ fandnot1\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "2,1")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (and:DI (not:DI (match_operand:DI 1 "register_operand" ""))
+ (match_operand:DI 2 "register_operand" "")))]
+ "! TARGET_ARCH64
+ && reload_completed
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
+ [(set (match_dup 3) (and:SI (not:SI (match_dup 4)) (match_dup 5)))
+ (set (match_dup 6) (and:SI (not:SI (match_dup 7)) (match_dup 8)))]
+ "if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[2]);
+ operands[6] = gen_lowpart (SImode, operands[0]);
+ operands[7] = gen_lowpart (SImode, operands[1]);
+ operands[8] = gen_lowpart (SImode, operands[2]);")
(define_insn "*and_not_di_sp64"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (and:DI (not:DI (match_operand:DI 1 "register_operand" "r"))
- (match_operand:DI 2 "register_operand" "r")))]
+ [(set (match_operand:DI 0 "register_operand" "=r,b")
+ (and:DI (not:DI (match_operand:DI 1 "register_operand" "r,b"))
+ (match_operand:DI 2 "register_operand" "r,b")))]
"TARGET_ARCH64"
- "andn %2,%1,%0")
+ "@
+ andn\\t%2, %1, %0
+ fandnot1\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
(define_insn "*and_not_si"
[(set (match_operand:SI 0 "register_operand" "=r,d")
@@ -4572,9 +5884,10 @@ return \"srl %1,0,%0\";
(match_operand:SI 2 "register_operand" "r,d")))]
""
"@
- andn %2,%1,%0
- fandnot1s %1,%2,%0"
- [(set_attr "type" "ialu,fp")])
+ andn\\t%2, %1, %0
+ fandnot1s\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
(define_expand "iordi3"
[(set (match_operand:DI 0 "register_operand" "")
@@ -4588,36 +5901,22 @@ return \"srl %1,0,%0\";
(ior:DI (match_operand:DI 1 "arith_double_operand" "%r,b")
(match_operand:DI 2 "arith_double_operand" "rHI,b")))]
"! TARGET_ARCH64"
- "*
-{
- rtx op2 = operands[2];
-
- if (which_alternative == 1)
- return \"for %1,%2,%0\";
-
- if (GET_CODE (op2) == CONST_INT
- || GET_CODE (op2) == CONST_DOUBLE)
- {
- rtx xoperands[4];
- xoperands[0] = operands[0];
- xoperands[1] = operands[1];
- if (WORDS_BIG_ENDIAN)
- split_double (op2, &xoperands[2], &xoperands[3]);
- else
- split_double (op2, &xoperands[3], &xoperands[2]);
- output_asm_insn (\"or %L1,%3,%L0\;or %H1,%2,%H0\", xoperands);
- return \"\";
- }
- return \"or %1,%2,%0\;or %R1,%R2,%R0\";
-}"
- [(set_attr "length" "2,1")])
+ "@
+ #
+ for\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "2,1")])
(define_insn "*iordi3_sp64"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (ior:DI (match_operand:DI 1 "arith_double_operand" "%r")
- (match_operand:DI 2 "arith_double_operand" "rHI")))]
+ [(set (match_operand:DI 0 "register_operand" "=r,b")
+ (ior:DI (match_operand:DI 1 "arith_double_operand" "%r,b")
+ (match_operand:DI 2 "arith_double_operand" "rHI,b")))]
"TARGET_ARCH64"
- "or %1,%2,%0")
+ "@
+ or\\t%1, %2, %0
+ for\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
(define_insn "iorsi3"
[(set (match_operand:SI 0 "register_operand" "=r,d")
@@ -4625,9 +5924,10 @@ return \"srl %1,0,%0\";
(match_operand:SI 2 "arith_operand" "rI,d")))]
""
"@
- or %1,%2,%0
- fors %1,%2,%0"
- [(set_attr "type" "ialu,fp")])
+ or\\t%1, %2, %0
+ fors\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
(define_split
[(set (match_operand:SI 0 "register_operand" "")
@@ -4650,16 +5950,43 @@ return \"srl %1,0,%0\";
(match_operand:DI 2 "register_operand" "r,b")))]
"! TARGET_ARCH64"
"@
- orn %2,%1,%0\;orn %R2,%R1,%R0
- fornot1 %1,%2,%0"
- [(set_attr "length" "2,1")])
+ #
+ fornot1\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "2,1")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ior:DI (not:DI (match_operand:DI 1 "register_operand" ""))
+ (match_operand:DI 2 "register_operand" "")))]
+ "! TARGET_ARCH64
+ && reload_completed
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
+ [(set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))
+ (set (match_dup 6) (ior:SI (not:SI (match_dup 7)) (match_dup 8)))]
+ "if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[2]);
+ operands[6] = gen_lowpart (SImode, operands[0]);
+ operands[7] = gen_lowpart (SImode, operands[1]);
+ operands[8] = gen_lowpart (SImode, operands[2]);")
(define_insn "*or_not_di_sp64"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (ior:DI (not:DI (match_operand:DI 1 "register_operand" "r"))
- (match_operand:DI 2 "register_operand" "r")))]
+ [(set (match_operand:DI 0 "register_operand" "=r,b")
+ (ior:DI (not:DI (match_operand:DI 1 "register_operand" "r,b"))
+ (match_operand:DI 2 "register_operand" "r,b")))]
"TARGET_ARCH64"
- "orn %2,%1,%0")
+ "@
+ orn\\t%2, %1, %0
+ fornot1\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
(define_insn "*or_not_si"
[(set (match_operand:SI 0 "register_operand" "=r,d")
@@ -4667,9 +5994,10 @@ return \"srl %1,0,%0\";
(match_operand:SI 2 "register_operand" "r,d")))]
""
"@
- orn %2,%1,%0
- fornot1s %1,%2,%0"
- [(set_attr "type" "ialu,fp")])
+ orn\\t%2, %1, %0
+ fornot1s\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
(define_expand "xordi3"
[(set (match_operand:DI 0 "register_operand" "")
@@ -4683,37 +6011,32 @@ return \"srl %1,0,%0\";
(xor:DI (match_operand:DI 1 "arith_double_operand" "%r,b")
(match_operand:DI 2 "arith_double_operand" "rHI,b")))]
"! TARGET_ARCH64"
- "*
-{
- rtx op2 = operands[2];
-
- if (which_alternative == 1)
- return \"fxor %1,%2,%0\";
-
- if (GET_CODE (op2) == CONST_INT
- || GET_CODE (op2) == CONST_DOUBLE)
- {
- rtx xoperands[4];
- xoperands[0] = operands[0];
- xoperands[1] = operands[1];
- if (WORDS_BIG_ENDIAN)
- split_double (op2, &xoperands[2], &xoperands[3]);
- else
- split_double (op2, &xoperands[3], &xoperands[2]);
- output_asm_insn (\"xor %L1,%3,%L0\;xor %H1,%2,%H0\", xoperands);
- return \"\";
- }
- return \"xor %1,%2,%0\;xor %R1,%R2,%R0\";
-}"
+ "@
+ #
+ fxor\\t%1, %2, %0"
[(set_attr "length" "2,1")
(set_attr "type" "ialu,fp")])
(define_insn "*xordi3_sp64"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (xor:DI (match_operand:DI 1 "arith_double_operand" "%rJ")
- (match_operand:DI 2 "arith_double_operand" "rHI")))]
+ [(set (match_operand:DI 0 "register_operand" "=r,b")
+ (xor:DI (match_operand:DI 1 "arith_double_operand" "%rJ,b")
+ (match_operand:DI 2 "arith_double_operand" "rHI,b")))]
"TARGET_ARCH64"
- "xor %r1,%2,%0")
+ "@
+ xor\\t%r1, %2, %0
+ fxor\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
+
+(define_insn "*xordi3_sp64_dbl"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (xor:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "const64_operand" "")))]
+ "(TARGET_ARCH64
+ && HOST_BITS_PER_WIDE_INT != 64)"
+ "xor\\t%1, %2, %0"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
(define_insn "xorsi3"
[(set (match_operand:SI 0 "register_operand" "=r,d")
@@ -4721,9 +6044,10 @@ return \"srl %1,0,%0\";
(match_operand:SI 2 "arith_operand" "rI,d")))]
""
"@
- xor %r1,%2,%0
- fxors %1,%2,%0"
- [(set_attr "type" "ialu,fp")])
+ xor\\t%r1, %2, %0
+ fxors\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
(define_split
[(set (match_operand:SI 0 "register_operand" "")
@@ -4763,18 +6087,43 @@ return \"srl %1,0,%0\";
(match_operand:DI 2 "register_operand" "r,b"))))]
"! TARGET_ARCH64"
"@
- xnor %1,%2,%0\;xnor %R1,%R2,%R0
- fxnor %1,%2,%0"
+ #
+ fxnor\\t%1, %2, %0"
[(set_attr "length" "2,1")
(set_attr "type" "ialu,fp")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (not:DI (xor:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" ""))))]
+ "! TARGET_ARCH64
+ && reload_completed
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
+ [(set (match_dup 3) (not:SI (xor:SI (match_dup 4) (match_dup 5))))
+ (set (match_dup 6) (not:SI (xor:SI (match_dup 7) (match_dup 8))))]
+ "if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[2]);
+ operands[6] = gen_lowpart (SImode, operands[0]);
+ operands[7] = gen_lowpart (SImode, operands[1]);
+ operands[8] = gen_lowpart (SImode, operands[2]);")
+
(define_insn "*xor_not_di_sp64"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (not:DI (xor:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
- (match_operand:DI 2 "arith_double_operand" "rHI"))))]
+ [(set (match_operand:DI 0 "register_operand" "=r,b")
+ (not:DI (xor:DI (match_operand:DI 1 "reg_or_0_operand" "rJ,b")
+ (match_operand:DI 2 "arith_double_operand" "rHI,b"))))]
"TARGET_ARCH64"
- "xnor %r1,%2,%0"
- [(set_attr "type" "ialu")])
+ "@
+ xnor\\t%r1, %2, %0
+ fxnor\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
(define_insn "*xor_not_si"
[(set (match_operand:SI 0 "register_operand" "=r,d")
@@ -4782,9 +6131,10 @@ return \"srl %1,0,%0\";
(match_operand:SI 2 "arith_operand" "rI,d"))))]
""
"@
- xnor %r1,%2,%0
- fxnors %1,%2,%0"
- [(set_attr "type" "ialu,fp")])
+ xnor\\t%r1, %2, %0
+ fxnors\\t%1, %2, %0"
+ [(set_attr "type" "ialu,fp")
+ (set_attr "length" "1,1")])
;; These correspond to the above in the case where we also (or only)
;; want to set the condition code.
@@ -4796,9 +6146,10 @@ return \"srl %1,0,%0\";
[(match_operand:SI 0 "arith_operand" "%r")
(match_operand:SI 1 "arith_operand" "rI")])
(const_int 0)))]
- ""
- "%A2cc %0,%1,%%g0"
- [(set_attr "type" "compare")])
+ "! TARGET_LIVE_G0"
+ "%A2cc\\t%0, %1, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_arith_op"
[(set (reg:CCX 100)
@@ -4808,8 +6159,9 @@ return \"srl %1,0,%0\";
(match_operand:DI 1 "arith_double_operand" "rHI")])
(const_int 0)))]
"TARGET_ARCH64"
- "%A2cc %0,%1,%%g0"
- [(set_attr "type" "compare")])
+ "%A2cc\\t%0, %1, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_arith_op_set"
[(set (reg:CC 100)
@@ -4821,7 +6173,9 @@ return \"srl %1,0,%0\";
(set (match_operand:SI 0 "register_operand" "=r")
(match_dup 3))]
""
- "%A3cc %1,%2,%0")
+ "%A3cc\\t%1, %2, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_arith_op_set"
[(set (reg:CCX 100)
@@ -4833,7 +6187,9 @@ return \"srl %1,0,%0\";
(set (match_operand:DI 0 "register_operand" "=r")
(match_dup 3))]
"TARGET_ARCH64"
- "%A3cc %1,%2,%0")
+ "%A3cc\\t%1, %2, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_xor_not"
[(set (reg:CC 100)
@@ -4841,9 +6197,10 @@ return \"srl %1,0,%0\";
(not:SI (xor:SI (match_operand:SI 0 "reg_or_0_operand" "%rJ")
(match_operand:SI 1 "arith_operand" "rI")))
(const_int 0)))]
- ""
- "xnorcc %r0,%1,%%g0"
- [(set_attr "type" "compare")])
+ "! TARGET_LIVE_G0"
+ "xnorcc\\t%r0, %1, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_xor_not"
[(set (reg:CCX 100)
@@ -4852,8 +6209,9 @@ return \"srl %1,0,%0\";
(match_operand:DI 1 "arith_double_operand" "rHI")))
(const_int 0)))]
"TARGET_ARCH64"
- "xnorcc %r0,%1,%%g0"
- [(set_attr "type" "compare")])
+ "xnorcc\\t%r0, %1, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_xor_not_set"
[(set (reg:CC 100)
@@ -4864,7 +6222,9 @@ return \"srl %1,0,%0\";
(set (match_operand:SI 0 "register_operand" "=r")
(not:SI (xor:SI (match_dup 1) (match_dup 2))))]
""
- "xnorcc %r1,%2,%0")
+ "xnorcc\\t%r1, %2, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_xor_not_set"
[(set (reg:CCX 100)
@@ -4875,7 +6235,9 @@ return \"srl %1,0,%0\";
(set (match_operand:DI 0 "register_operand" "=r")
(not:DI (xor:DI (match_dup 1) (match_dup 2))))]
"TARGET_ARCH64"
- "xnorcc %r1,%2,%0")
+ "xnorcc\\t%r1, %2, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_arith_op_not"
[(set (reg:CC 100)
@@ -4884,9 +6246,10 @@ return \"srl %1,0,%0\";
[(not:SI (match_operand:SI 0 "arith_operand" "rI"))
(match_operand:SI 1 "reg_or_0_operand" "rJ")])
(const_int 0)))]
- ""
- "%B2cc %r1,%0,%%g0"
- [(set_attr "type" "compare")])
+ "! TARGET_LIVE_G0"
+ "%B2cc\\t%r1, %0, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_arith_op_not"
[(set (reg:CCX 100)
@@ -4896,8 +6259,9 @@ return \"srl %1,0,%0\";
(match_operand:DI 1 "reg_or_0_operand" "rJ")])
(const_int 0)))]
"TARGET_ARCH64"
- "%B2cc %r1,%0,%%g0"
- [(set_attr "type" "compare")])
+ "%B2cc\\t%r1, %0, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_arith_op_not_set"
[(set (reg:CC 100)
@@ -4909,7 +6273,9 @@ return \"srl %1,0,%0\";
(set (match_operand:SI 0 "register_operand" "=r")
(match_dup 3))]
""
- "%B3cc %r2,%1,%0")
+ "%B3cc\\t%r2, %1, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_arith_op_not_set"
[(set (reg:CCX 100)
@@ -4921,7 +6287,9 @@ return \"srl %1,0,%0\";
(set (match_operand:DI 0 "register_operand" "=r")
(match_dup 3))]
"TARGET_ARCH64"
- "%B3cc %r2,%1,%0")
+ "%B3cc\\t%r2, %1, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
;; We cannot use the "neg" pseudo insn because the Sun assembler
;; does not know how to make it work for constants.
@@ -4947,54 +6315,81 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(neg:DI (match_operand:DI 1 "register_operand" "r")))
(clobber (reg:CC 100))]
- "! TARGET_ARCH64"
- "*
-{
- if (TARGET_LIVE_G0)
- output_asm_insn (\"and %%g0,0,%%g0\", operands);
- return \"subcc %%g0,%L1,%L0\;subx %%g0,%H1,%H0\";
-}"
+ "! TARGET_ARCH64
+ && ! TARGET_LIVE_G0"
+ "#"
[(set_attr "type" "unary")
- ;; ??? This is wrong for TARGET_LIVE_G0 but it's not critical.
(set_attr "length" "2")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (neg:DI (match_operand:DI 1 "register_operand" "")))
+ (clobber (reg:CC 100))]
+ "! TARGET_ARCH64
+ && ! TARGET_LIVE_G0
+ && reload_completed"
+ [(parallel [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (minus:SI (const_int 0) (match_dup 5))
+ (const_int 0)))
+ (set (match_dup 4) (minus:SI (const_int 0) (match_dup 5)))])
+ (set (match_dup 2) (minus:SI (minus:SI (const_int 0) (match_dup 3))
+ (ltu:SI (reg:CC 100) (const_int 0))))]
+ "operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[1]);
+ operands[4] = gen_lowpart (SImode, operands[0]);
+ operands[5] = gen_lowpart (SImode, operands[1]);")
+
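The split above schedules 64-bit negation as two 32-bit subtracts chained through the carry flag. A minimal C sketch of the arithmetic (hypothetical helper name; the subcc/subx correspondence is noted in comments, as an illustration rather than source material):

    #include <stdint.h>

    /* 64-bit negation split into 32-bit halves: subcc computes 0 - lo
       and leaves the borrow in the carry flag; subx then folds that
       borrow into the high word.  */
    void negdi_sketch (uint32_t hi, uint32_t lo,
                       uint32_t *rhi, uint32_t *rlo)
    {
      uint32_t borrow = (lo != 0);   /* carry out of 0 - lo          */
      *rlo = 0u - lo;                /* subcc %g0, lo, rlo           */
      *rhi = 0u - hi - borrow;       /* subx  %g0, hi, rhi           */
    }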
(define_insn "*negdi2_sp64"
[(set (match_operand:DI 0 "register_operand" "=r")
(neg:DI (match_operand:DI 1 "register_operand" "r")))]
"TARGET_ARCH64"
- "sub %%g0,%1,%0"
+ "sub\\t%%g0, %1, %0"
[(set_attr "type" "unary")
(set_attr "length" "1")])
-(define_insn "negsi2"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (neg:SI (match_operand:SI 1 "arith_operand" "rI")))]
+(define_expand "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (neg:SI (match_operand:SI 1 "arith_operand" "")))]
""
- "*
+ "
{
if (TARGET_LIVE_G0)
- return \"and %%g0,0,%%g0\;sub %%g0,%1,%0\";
- return \"sub %%g0,%1,%0\";
-}"
+ {
+ rtx zero_reg = gen_reg_rtx (SImode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, zero_reg, const0_rtx));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_MINUS (SImode, zero_reg,
+ operands[1])));
+ DONE;
+ }
+}")
+
+(define_insn "*negsi2_not_liveg0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "arith_operand" "rI")))]
+ "! TARGET_LIVE_G0"
+ "sub\\t%%g0, %1, %0"
[(set_attr "type" "unary")
- (set (attr "length")
- (if_then_else (eq_attr "live_g0" "yes") (const_int 2) (const_int 1)))])
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_neg"
[(set (reg:CC_NOOV 100)
(compare:CC_NOOV (neg:SI (match_operand:SI 0 "arith_operand" "rI"))
(const_int 0)))]
"! TARGET_LIVE_G0"
- "subcc %%g0,%0,%%g0"
- [(set_attr "type" "compare")])
+ "subcc\\t%%g0, %0, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_neg"
[(set (reg:CCX_NOOV 100)
(compare:CCX_NOOV (neg:DI (match_operand:DI 0 "arith_double_operand" "rHI"))
(const_int 0)))]
"TARGET_ARCH64"
- "subcc %%g0,%0,%%g0"
- [(set_attr "type" "compare")])
+ "subcc\\t%%g0, %0, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_set_neg"
[(set (reg:CC_NOOV 100)
@@ -5003,8 +6398,9 @@ return \"srl %1,0,%0\";
(set (match_operand:SI 0 "register_operand" "=r")
(neg:SI (match_dup 1)))]
"! TARGET_LIVE_G0"
- "subcc %%g0,%1,%0"
- [(set_attr "type" "unary")])
+ "subcc\\t%%g0, %1, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_set_neg"
[(set (reg:CCX_NOOV 100)
@@ -5013,8 +6409,9 @@ return \"srl %1,0,%0\";
(set (match_operand:DI 0 "register_operand" "=r")
(neg:DI (match_dup 1)))]
"TARGET_ARCH64"
- "subcc %%g0,%1,%0"
- [(set_attr "type" "unary")])
+ "subcc\\t%%g0, %1, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
;; We cannot use the "not" pseudo insn because the Sun assembler
;; does not know how to make it work for constants.
@@ -5029,53 +6426,99 @@ return \"srl %1,0,%0\";
(not:DI (match_operand:DI 1 "register_operand" "r,b")))]
"! TARGET_ARCH64"
"@
- xnor %1,0,%0\;xnor %R1,0,%R0
- fnot1 %1,%0"
+ #
+ fnot1\\t%1, %0"
[(set_attr "type" "unary,fp")
(set_attr "length" "2,1")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (not:DI (match_operand:DI 1 "register_operand" "")))]
+ "! TARGET_ARCH64
+ && reload_completed
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
+ [(set (match_dup 2) (not:SI (xor:SI (match_dup 3) (const_int 0))))
+ (set (match_dup 4) (not:SI (xor:SI (match_dup 5) (const_int 0))))]
+ "if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[1]);
+ operands[4] = gen_lowpart (SImode, operands[0]);
+ operands[5] = gen_lowpart (SImode, operands[1]);")
+
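The split rewrites each word's "not" as an xnor against zero; a one-line C sketch of the identity it relies on (illustrative only):

    #include <stdint.h>

    /* SPARC xnor computes ~(a ^ b), so xnor against zero is a plain
       bitwise not -- one instruction per 32-bit half of the DImode
       value.  */
    uint32_t xnor32 (uint32_t a, uint32_t b) { return ~(a ^ b); }
    /* ~x == xnor32 (x, 0) == xnor32 (0, x) */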
(define_insn "*one_cmpldi2_sp64"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (not:DI (match_operand:DI 1 "arith_double_operand" "rHI")))]
+ [(set (match_operand:DI 0 "register_operand" "=r,b")
+ (not:DI (match_operand:DI 1 "arith_double_operand" "rHI,b")))]
"TARGET_ARCH64"
- "xnor %1,0,%0"
- [(set_attr "type" "unary")])
+ "@
+ xnor\\t%%g0, %1, %0
+ fnot1\\t%1, %0"
+ [(set_attr "type" "unary,fp")
+ (set_attr "length" "1")])
-(define_insn "one_cmplsi2"
- [(set (match_operand:SI 0 "register_operand" "=r,r,d")
- (not:SI (match_operand:SI 1 "arith_operand" "r,I,d")))]
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (not:SI (match_operand:SI 1 "arith_operand" "")))]
""
- "*
+ "
{
- if (which_alternative == 0)
- return \"xnor %1,0,%0\";
- if (which_alternative == 2)
- return \"fnot1s %1,%0\";
- if (TARGET_LIVE_G0)
- output_asm_insn (\"and %%g0,0,%%g0\", operands);
- return \"xnor %%g0,%1,%0\";
-}"
- [(set_attr "type" "unary,unary,fp")
- (set_attr_alternative "length"
- [(const_int 1)
- (if_then_else (eq_attr "live_g0" "yes") (const_int 2) (const_int 1))
- (const_int 1)])])
+ if (TARGET_LIVE_G0
+ && GET_CODE (operands[1]) == CONST_INT)
+ {
+ rtx zero_reg = gen_reg_rtx (SImode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, zero_reg, const0_rtx));
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operands[0],
+ gen_rtx_NOT (SImode,
+ gen_rtx_XOR (SImode,
+ zero_reg,
+ operands[1]))));
+ DONE;
+ }
+}")
+
+(define_insn "*one_cmplsi2_not_liveg0"
+ [(set (match_operand:SI 0 "register_operand" "=r,d")
+ (not:SI (match_operand:SI 1 "arith_operand" "rI,d")))]
+ "! TARGET_LIVE_G0"
+ "@
+ xnor\\t%%g0, %1, %0
+ fnot1s\\t%1, %0"
+ [(set_attr "type" "unary,fp")
+ (set_attr "length" "1,1")])
+
+(define_insn "*one_cmplsi2_liveg0"
+ [(set (match_operand:SI 0 "register_operand" "=r,d")
+ (not:SI (match_operand:SI 1 "arith_operand" "r,d")))]
+ "TARGET_LIVE_G0"
+ "@
+ xnor\\t%1, 0, %0
+ fnot1s\\t%1, %0"
+ [(set_attr "type" "unary,fp")
+ (set_attr "length" "1,1")])
(define_insn "*cmp_cc_not"
[(set (reg:CC 100)
(compare:CC (not:SI (match_operand:SI 0 "arith_operand" "rI"))
(const_int 0)))]
"! TARGET_LIVE_G0"
- "xnorcc %%g0,%0,%%g0"
- [(set_attr "type" "compare")])
+ "xnorcc\\t%%g0, %0, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_not"
[(set (reg:CCX 100)
(compare:CCX (not:DI (match_operand:DI 0 "arith_double_operand" "rHI"))
(const_int 0)))]
"TARGET_ARCH64"
- "xnorcc %%g0,%0,%%g0"
- [(set_attr "type" "compare")])
+ "xnorcc\\t%%g0, %0, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_set_not"
[(set (reg:CC 100)
@@ -5084,8 +6527,9 @@ return \"srl %1,0,%0\";
(set (match_operand:SI 0 "register_operand" "=r")
(not:SI (match_dup 1)))]
"! TARGET_LIVE_G0"
- "xnorcc %%g0,%1,%0"
- [(set_attr "type" "unary")])
+ "xnorcc\\t%%g0, %1, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_ccx_set_not"
[(set (reg:CCX 100)
@@ -5094,8 +6538,9 @@ return \"srl %1,0,%0\";
(set (match_operand:DI 0 "register_operand" "=r")
(not:DI (match_dup 1)))]
"TARGET_ARCH64"
- "xnorcc %%g0,%1,%0"
- [(set_attr "type" "unary")])
+ "xnorcc\\t%%g0, %1, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
;; Floating point arithmetic instructions.
@@ -5104,88 +6549,99 @@ return \"srl %1,0,%0\";
(plus:TF (match_operand:TF 1 "register_operand" "e")
(match_operand:TF 2 "register_operand" "e")))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "faddq %1,%2,%0"
- [(set_attr "type" "fp")])
+ "faddq\\t%1, %2, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "adddf3"
[(set (match_operand:DF 0 "register_operand" "=e")
(plus:DF (match_operand:DF 1 "register_operand" "e")
(match_operand:DF 2 "register_operand" "e")))]
"TARGET_FPU"
- "faddd %1,%2,%0"
- [(set_attr "type" "fp")])
+ "faddd\\t%1, %2, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "addsf3"
[(set (match_operand:SF 0 "register_operand" "=f")
(plus:SF (match_operand:SF 1 "register_operand" "f")
(match_operand:SF 2 "register_operand" "f")))]
"TARGET_FPU"
- "fadds %1,%2,%0"
- [(set_attr "type" "fp")])
+ "fadds\\t%1, %2, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "subtf3"
[(set (match_operand:TF 0 "register_operand" "=e")
(minus:TF (match_operand:TF 1 "register_operand" "e")
(match_operand:TF 2 "register_operand" "e")))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "fsubq %1,%2,%0"
- [(set_attr "type" "fp")])
+ "fsubq\\t%1, %2, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "subdf3"
[(set (match_operand:DF 0 "register_operand" "=e")
(minus:DF (match_operand:DF 1 "register_operand" "e")
(match_operand:DF 2 "register_operand" "e")))]
"TARGET_FPU"
- "fsubd %1,%2,%0"
- [(set_attr "type" "fp")])
+ "fsubd\\t%1, %2, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "subsf3"
[(set (match_operand:SF 0 "register_operand" "=f")
(minus:SF (match_operand:SF 1 "register_operand" "f")
(match_operand:SF 2 "register_operand" "f")))]
"TARGET_FPU"
- "fsubs %1,%2,%0"
- [(set_attr "type" "fp")])
+ "fsubs\\t%1, %2, %0"
+ [(set_attr "type" "fp")
+ (set_attr "length" "1")])
(define_insn "multf3"
[(set (match_operand:TF 0 "register_operand" "=e")
(mult:TF (match_operand:TF 1 "register_operand" "e")
(match_operand:TF 2 "register_operand" "e")))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "fmulq %1,%2,%0"
- [(set_attr "type" "fpmul")])
+ "fmulq\\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "length" "1")])
(define_insn "muldf3"
[(set (match_operand:DF 0 "register_operand" "=e")
(mult:DF (match_operand:DF 1 "register_operand" "e")
(match_operand:DF 2 "register_operand" "e")))]
"TARGET_FPU"
- "fmuld %1,%2,%0"
- [(set_attr "type" "fpmul")])
+ "fmuld\\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "length" "1")])
(define_insn "mulsf3"
[(set (match_operand:SF 0 "register_operand" "=f")
(mult:SF (match_operand:SF 1 "register_operand" "f")
(match_operand:SF 2 "register_operand" "f")))]
"TARGET_FPU"
- "fmuls %1,%2,%0"
- [(set_attr "type" "fpmul")])
+ "fmuls\\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "length" "1")])
(define_insn "*muldf3_extend"
[(set (match_operand:DF 0 "register_operand" "=e")
(mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "f"))
(float_extend:DF (match_operand:SF 2 "register_operand" "f"))))]
"(TARGET_V8 || TARGET_V9) && TARGET_FPU"
- "fsmuld %1,%2,%0"
- [(set_attr "type" "fpmul")])
+ "fsmuld\\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "length" "1")])
(define_insn "*multf3_extend"
[(set (match_operand:TF 0 "register_operand" "=e")
(mult:TF (float_extend:TF (match_operand:DF 1 "register_operand" "e"))
(float_extend:TF (match_operand:DF 2 "register_operand" "e"))))]
"(TARGET_V8 || TARGET_V9) && TARGET_FPU && TARGET_HARD_QUAD"
- "fdmulq %1,%2,%0"
- [(set_attr "type" "fpmul")])
+ "fdmulq\\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "length" "1")])
;; don't have timing for quad-prec. divide.
(define_insn "divtf3"
@@ -5193,133 +6649,295 @@ return \"srl %1,0,%0\";
(div:TF (match_operand:TF 1 "register_operand" "e")
(match_operand:TF 2 "register_operand" "e")))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "fdivq %1,%2,%0"
- [(set_attr "type" "fpdivd")])
+ "fdivq\\t%1, %2, %0"
+ [(set_attr "type" "fpdivd")
+ (set_attr "length" "1")])
(define_insn "divdf3"
[(set (match_operand:DF 0 "register_operand" "=e")
(div:DF (match_operand:DF 1 "register_operand" "e")
(match_operand:DF 2 "register_operand" "e")))]
"TARGET_FPU"
- "fdivd %1,%2,%0"
- [(set_attr "type" "fpdivd")])
+ "fdivd\\t%1, %2, %0"
+ [(set_attr "type" "fpdivd")
+ (set_attr "length" "1")])
(define_insn "divsf3"
[(set (match_operand:SF 0 "register_operand" "=f")
(div:SF (match_operand:SF 1 "register_operand" "f")
(match_operand:SF 2 "register_operand" "f")))]
"TARGET_FPU"
- "fdivs %1,%2,%0"
- [(set_attr "type" "fpdivs")])
+ "fdivs\\t%1, %2, %0"
+ [(set_attr "type" "fpdivs")
+ (set_attr "length" "1")])
-(define_insn "negtf2"
+(define_expand "negtf2"
[(set (match_operand:TF 0 "register_operand" "=e,e")
(neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
- ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
"TARGET_FPU"
- "*
-{
- /* v9: can't use fnegs, won't work with upper regs. */
- if (which_alternative == 0)
- return TARGET_V9 ? \"fnegd %0,%0\" : \"fnegs %0,%0\";
- else
- return TARGET_V9 ? \"fnegd %1,%0\;fmovd %S1,%S0\"
- : \"fnegs %1,%0\;fmovs %R1,%R0\;fmovs %S1,%S0\;fmovs %T1,%T0\";
-}"
+ "")
+
+(define_insn "*negtf2_notv9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
+ "TARGET_FPU
+ && ! TARGET_V9"
+ "@
+ fnegs\\t%0, %0
+ #"
[(set_attr "type" "fpmove")
- (set_attr_alternative "length"
- [(const_int 1)
- (if_then_else (eq_attr "isa" "v9") (const_int 2) (const_int 4))])])
+ (set_attr "length" "1,2")])
-(define_insn "negdf2"
+(define_split
+ [(set (match_operand:TF 0 "register_operand" "")
+ (neg:TF (match_operand:TF 1 "register_operand" "")))]
+ "TARGET_FPU
+ && ! TARGET_V9
+ && reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (neg:SF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))
+ (set (match_dup 6) (match_dup 7))]
+ "if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ if (GET_CODE (operands[1]) == SUBREG)
+ operands[1] = alter_subreg (operands[1]);
+ operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
+ operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);
+ operands[6] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
+ operands[7] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);")
+
+(define_insn "*negtf2_v9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
+ "TARGET_FPU && TARGET_V9"
+ "@
+ fnegd\\t%0, %0
+ #"
+ [(set_attr "type" "fpmove")
+ (set_attr "length" "1,2")])
+
+(define_split
+ [(set (match_operand:TF 0 "register_operand" "")
+ (neg:TF (match_operand:TF 1 "register_operand" "")))]
+ "TARGET_FPU
+ && TARGET_V9
+ && reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (neg:DF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))]
+ "if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ if (GET_CODE (operands[1]) == SUBREG)
+ operands[1] = alter_subreg (operands[1]);
+ operands[2] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
+ operands[5] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);")
+
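The TFmode splits above (and the DFmode ones below) work because the IEEE sign bit sits in the most significant word: only the leading half needs an fnegd (fnegs pre-v9), and the remaining words are plain copies. A conceptual C sketch, with the word layout and the bit-flip model stated as assumptions for illustration:

    #include <stdint.h>

    /* Conceptual model of the v9 TFmode split: w[0] holds the most
       significant double-word, whose top bit is the sign.  */
    void negtf_sketch (uint64_t w[2])
    {
      w[0] ^= UINT64_C (0x8000000000000000);  /* fnegd: flip sign bit */
      /* w[1] is untouched; the split copies it with a plain
         (set (match_dup 4) (match_dup 5)) move.  */
    }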
+(define_expand "negdf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (neg:DF (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_FPU"
+ "")
+
+(define_insn "*negdf2_notv9"
[(set (match_operand:DF 0 "register_operand" "=e,e")
(neg:DF (match_operand:DF 1 "register_operand" "0,e")))]
- "TARGET_FPU"
- "*
-{
- if (TARGET_V9)
- return \"fnegd %1,%0\";
- else if (which_alternative == 0)
- return \"fnegs %0,%0\";
- else
- return \"fnegs %1,%0\;fmovs %R1,%R0\";
-}"
+ "TARGET_FPU && ! TARGET_V9"
+ "@
+ fnegs\\t%0, %0
+ #"
[(set_attr "type" "fpmove")
- (set_attr_alternative "length"
- [(const_int 1)
- (if_then_else (eq_attr "isa" "v9") (const_int 1) (const_int 2))])])
+ (set_attr "length" "1,2")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (neg:DF (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_FPU
+ && ! TARGET_V9
+ && reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (neg:SF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))]
+ "if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ if (GET_CODE (operands[1]) == SUBREG)
+ operands[1] = alter_subreg (operands[1]);
+ operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
+ operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);")
+
+(define_insn "*negdf2_v9"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (neg:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_V9"
+ "fnegd\\t%1, %0"
+ [(set_attr "type" "fpmove")
+ (set_attr "length" "1")])
(define_insn "negsf2"
[(set (match_operand:SF 0 "register_operand" "=f")
(neg:SF (match_operand:SF 1 "register_operand" "f")))]
"TARGET_FPU"
- "fnegs %1,%0"
- [(set_attr "type" "fpmove")])
+ "fnegs\\t%1, %0"
+ [(set_attr "type" "fpmove")
+ (set_attr "length" "1")])
+
+(define_expand "abstf2"
+ [(set (match_operand:TF 0 "register_operand" "")
+ (abs:TF (match_operand:TF 1 "register_operand" "")))]
+ "TARGET_FPU"
+ "")
-(define_insn "abstf2"
+(define_insn "*abstf2_notv9"
[(set (match_operand:TF 0 "register_operand" "=e,e")
(abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
+ "TARGET_FPU && ! TARGET_V9"
+ "@
+ fabss\\t%0, %0
+ #"
+ [(set_attr "type" "fpmove")
+ (set_attr "length" "1,2")])
+
+(define_split
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ "TARGET_FPU
+ && ! TARGET_V9
+ && reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (abs:SF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))
+ (set (match_dup 6) (match_dup 7))]
+ "if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ if (GET_CODE (operands[1]) == SUBREG)
+ operands[1] = alter_subreg (operands[1]);
+ operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
+ operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);
+ operands[6] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
+ operands[7] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);")
+
+(define_insn "*abstf2_hq_v9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ "TARGET_FPU && TARGET_V9 && TARGET_HARD_QUAD"
+ "@
+ fabsd\\t%0, %0
+ fabsq\\t%1, %0"
+ [(set_attr "type" "fpmove")
+ (set_attr "length" "1")])
+
+(define_insn "*abstf2_v9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ "TARGET_FPU && TARGET_V9 && !TARGET_HARD_QUAD"
+ "@
+ fabsd\\t%0, %0
+ #"
+ [(set_attr "type" "fpmove")
+ (set_attr "length" "1,2")])
+
+(define_split
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ "TARGET_FPU
+ && TARGET_V9
+ && reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (abs:DF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))]
+ "if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ if (GET_CODE (operands[1]) == SUBREG)
+ operands[1] = alter_subreg (operands[1]);
+ operands[2] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
+ operands[5] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);")
+
+(define_expand "absdf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (abs:DF (match_operand:DF 1 "register_operand" "")))]
"TARGET_FPU"
- "*
-{
- /* v9: can't use fabss, won't work with upper regs. */
- if (which_alternative == 0)
- return TARGET_V9 ? \"fabsd %0,%0\" : \"fabss %0,%0\";
- else
- return TARGET_V9 ? \"fabsd %1,%0\;fmovd %S1,%S0\"
- : \"fabss %1,%0\;fmovs %R1,%R0\;fmovs %S1,%S0\;fmovs %T1,%T0\";
-}"
+ "")
+
+(define_insn "*absdf2_notv9"
+ [(set (match_operand:DF 0 "register_operand" "=e,e")
+ (abs:DF (match_operand:DF 1 "register_operand" "0,e")))]
+ "TARGET_FPU && ! TARGET_V9"
+ "@
+ fabss\\t%0, %0
+ #"
[(set_attr "type" "fpmove")
- (set_attr_alternative "length"
- [(const_int 1)
- (if_then_else (eq_attr "isa" "v9") (const_int 2) (const_int 4))])])
+ (set_attr "length" "1,2")])
-(define_insn "absdf2"
+(define_split
[(set (match_operand:DF 0 "register_operand" "=e,e")
(abs:DF (match_operand:DF 1 "register_operand" "0,e")))]
- "TARGET_FPU"
- "*
-{
- if (TARGET_V9)
- return \"fabsd %1,%0\";
- else if (which_alternative == 0)
- return \"fabss %0,%0\";
- else
- return \"fabss %1,%0\;fmovs %R1,%R0\";
-}"
+ "TARGET_FPU
+ && ! TARGET_V9
+ && reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (abs:SF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))]
+ "if (GET_CODE (operands[0]) == SUBREG)
+ operands[0] = alter_subreg (operands[0]);
+ if (GET_CODE (operands[1]) == SUBREG)
+ operands[1] = alter_subreg (operands[1]);
+ operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
+ operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);")
+
+(define_insn "*absdf2_v9"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (abs:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_V9"
+ "fabsd\\t%1, %0"
[(set_attr "type" "fpmove")
- (set_attr_alternative "length"
- [(const_int 1)
- (if_then_else (eq_attr "isa" "v9") (const_int 1) (const_int 2))])])
+ (set_attr "length" "1")])
(define_insn "abssf2"
[(set (match_operand:SF 0 "register_operand" "=f")
(abs:SF (match_operand:SF 1 "register_operand" "f")))]
"TARGET_FPU"
- "fabss %1,%0"
- [(set_attr "type" "fpmove")])
+ "fabss\\t%1, %0"
+ [(set_attr "type" "fpmove")
+ (set_attr "length" "1")])
(define_insn "sqrttf2"
[(set (match_operand:TF 0 "register_operand" "=e")
(sqrt:TF (match_operand:TF 1 "register_operand" "e")))]
"TARGET_FPU && TARGET_HARD_QUAD"
- "fsqrtq %1,%0"
- [(set_attr "type" "fpsqrt")])
+ "fsqrtq\\t%1, %0"
+ [(set_attr "type" "fpsqrt")
+ (set_attr "length" "1")])
(define_insn "sqrtdf2"
[(set (match_operand:DF 0 "register_operand" "=e")
(sqrt:DF (match_operand:DF 1 "register_operand" "e")))]
"TARGET_FPU"
- "fsqrtd %1,%0"
- [(set_attr "type" "fpsqrt")])
+ "fsqrtd\\t%1, %0"
+ [(set_attr "type" "fpsqrt")
+ (set_attr "length" "1")])
(define_insn "sqrtsf2"
[(set (match_operand:SF 0 "register_operand" "=f")
(sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
"TARGET_FPU"
- "fsqrts %1,%0"
- [(set_attr "type" "fpsqrt")])
+ "fsqrts\\t%1, %0"
+ [(set_attr "type" "fpsqrt")
+ (set_attr "length" "1")])
;;- arithmetic shift instructions
@@ -5334,9 +6952,21 @@ return \"srl %1,0,%0\";
&& (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 31)
operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
- return \"sll %1,%2,%0\";
+ return \"sll\\t%1, %2, %0\";
}"
- [(set_attr "type" "shift")])
+ [(set_attr "type" "shift")
+ (set_attr "length" "1")])
+
+;; We special case multiplication by two, as add can be done
+;; in both ALUs, while shift only in IEU0 on UltraSPARC.
+(define_insn "*ashlsi3_const1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1)))]
+ ""
+ "add\\t%1, %1, %0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "1")])
(define_expand "ashldi3"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -5354,7 +6984,18 @@ return \"srl %1,0,%0\";
}
}")
-(define_insn ""
+;; We special case multiplication by two, as add can be done
+;; in both ALUs, while shift only in IEU0 on UltraSPARC.
+(define_insn "*ashldi3_const1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 1)))]
+ "TARGET_ARCH64"
+ "add\\t%1, %1, %0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "1")])
+
+(define_insn "*ashldi3_sp64"
[(set (match_operand:DI 0 "register_operand" "=r")
(ashift:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:SI 2 "arith_operand" "rI")))]
@@ -5362,12 +7003,15 @@ return \"srl %1,0,%0\";
"*
{
if (GET_CODE (operands[2]) == CONST_INT
- && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 31)
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 63)
operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
- return \"sllx %1,%2,%0\";
-}")
+ return \"sllx\\t%1, %2, %0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "1")])
+;; XXX UGH!
(define_insn "ashldi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
(ashift:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
@@ -5378,12 +7022,14 @@ return \"srl %1,0,%0\";
[(set_attr "length" "5,5,6")])
;; Optimize (1LL<<x)-1
+;; XXX this also needs to be fixed to handle equal subregs
+;; XXX before we can re-enable it.
(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=h")
(plus:DI (ashift:DI (const_int 1)
(match_operand:SI 2 "arith_operand" "rI"))
(const_int -1)))]
- "TARGET_V8PLUS"
+ "0 && TARGET_V8PLUS"
"*
{
if (GET_CODE (operands[2]) == REG && REGNO (operands[2]) == REGNO (operands[0]))
@@ -5397,9 +7043,10 @@ return \"srl %1,0,%0\";
(compare:CC_NOOV (ashift:SI (match_operand:SI 0 "register_operand" "r")
(const_int 1))
(const_int 0)))]
- ""
- "addcc %0,%0,%%g0"
- [(set_attr "type" "compare")])
+ "! TARGET_LIVE_G0"
+ "addcc\\t%0, %0, %%g0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "*cmp_cc_set_ashift_1"
[(set (reg:CC_NOOV 100)
@@ -5409,7 +7056,9 @@ return \"srl %1,0,%0\";
(set (match_operand:SI 0 "register_operand" "=r")
(ashift:SI (match_dup 1) (const_int 1)))]
""
- "addcc %1,%1,%0")
+ "addcc\\t%1, %1, %0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "1")])
(define_insn "ashrsi3"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -5422,9 +7071,42 @@ return \"srl %1,0,%0\";
&& (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 31)
operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
- return \"sra %1,%2,%0\";
+ return \"sra\\t%1, %2, %0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "1")])
+
+(define_insn "*ashrsi3_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "r"))))]
+ "TARGET_ARCH64"
+ "sra\\t%1, %2, %0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "1")])
+
+;; This handles the same case as above, but with a constant shift
+;; instead of a register.  The combiner "simplifies" it for us a bit.
+(define_insn "*ashrsi3_extend2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI (ashift:DI (subreg:DI (match_operand:SI 1 "register_operand" "r") 0)
+ (const_int 32))
+ (match_operand:SI 2 "small_int_or_double" "n")))]
+ "TARGET_ARCH64
+ && ((GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) >= 32 && INTVAL (operands[2]) < 64)
+ || (GET_CODE (operands[2]) == CONST_DOUBLE
+ && !CONST_DOUBLE_HIGH (operands[2])
+ && CONST_DOUBLE_LOW (operands[2]) >= 32
+ && CONST_DOUBLE_LOW (operands[2]) < 64))"
+ "*
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 32);
+
+ return \"sra\\t%1, %2, %0\";
}"
- [(set_attr "type" "shift")])
+ [(set_attr "type" "shift")
+ (set_attr "length" "1")])
(define_expand "ashrdi3"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -5432,13 +7114,15 @@ return \"srl %1,0,%0\";
(match_operand:SI 2 "arith_operand" "rI")))]
"TARGET_ARCH64 || TARGET_V8PLUS"
"
-if (! TARGET_ARCH64)
- {
- if (GET_CODE (operands[2]) == CONST_INT)
- FAIL; /* prefer generic code in this case */
- emit_insn (gen_ashrdi3_v8plus (operands[0], operands[1], operands[2]));
- DONE;
- }")
+{
+ if (! TARGET_ARCH64)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ FAIL; /* prefer generic code in this case */
+ emit_insn (gen_ashrdi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -5451,9 +7135,12 @@ if (! TARGET_ARCH64)
&& (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 63)
operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
- return \"srax %1,%2,%0\";
-}")
+ return \"srax\\t%1, %2, %0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "1")])
+;; XXX
(define_insn "ashrdi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
(ashiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
@@ -5474,9 +7161,54 @@ if (! TARGET_ARCH64)
&& (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 31)
operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
- return \"srl %1,%2,%0\";
+ return \"srl\\t%1, %2, %0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "1")])
+
+;; This handles the case where
+;; (zero_extend:DI (lshiftrt:SI (match_operand:SI) (match_operand:SI)))
+;; appears, but the combiner has "simplified" it for us.
+(define_insn "*lshrsi3_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (subreg:DI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "r")) 0)
+ (match_operand 3 "" "")))]
+ "TARGET_ARCH64
+ && ((GET_CODE (operands[3]) == CONST_DOUBLE
+ && CONST_DOUBLE_HIGH (operands[3]) == 0
+ && CONST_DOUBLE_LOW (operands[3]) == 0xffffffff)
+#if HOST_BITS_PER_WIDE_INT >= 64
+ || (GET_CODE (operands[3]) == CONST_INT
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[3]) == 0xffffffff)
+#endif
+ )"
+ "srl\\t%1, %2, %0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "1")])
+
+;; This handles the case where
+;; (lshiftrt:DI (zero_extend:DI (match_operand:SI)) (const_int >=0 < 32))
+;; appears, but the combiner has "simplified" it for us.
+(define_insn "*lshrsi3_extend2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (subreg:DI (match_operand:SI 1 "register_operand" "r") 0)
+ (match_operand 2 "small_int_or_double" "n")
+ (const_int 32)))]
+ "TARGET_ARCH64
+ && ((GET_CODE (operands[2]) == CONST_INT
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 32)
+ || (GET_CODE (operands[2]) == CONST_DOUBLE
+ && CONST_DOUBLE_HIGH (operands[2]) == 0
+ && (unsigned HOST_WIDE_INT) CONST_DOUBLE_LOW (operands[2]) < 32))"
+ "*
+{
+ operands[2] = GEN_INT (32 - INTVAL (operands[2]));
+
+ return \"srl\\t%1, %2, %0\";
}"
- [(set_attr "type" "shift")])
+ [(set_attr "type" "shift")
+ (set_attr "length" "1")])
(define_expand "lshrdi3"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -5484,13 +7216,15 @@ if (! TARGET_ARCH64)
(match_operand:SI 2 "arith_operand" "rI")))]
"TARGET_ARCH64 || TARGET_V8PLUS"
"
-if (! TARGET_ARCH64)
- {
- if (GET_CODE (operands[2]) == CONST_INT)
- FAIL;
- emit_insn (gen_lshrdi3_v8plus (operands[0], operands[1], operands[2]));
- DONE;
- }")
+{
+ if (! TARGET_ARCH64)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ FAIL;
+ emit_insn (gen_lshrdi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -5503,9 +7237,12 @@ if (! TARGET_ARCH64)
&& (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 63)
operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
- return \"srlx %1,%2,%0\";
-}")
+ return \"srlx\\t%1, %2, %0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "1")])
+;; XXX
(define_insn "lshrdi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
(lshiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
@@ -5524,19 +7261,19 @@ if (! TARGET_ARCH64)
""
"*
{
- /* Some implementations (e.g. TurboSparc) are reported to have problems
+ /* TurboSparc is reported to have problems with
with
foo: b,a foo
i.e. an empty loop with the annul bit set. The workaround is to use
foo: b foo; nop
instead. */
- if (flag_delayed_branch
+ if (! TARGET_V9 && flag_delayed_branch
&& (insn_addresses[INSN_UID (operands[0])]
== insn_addresses[INSN_UID (insn)]))
- return \"b %l0%#\";
+ return \"b\\t%l0%#\";
else
- return \"b%* %l0%(\";
+ return TARGET_V9 ? \"ba,pt%*\\t%%xcc, %l0%(\" : \"b%*\\t%l0%(\";
}"
[(set_attr "type" "uncond_branch")])
@@ -5546,7 +7283,7 @@ if (! TARGET_ARCH64)
""
"
{
- if (GET_MODE (operands[0]) != Pmode)
+ if (GET_MODE (operands[0]) != CASE_VECTOR_MODE)
abort ();
/* In pic mode, our address differences are against the base of the
@@ -5557,6 +7294,8 @@ if (! TARGET_ARCH64)
rtx tmp, tmp2;
tmp = gen_rtx_LABEL_REF (Pmode, operands[1]);
tmp2 = operands[0];
+ if (CASE_VECTOR_MODE != Pmode)
+ tmp2 = gen_rtx_SIGN_EXTEND (Pmode, tmp2);
tmp = gen_rtx_PLUS (Pmode, tmp2, tmp);
operands[0] = memory_address (Pmode, tmp);
}
@@ -5566,14 +7305,14 @@ if (! TARGET_ARCH64)
[(set (pc) (match_operand:SI 0 "address_operand" "p"))
(use (label_ref (match_operand 1 "" "")))]
"! TARGET_PTR64"
- "jmp %a0%#"
+ "jmp\\t%a0%#"
[(set_attr "type" "uncond_branch")])
(define_insn "*tablejump_sp64"
[(set (pc) (match_operand:DI 0 "address_operand" "p"))
(use (label_ref (match_operand 1 "" "")))]
"TARGET_PTR64"
- "jmp %a0%#"
+ "jmp\\t%a0%#"
[(set_attr "type" "uncond_branch")])
;; This pattern recognizes the "instruction" that appears in
@@ -5582,7 +7321,7 @@ if (! TARGET_ARCH64)
;(define_insn "*unimp_insn"
; [(match_operand:SI 0 "immediate_operand" "")]
; "GET_CODE (operands[0]) == CONST_INT && INTVAL (operands[0]) > 0"
-; "unimp %0"
+; "unimp\\t%0"
; [(set_attr "type" "marker")])
;;- jump to subroutine
@@ -5681,7 +7420,7 @@ if (! TARGET_ARCH64)
(clobber (reg:SI 15))]
;;- Do not use operand 1 for most machines.
"! TARGET_PTR64"
- "call %a0,%1%#"
+ "call\\t%a0, %1%#"
[(set_attr "type" "call")])
(define_insn "*call_symbolic_sp32"
@@ -5690,7 +7429,7 @@ if (! TARGET_ARCH64)
(clobber (reg:SI 15))]
;;- Do not use operand 1 for most machines.
"! TARGET_PTR64"
- "call %a0,%1%#"
+ "call\\t%a0, %1%#"
[(set_attr "type" "call")])
(define_insn "*call_address_sp64"
@@ -5699,7 +7438,7 @@ if (! TARGET_ARCH64)
(clobber (reg:DI 15))]
;;- Do not use operand 1 for most machines.
"TARGET_PTR64"
- "call %a0,%1%#"
+ "call\\t%a0, %1%#"
[(set_attr "type" "call")])
(define_insn "*call_symbolic_sp64"
@@ -5708,7 +7447,7 @@ if (! TARGET_ARCH64)
(clobber (reg:DI 15))]
;;- Do not use operand 1 for most machines.
"TARGET_PTR64"
- "call %a0,%1%#"
+ "call\\t%a0, %1%#"
[(set_attr "type" "call")])
;; This is a call that wants a structure value.
@@ -5720,7 +7459,7 @@ if (! TARGET_ARCH64)
(clobber (reg:SI 15))]
;;- Do not use operand 1 for most machines.
"! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) >= 0"
- "call %a0,%1\;nop\;unimp %2"
+ "call\\t%a0, %1\\n\\tnop\\n\\tunimp\\t%2"
[(set_attr "type" "call_no_delay_slot")])
;; This is a call that wants a structure value.
@@ -5732,7 +7471,7 @@ if (! TARGET_ARCH64)
(clobber (reg:SI 15))]
;;- Do not use operand 1 for most machines.
"! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) >= 0"
- "call %a0,%1\;nop\;unimp %2"
+ "call\\t%a0, %1\\n\\tnop\\n\\tunimp\\t%2"
[(set_attr "type" "call_no_delay_slot")])
;; This is a call that may want a structure value. This is used for
@@ -5744,7 +7483,7 @@ if (! TARGET_ARCH64)
(clobber (reg:SI 15))]
;;- Do not use operand 1 for most machines.
"! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0"
- "call %a0,%1\;nop\;nop"
+ "call\\t%a0, %1\\n\\tnop\\n\\tnop"
[(set_attr "type" "call_no_delay_slot")])
;; This is a call that wants a structure value.
@@ -5755,7 +7494,7 @@ if (! TARGET_ARCH64)
(clobber (reg:SI 15))]
;;- Do not use operand 1 for most machines.
"! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0"
- "call %a0,%1\;nop\;nop"
+ "call\\t%a0, %1\\n\\tnop\\n\\tnop"
[(set_attr "type" "call_no_delay_slot")])
(define_expand "call_value"
@@ -5803,7 +7542,7 @@ if (! TARGET_ARCH64)
(clobber (reg:SI 15))]
;;- Do not use operand 2 for most machines.
"! TARGET_PTR64"
- "call %a1,%2%#"
+ "call\\t%a1, %2%#"
[(set_attr "type" "call")])
(define_insn "*call_value_symbolic_sp32"
@@ -5813,7 +7552,7 @@ if (! TARGET_ARCH64)
(clobber (reg:SI 15))]
;;- Do not use operand 2 for most machines.
"! TARGET_PTR64"
- "call %a1,%2%#"
+ "call\\t%a1, %2%#"
[(set_attr "type" "call")])
(define_insn "*call_value_address_sp64"
@@ -5823,7 +7562,7 @@ if (! TARGET_ARCH64)
(clobber (reg:DI 15))]
;;- Do not use operand 2 for most machines.
"TARGET_PTR64"
- "call %a1,%2%#"
+ "call\\t%a1, %2%#"
[(set_attr "type" "call")])
(define_insn "*call_value_symbolic_sp64"
@@ -5833,7 +7572,7 @@ if (! TARGET_ARCH64)
(clobber (reg:DI 15))]
;;- Do not use operand 2 for most machines.
"TARGET_PTR64"
- "call %a1,%2%#"
+ "call\\t%a1, %2%#"
[(set_attr "type" "call")])
(define_expand "untyped_call"
@@ -5871,7 +7610,8 @@ if (! TARGET_ARCH64)
(define_insn "blockage"
[(unspec_volatile [(const_int 0)] 0)]
""
- "")
+ ""
+ [(set_attr "length" "0")])
;; Prepare to return any type including a structure value.
@@ -5887,7 +7627,8 @@ if (! TARGET_ARCH64)
if (! TARGET_ARCH64)
{
- rtx rtnreg = gen_rtx_REG (SImode, (leaf_function ? 15 : 31));
+ rtx rtnreg = gen_rtx_REG (SImode, (current_function_uses_only_leaf_regs
+ ? 15 : 31));
rtx value = gen_reg_rtx (SImode);
/* Fetch the instruction where we will return to and see if it's an unimp
@@ -5938,12 +7679,14 @@ if (! TARGET_ARCH64)
(parallel [(return)
(use (reg:SI 31))])]
"sparc_return_peephole_ok (operands[0], operands[1])"
- "return %%i7+8\;mov %Y1,%Y0")
+ "return\\t%%i7+8\\n\\tmov\\t%Y1, %Y0")
(define_insn "nop"
[(const_int 0)]
""
- "nop")
+ "nop"
+ [(set_attr "type" "ialu")
+ (set_attr "length" "1")])
(define_expand "indirect_jump"
[(set (pc) (match_operand 0 "address_operand" "p"))]
@@ -5953,13 +7696,13 @@ if (! TARGET_ARCH64)
(define_insn "*branch_sp32"
[(set (pc) (match_operand:SI 0 "address_operand" "p"))]
"! TARGET_PTR64"
- "jmp %a0%#"
+ "jmp\\t%a0%#"
[(set_attr "type" "uncond_branch")])
(define_insn "*branch_sp64"
[(set (pc) (match_operand:DI 0 "address_operand" "p"))]
"TARGET_PTR64"
- "jmp %a0%#"
+ "jmp\\t%a0%#"
[(set_attr "type" "uncond_branch")])
;; ??? Doesn't work with -mflat.
@@ -5971,7 +7714,9 @@ if (! TARGET_ARCH64)
""
"
{
+#if 0
rtx chain = operands[0];
+#endif
rtx fp = operands[1];
rtx stack = operands[2];
rtx lab = operands[3];
@@ -6001,6 +7746,8 @@ if (! TARGET_ARCH64)
really needed. */
/*emit_insn (gen_rtx_USE (VOIDmode, frame_pointer_rtx));*/
emit_insn (gen_rtx_USE (VOIDmode, stack_pointer_rtx));
+
+#if 0
/* Return, restoring reg window and jumping to goto handler. */
if (TARGET_V9 && GET_CODE (chain) == CONST_INT
&& ! (INTVAL (chain) & ~(HOST_WIDE_INT)0xffffffff))
@@ -6012,6 +7759,8 @@ if (! TARGET_ARCH64)
}
/* Put in the static chain register the nonlocal label address. */
emit_move_insn (static_chain_rtx, chain);
+#endif
+
emit_insn (gen_rtx_USE (VOIDmode, static_chain_rtx));
emit_insn (gen_goto_handler_and_restore (labreg));
emit_barrier ();
@@ -6022,37 +7771,38 @@ if (! TARGET_ARCH64)
(define_insn "flush_register_windows"
[(unspec_volatile [(const_int 0)] 1)]
""
- "* return TARGET_V9 ? \"flushw\" : \"ta 3\";"
- [(set_attr "type" "misc")])
+ "* return TARGET_V9 ? \"flushw\" : \"ta\\t3\";"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn "goto_handler_and_restore"
- [(unspec_volatile [(match_operand:SI 0 "register_operand" "=r")] 2)]
+ [(unspec_volatile [(match_operand 0 "register_operand" "=r")] 2)]
""
- "jmp %0+0\;restore"
+ "jmp\\t%0+0\\n\\trestore"
[(set_attr "type" "misc")
(set_attr "length" "2")])
-(define_insn "goto_handler_and_restore_v9"
- [(unspec_volatile [(match_operand:SI 0 "register_operand" "=r,r")
- (match_operand:SI 1 "register_operand" "=r,r")
- (match_operand:SI 2 "const_int_operand" "I,n")] 3)]
- "TARGET_V9 && ! TARGET_ARCH64"
- "@
- return %0+0\;mov %2,%Y1
- sethi %%hi(%2),%1\;return %0+0\;or %Y1,%%lo(%2),%Y1"
- [(set_attr "type" "misc")
- (set_attr "length" "2,3")])
-
-(define_insn "*goto_handler_and_restore_v9_sp64"
- [(unspec_volatile [(match_operand:DI 0 "register_operand" "=r,r")
- (match_operand:DI 1 "register_operand" "=r,r")
- (match_operand:SI 2 "const_int_operand" "I,n")] 3)]
- "TARGET_V9 && TARGET_ARCH64"
- "@
- return %0+0\;mov %2,%Y1
- sethi %%hi(%2),%1\;return %0+0\;or %Y1,%%lo(%2),%Y1"
- [(set_attr "type" "misc")
- (set_attr "length" "2,3")])
+;;(define_insn "goto_handler_and_restore_v9"
+;; [(unspec_volatile [(match_operand:SI 0 "register_operand" "=r,r")
+;; (match_operand:SI 1 "register_operand" "=r,r")
+;; (match_operand:SI 2 "const_int_operand" "I,n")] 3)]
+;; "TARGET_V9 && ! TARGET_ARCH64"
+;; "@
+;; return\\t%0+0\\n\\tmov\\t%2, %Y1
+;; sethi\\t%%hi(%2), %1\\n\\treturn\\t%0+0\\n\\tor\\t%Y1, %%lo(%2), %Y1"
+;; [(set_attr "type" "misc")
+;; (set_attr "length" "2,3")])
+;;
+;;(define_insn "*goto_handler_and_restore_v9_sp64"
+;; [(unspec_volatile [(match_operand:DI 0 "register_operand" "=r,r")
+;; (match_operand:DI 1 "register_operand" "=r,r")
+;; (match_operand:SI 2 "const_int_operand" "I,n")] 3)]
+;; "TARGET_V9 && TARGET_ARCH64"
+;; "@
+;; return\\t%0+0\\n\\tmov\\t%2, %Y1
+;; sethi\\t%%hi(%2), %1\\n\\treturn\\t%0+0\\n\\tor\\t%Y1, %%lo(%2), %Y1"
+;; [(set_attr "type" "misc")
+;; (set_attr "length" "2,3")])
;; Pattern for use after a setjmp to store FP and the return register
;; into the stack area.
@@ -6086,10 +7836,11 @@ if (! TARGET_ARCH64)
;; Special pattern for the FLUSH instruction.
(define_insn "flush"
- [(unspec_volatile [(match_operand 0 "memory_operand" "m")] 3)]
+ [(unspec_volatile [(match_operand 0 "memory_operand" "m")] 4)]
""
- "* return TARGET_V9 ? \"flush %f0\" : \"iflush %f0\";"
- [(set_attr "type" "misc")])
+ "* return TARGET_V9 ? \"flush\\t%f0\" : \"iflush\\t%f0\";"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
;; find first set.
@@ -6098,6 +7849,7 @@ if (! TARGET_ARCH64)
;; zero also differ. It takes at least 7 instructions to get the proper
;; result. Here is an obvious 8 instruction sequence.
+;; XXX
(define_insn "ffssi2"
[(set (match_operand:SI 0 "register_operand" "=&r")
(ffs:SI (match_operand:SI 1 "register_operand" "r")))
@@ -6126,205 +7878,7 @@ if (! TARGET_ARCH64)
; "neg %1,%2\;xnor %1,%2,%2\;popc %2,%0\;movzr %1,0,%0"
; [(set_attr "type" "multi")
; (set_attr "length" "4")])
-
-;; Split up troublesome insns for better scheduling. */
-
-;; The following patterns are straightforward. They can be applied
-;; either before or after register allocation.
-
-(define_split
- [(set (match_operand 0 "splittable_symbolic_memory_operand" "")
- (match_operand 1 "reg_or_0_operand" ""))
- (clobber (match_operand:SI 2 "register_operand" ""))]
- "! flag_pic"
- [(set (match_dup 2) (high:SI (match_dup 3)))
- (set (match_dup 4) (match_dup 1))]
- "
-{
- operands[3] = XEXP (operands[0], 0);
- operands[4] = gen_rtx_MEM (GET_MODE (operands[0]),
- gen_rtx_LO_SUM (SImode, operands[2], operands[3]));
- MEM_IN_STRUCT_P (operands[4]) = MEM_IN_STRUCT_P (operands[0]);
- MEM_VOLATILE_P (operands[4]) = MEM_VOLATILE_P (operands[0]);
- RTX_UNCHANGING_P (operands[4]) = RTX_UNCHANGING_P (operands[0]);
-}")
-(define_split
- [(set (match_operand 0 "splittable_immediate_memory_operand" "")
- (match_operand 1 "general_operand" ""))
- (clobber (match_operand:SI 2 "register_operand" ""))]
- "flag_pic"
- [(set (match_dup 3) (match_dup 1))]
- "
-{
- rtx addr = legitimize_pic_address (XEXP (operands[0], 0),
- GET_MODE (operands[0]),
- operands[2]);
- operands[3] = gen_rtx_MEM (GET_MODE (operands[0]), addr);
- MEM_IN_STRUCT_P (operands[3]) = MEM_IN_STRUCT_P (operands[0]);
- MEM_VOLATILE_P (operands[3]) = MEM_VOLATILE_P (operands[0]);
- RTX_UNCHANGING_P (operands[3]) = RTX_UNCHANGING_P (operands[0]);
-}")
-
-(define_split
- [(set (match_operand 0 "register_operand" "")
- (match_operand 1 "splittable_immediate_memory_operand" ""))]
- "flag_pic"
- [(set (match_dup 0) (match_dup 2))]
- "
-{
- rtx addr = legitimize_pic_address (XEXP (operands[1], 0),
- GET_MODE (operands[1]),
- operands[0]);
- operands[2] = gen_rtx_MEM (GET_MODE (operands[1]), addr);
- MEM_IN_STRUCT_P (operands[2]) = MEM_IN_STRUCT_P (operands[1]);
- MEM_VOLATILE_P (operands[2]) = MEM_VOLATILE_P (operands[1]);
- RTX_UNCHANGING_P (operands[2]) = RTX_UNCHANGING_P (operands[1]);
-}")
-
-;; Sign- and Zero-extend operations can have symbolic memory operands.
-
-(define_split
- [(set (match_operand 0 "register_operand" "")
- (match_operator 1 "extend_op" [(match_operand 2 "splittable_immediate_memory_operand" "")]))]
- "flag_pic"
- [(set (match_dup 0) (match_op_dup 1 [(match_dup 3)]))]
- "
-{
- rtx addr = legitimize_pic_address (XEXP (operands[2], 0),
- GET_MODE (operands[2]),
- operands[0]);
- operands[3] = gen_rtx_MEM (GET_MODE (operands[2]), addr);
- MEM_IN_STRUCT_P (operands[3]) = MEM_IN_STRUCT_P (operands[2]);
- MEM_VOLATILE_P (operands[3]) = MEM_VOLATILE_P (operands[2]);
- RTX_UNCHANGING_P (operands[3]) = RTX_UNCHANGING_P (operands[2]);
-}")
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (match_operand:SI 1 "immediate_operand" ""))]
- "! flag_pic && (GET_CODE (operands[1]) == SYMBOL_REF
- || GET_CODE (operands[1]) == CONST
- || GET_CODE (operands[1]) == LABEL_REF)"
- [(set (match_dup 0) (high:SI (match_dup 1)))
- (set (match_dup 0)
- (lo_sum:SI (match_dup 0) (match_dup 1)))]
- "")
-
-;; LABEL_REFs are not modified by `legitimize_pic_address'
-;; so do not recurse infinitely in the PIC case.
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (match_operand:SI 1 "immediate_operand" ""))]
- "flag_pic && (GET_CODE (operands[1]) == SYMBOL_REF
- || GET_CODE (operands[1]) == CONST)"
- [(set (match_dup 0) (match_dup 1))]
- "
-{
- operands[1] = legitimize_pic_address (operands[1], Pmode, operands[0]);
-}")
-
-;; These split sne/seq insns. The forms of the resulting insns are
-;; somewhat bogus, but they avoid extra patterns and show data dependency.
-;; Nothing will look at these in detail after splitting has occurred.
-
-;; ??? v9 DImode versions are missing because addc and subc use %icc.
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (ne:SI (match_operand:SI 1 "register_operand" "")
- (const_int 0)))
- (clobber (reg:CC 100))]
- ""
- [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
- (const_int 0)))
- (set (match_dup 0) (ltu:SI (reg:CC 100) (const_int 0)))]
- "")
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (neg:SI (ne:SI (match_operand:SI 1 "register_operand" "")
- (const_int 0))))
- (clobber (reg:CC 100))]
- ""
- [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
- (const_int 0)))
- (set (match_dup 0) (neg:SI (ltu:SI (reg:CC 100) (const_int 0))))]
- "")
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (eq:SI (match_operand:SI 1 "register_operand" "")
- (const_int 0)))
- (clobber (reg:CC 100))]
- ""
- [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
- (const_int 0)))
- (set (match_dup 0) (geu:SI (reg:CC 100) (const_int 0)))]
- "")
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (neg:SI (eq:SI (match_operand:SI 1 "register_operand" "")
- (const_int 0))))
- (clobber (reg:CC 100))]
- ""
- [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
- (const_int 0)))
- (set (match_dup 0) (neg:SI (geu:SI (reg:CC 100) (const_int 0))))]
- "")
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (plus:SI (ne:SI (match_operand:SI 1 "register_operand" "")
- (const_int 0))
- (match_operand:SI 2 "register_operand" "")))
- (clobber (reg:CC 100))]
- ""
- [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
- (const_int 0)))
- (set (match_dup 0) (plus:SI (ltu:SI (reg:CC 100) (const_int 0))
- (match_dup 2)))]
- "")
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (minus:SI (match_operand:SI 2 "register_operand" "")
- (ne:SI (match_operand:SI 1 "register_operand" "")
- (const_int 0))))
- (clobber (reg:CC 100))]
- ""
- [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
- (const_int 0)))
- (set (match_dup 0) (minus:SI (match_dup 2)
- (ltu:SI (reg:CC 100) (const_int 0))))]
- "")
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (plus:SI (eq:SI (match_operand:SI 1 "register_operand" "")
- (const_int 0))
- (match_operand:SI 2 "register_operand" "")))
- (clobber (reg:CC 100))]
- ""
- [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
- (const_int 0)))
- (set (match_dup 0) (plus:SI (geu:SI (reg:CC 100) (const_int 0))
- (match_dup 2)))]
- "")
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (minus:SI (match_operand:SI 2 "register_operand" "")
- (eq:SI (match_operand:SI 1 "register_operand" "")
- (const_int 0))))
- (clobber (reg:CC 100))]
- ""
- [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
- (const_int 0)))
- (set (match_dup 0) (minus:SI (match_dup 2)
- (geu:SI (reg:CC 100) (const_int 0))))]
- "")
;; Peepholes go at the end.
@@ -6338,9 +7892,10 @@ if (! TARGET_ARCH64)
(set (match_operand:SI 1 "memory_operand" "")
(const_int 0))]
"TARGET_V9
- && ! MEM_VOLATILE_P (operands[0]) && ! MEM_VOLATILE_P (operands[1])
+ && ! MEM_VOLATILE_P (operands[0])
+ && ! MEM_VOLATILE_P (operands[1])
&& addrs_ok_for_ldd_peep (XEXP (operands[0], 0), XEXP (operands[1], 0))"
- "stx %%g0,%0")
+ "stx\\t%%g0, %0")
(define_peephole
[(set (match_operand:SI 0 "memory_operand" "")
@@ -6348,89 +7903,98 @@ if (! TARGET_ARCH64)
(set (match_operand:SI 1 "memory_operand" "")
(const_int 0))]
"TARGET_V9
- && ! MEM_VOLATILE_P (operands[0]) && ! MEM_VOLATILE_P (operands[1])
+ && ! MEM_VOLATILE_P (operands[0])
+ && ! MEM_VOLATILE_P (operands[1])
&& addrs_ok_for_ldd_peep (XEXP (operands[1], 0), XEXP (operands[0], 0))"
- "stx %%g0,%1")
+ "stx\\t%%g0, %1")
(define_peephole
[(set (match_operand:SI 0 "register_operand" "=rf")
(match_operand:SI 1 "memory_operand" ""))
(set (match_operand:SI 2 "register_operand" "=rf")
(match_operand:SI 3 "memory_operand" ""))]
- "registers_ok_for_ldd_peep (operands[0], operands[2])
- && ! MEM_VOLATILE_P (operands[1]) && ! MEM_VOLATILE_P (operands[3])
+ "registers_ok_for_ldd_peep (operands[0], operands[2])
+ && ! MEM_VOLATILE_P (operands[1])
+ && ! MEM_VOLATILE_P (operands[3])
&& addrs_ok_for_ldd_peep (XEXP (operands[1], 0), XEXP (operands[3], 0))"
- "ldd %1,%0")
+ "ldd\\t%1, %0")
(define_peephole
[(set (match_operand:SI 0 "memory_operand" "")
(match_operand:SI 1 "register_operand" "rf"))
(set (match_operand:SI 2 "memory_operand" "")
(match_operand:SI 3 "register_operand" "rf"))]
- "registers_ok_for_ldd_peep (operands[1], operands[3])
- && ! MEM_VOLATILE_P (operands[0]) && ! MEM_VOLATILE_P (operands[2])
+ "registers_ok_for_ldd_peep (operands[1], operands[3])
+ && ! MEM_VOLATILE_P (operands[0])
+ && ! MEM_VOLATILE_P (operands[2])
&& addrs_ok_for_ldd_peep (XEXP (operands[0], 0), XEXP (operands[2], 0))"
- "std %1,%0")
+ "std\\t%1, %0")
(define_peephole
[(set (match_operand:SF 0 "register_operand" "=fr")
(match_operand:SF 1 "memory_operand" ""))
(set (match_operand:SF 2 "register_operand" "=fr")
(match_operand:SF 3 "memory_operand" ""))]
- "registers_ok_for_ldd_peep (operands[0], operands[2])
- && ! MEM_VOLATILE_P (operands[1]) && ! MEM_VOLATILE_P (operands[3])
+ "registers_ok_for_ldd_peep (operands[0], operands[2])
+ && ! MEM_VOLATILE_P (operands[1])
+ && ! MEM_VOLATILE_P (operands[3])
&& addrs_ok_for_ldd_peep (XEXP (operands[1], 0), XEXP (operands[3], 0))"
- "ldd %1,%0")
+ "ldd\\t%1, %0")
(define_peephole
[(set (match_operand:SF 0 "memory_operand" "")
(match_operand:SF 1 "register_operand" "fr"))
(set (match_operand:SF 2 "memory_operand" "")
(match_operand:SF 3 "register_operand" "fr"))]
- "registers_ok_for_ldd_peep (operands[1], operands[3])
- && ! MEM_VOLATILE_P (operands[0]) && ! MEM_VOLATILE_P (operands[2])
- && addrs_ok_for_ldd_peep (XEXP (operands[0], 0), XEXP (operands[2], 0))"
- "std %1,%0")
+ "registers_ok_for_ldd_peep (operands[1], operands[3])
+ && ! MEM_VOLATILE_P (operands[0])
+ && ! MEM_VOLATILE_P (operands[2])
+ && addrs_ok_for_ldd_peep (XEXP (operands[0], 0), XEXP (operands[2], 0))"
+ "std\\t%1, %0")
(define_peephole
[(set (match_operand:SI 0 "register_operand" "=rf")
(match_operand:SI 1 "memory_operand" ""))
(set (match_operand:SI 2 "register_operand" "=rf")
(match_operand:SI 3 "memory_operand" ""))]
- "registers_ok_for_ldd_peep (operands[2], operands[0])
- && ! MEM_VOLATILE_P (operands[3]) && ! MEM_VOLATILE_P (operands[1])
- && addrs_ok_for_ldd_peep (XEXP (operands[3], 0), XEXP (operands[1], 0))"
- "ldd %3,%2")
+ "registers_ok_for_ldd_peep (operands[2], operands[0])
+ && ! MEM_VOLATILE_P (operands[3])
+ && ! MEM_VOLATILE_P (operands[1])
+ && addrs_ok_for_ldd_peep (XEXP (operands[3], 0), XEXP (operands[1], 0))"
+ "ldd\\t%3, %2")
(define_peephole
[(set (match_operand:SI 0 "memory_operand" "")
(match_operand:SI 1 "register_operand" "rf"))
(set (match_operand:SI 2 "memory_operand" "")
(match_operand:SI 3 "register_operand" "rf"))]
- "registers_ok_for_ldd_peep (operands[3], operands[1])
- && ! MEM_VOLATILE_P (operands[2]) && ! MEM_VOLATILE_P (operands[0])
- && addrs_ok_for_ldd_peep (XEXP (operands[2], 0), XEXP (operands[0], 0))"
- "std %3,%2")
+ "registers_ok_for_ldd_peep (operands[3], operands[1])
+ && ! MEM_VOLATILE_P (operands[2])
+ && ! MEM_VOLATILE_P (operands[0])
+ && addrs_ok_for_ldd_peep (XEXP (operands[2], 0), XEXP (operands[0], 0))"
+ "std\\t%3, %2")
(define_peephole
[(set (match_operand:SF 0 "register_operand" "=fr")
(match_operand:SF 1 "memory_operand" ""))
(set (match_operand:SF 2 "register_operand" "=fr")
(match_operand:SF 3 "memory_operand" ""))]
- "registers_ok_for_ldd_peep (operands[2], operands[0])
- && ! MEM_VOLATILE_P (operands[3]) && ! MEM_VOLATILE_P (operands[1])
- && addrs_ok_for_ldd_peep (XEXP (operands[3], 0), XEXP (operands[1], 0))"
- "ldd %3,%2")
+ "registers_ok_for_ldd_peep (operands[2], operands[0])
+ && ! MEM_VOLATILE_P (operands[3])
+ && ! MEM_VOLATILE_P (operands[1])
+ && addrs_ok_for_ldd_peep (XEXP (operands[3], 0), XEXP (operands[1], 0))"
+ "ldd\\t%3, %2")
(define_peephole
[(set (match_operand:SF 0 "memory_operand" "")
(match_operand:SF 1 "register_operand" "fr"))
(set (match_operand:SF 2 "memory_operand" "")
(match_operand:SF 3 "register_operand" "fr"))]
- "registers_ok_for_ldd_peep (operands[3], operands[1])
- && ! MEM_VOLATILE_P (operands[2]) && ! MEM_VOLATILE_P (operands[0])
- && addrs_ok_for_ldd_peep (XEXP (operands[2], 0), XEXP (operands[0], 0))"
- "std %3,%2")
+ "registers_ok_for_ldd_peep (operands[3], operands[1])
+ && ! MEM_VOLATILE_P (operands[2])
+ && ! MEM_VOLATILE_P (operands[0])
+ && addrs_ok_for_ldd_peep (XEXP (operands[2], 0), XEXP (operands[0], 0))"
+ "std\\t%3, %2")
;; Optimize the case of following a reg-reg move with a test
;; of reg just moved. Don't allow floating point regs for operand 0 or 1.
@@ -6444,8 +8008,9 @@ if (! TARGET_ARCH64)
(const_int 0)))]
"(rtx_equal_p (operands[2], operands[0])
|| rtx_equal_p (operands[2], operands[1]))
- && ! FP_REG_P (operands[0]) && ! FP_REG_P (operands[1])"
- "orcc %1,0,%0")
+ && ! FP_REG_P (operands[0])
+ && ! FP_REG_P (operands[1])"
+ "orcc\\t%1, 0, %0")
(define_peephole
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -6456,37 +8021,9 @@ if (! TARGET_ARCH64)
"TARGET_ARCH64
&& (rtx_equal_p (operands[2], operands[0])
|| rtx_equal_p (operands[2], operands[1]))
- && ! FP_REG_P (operands[0]) && ! FP_REG_P (operands[1])"
- "orcc %1,0,%0")
-
-;; Floating-point move peepholes
-;; ??? v9: Do we want similar ones?
-
-(define_peephole
- [(set (match_operand:SI 0 "register_operand" "=r")
- (lo_sum:SI (match_dup 0)
- (match_operand:SI 1 "immediate_operand" "i")))
- (set (match_operand:DF 2 "register_operand" "=er")
- (mem:DF (match_dup 0)))]
- "RTX_UNCHANGING_P (operands[1]) && reg_unused_after (operands[0], insn)"
- "*
-{
- /* Go by way of output_move_double in case the register in operand 2
- is not properly aligned for ldd. */
- operands[1] = gen_rtx_MEM (DFmode,
- gen_rtx_LO_SUM (SImode, operands[0], operands[1]));
- operands[0] = operands[2];
- return output_move_double (operands);
-}")
-
-(define_peephole
- [(set (match_operand:SI 0 "register_operand" "=r")
- (lo_sum:SI (match_dup 0)
- (match_operand:SI 1 "immediate_operand" "i")))
- (set (match_operand:SF 2 "register_operand" "=fr")
- (mem:SF (match_dup 0)))]
- "RTX_UNCHANGING_P (operands[1]) && reg_unused_after (operands[0], insn)"
- "ld [%0+%%lo(%a1)],%2")
+ && ! FP_REG_P (operands[0])
+ && ! FP_REG_P (operands[1])"
+ "orcc\\t%1, 0, %0")
;; Return peepholes. First the "normal" ones.
;; These are necessary to catch insns ending up in the epilogue delay list.
@@ -6499,12 +8036,12 @@ if (! TARGET_ARCH64)
"*
{
if (! TARGET_ARCH64 && current_function_returns_struct)
- return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+ return \"jmp\\t%%i7+12\\n\\trestore %%g0, %1, %Y0\";
else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
|| IN_OR_GLOBAL_P (operands[1])))
- return \"return %%i7+8\;mov %Y1,%Y0\";
+ return \"return\\t%%i7+8\\n\\tmov\\t%Y1, %Y0\";
else
- return \"ret\;restore %%g0,%1,%Y0\";
+ return \"ret\\n\\trestore %%g0, %1, %Y0\";
}"
[(set_attr "type" "multi")])
@@ -6516,12 +8053,12 @@ if (! TARGET_ARCH64)
"*
{
if (! TARGET_ARCH64 && current_function_returns_struct)
- return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+ return \"jmp\\t%%i7+12\\n\\trestore %%g0, %1, %Y0\";
else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
|| IN_OR_GLOBAL_P (operands[1])))
- return \"return %%i7+8\;mov %Y1,%Y0\";
+ return \"return\\t%%i7+8\\n\\tmov\\t%Y1, %Y0\";
else
- return \"ret\;restore %%g0,%1,%Y0\";
+ return \"ret\;restore %%g0, %1, %Y0\";
}"
[(set_attr "type" "multi")])
@@ -6533,31 +8070,32 @@ if (! TARGET_ARCH64)
"*
{
if (! TARGET_ARCH64 && current_function_returns_struct)
- return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+ return \"jmp\\t%%i7+12\\n\\trestore %%g0, %1, %Y0\";
else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
|| IN_OR_GLOBAL_P (operands[1])))
- return \"return %%i7+8\;mov %Y1,%Y0\";
+ return \"return\\t%%i7+8\\n\\tmov\\t%Y1, %Y0\";
else
- return \"ret\;restore %%g0,%1,%Y0\";
+ return \"ret\;restore %%g0, %1, %Y0\";
}"
[(set_attr "type" "multi")])
;; The following pattern is only generated by delayed-branch scheduling,
-;; when the insn winds up in the epilogue. This can only happen when
-;; ! TARGET_FPU because otherwise fp return values are in %f0.
+;; when the insn winds up in the epilogue. This can happen not only when
+;; ! TARGET_FPU because we move complex types around by parts using
+;; SF mode SUBREGs.
(define_insn "*return_sf_no_fpu"
[(set (match_operand:SF 0 "restore_operand" "r")
(match_operand:SF 1 "register_operand" "r"))
(return)]
- "! TARGET_FPU && ! TARGET_EPILOGUE && ! TARGET_LIVE_G0"
+ "! TARGET_EPILOGUE && ! TARGET_LIVE_G0"
"*
{
if (! TARGET_ARCH64 && current_function_returns_struct)
- return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+ return \"jmp\\t%%i7+12\\n\\trestore %%g0, %1, %Y0\";
else if (TARGET_V9 && IN_OR_GLOBAL_P (operands[1]))
- return \"return %%i7+8\;mov %Y1,%Y0\";
+ return \"return\\t%%i7+8\\n\\tmov\\t%Y1, %Y0\";
else
- return \"ret\;restore %%g0,%1,%Y0\";
+ return \"ret\;restore %%g0, %1, %Y0\";
}"
[(set_attr "type" "multi")])
@@ -6570,14 +8108,14 @@ if (! TARGET_ARCH64)
"*
{
if (! TARGET_ARCH64 && current_function_returns_struct)
- return \"jmp %%i7+12\;restore %r1,%2,%Y0\";
+ return \"jmp\\t%%i7+12\\n\\trestore %r1, %2, %Y0\";
/* If operands are global or in registers, can use return */
else if (TARGET_V9 && IN_OR_GLOBAL_P (operands[1])
&& (GET_CODE (operands[2]) == CONST_INT
|| IN_OR_GLOBAL_P (operands[2])))
- return \"return %%i7+8\;add %Y1,%Y2,%Y0\";
+ return \"return\\t%%i7+8\\n\\tadd\\t%Y1, %Y2, %Y0\";
else
- return \"ret\;restore %r1,%2,%Y0\";
+ return \"ret\;restore %r1, %2, %Y0\";
}"
[(set_attr "type" "multi")])
@@ -6586,7 +8124,7 @@ if (! TARGET_ARCH64)
(match_operand:DI 1 "arith_double_operand" "rHI"))
(return)]
"TARGET_ARCH64 && ! TARGET_EPILOGUE"
- "ret\;restore %%g0,%1,%Y0"
+ "ret\;restore %%g0, %1, %Y0"
[(set_attr "type" "multi")])
(define_insn "*return_adddi"
@@ -6595,7 +8133,7 @@ if (! TARGET_ARCH64)
(match_operand:DI 2 "arith_double_operand" "rHI")))
(return)]
"TARGET_ARCH64 && ! TARGET_EPILOGUE"
- "ret\;restore %r1,%2,%Y0"
+ "ret\;restore %r1, %2, %Y0"
[(set_attr "type" "multi")])
;; The following pattern is only generated by delayed-branch scheduling,
@@ -6605,7 +8143,7 @@ if (! TARGET_ARCH64)
(match_operand:SF 0 "register_operand" "f"))
(return)]
"! TARGET_EPILOGUE"
- "ret\;fmovs %0,%%f0"
+ "ret\;fmovs\\t%0, %%f0"
[(set_attr "type" "multi")])
;; Now peepholes to do a call followed by a jump.
@@ -6616,16 +8154,20 @@ if (! TARGET_ARCH64)
(match_operand 2 "" "")))
(clobber (reg:SI 15))])
(set (pc) (label_ref (match_operand 3 "" "")))]
- "short_branch (INSN_UID (insn), INSN_UID (operands[3])) && in_same_eh_region (insn, operands[3]) && in_same_eh_region (insn, ins1)"
- "call %a1,%2\;add %%o7,(%l3-.-4),%%o7")
+ "short_branch (INSN_UID (insn), INSN_UID (operands[3]))
+ && in_same_eh_region (insn, operands[3])
+ && in_same_eh_region (insn, ins1)"
+ "call\\t%a1, %2\\n\\tadd\\t%%o7, (%l3-.-4), %%o7")
(define_peephole
[(parallel [(call (mem:SI (match_operand:SI 0 "call_operand_address" "ps"))
(match_operand 1 "" ""))
(clobber (reg:SI 15))])
(set (pc) (label_ref (match_operand 2 "" "")))]
- "short_branch (INSN_UID (insn), INSN_UID (operands[2])) && in_same_eh_region (insn, operands[2]) && in_same_eh_region (insn, ins1)"
- "call %a0,%1\;add %%o7,(%l2-.-4),%%o7")
+ "short_branch (INSN_UID (insn), INSN_UID (operands[2]))
+ && in_same_eh_region (insn, operands[2])
+ && in_same_eh_region (insn, ins1)"
+ "call\\t%a0, %1\\n\\tadd\\t%%o7, (%l2-.-4), %%o7")
(define_peephole
[(parallel [(set (match_operand 0 "" "")
@@ -6633,31 +8175,40 @@ if (! TARGET_ARCH64)
(match_operand 2 "" "")))
(clobber (reg:DI 15))])
(set (pc) (label_ref (match_operand 3 "" "")))]
- "TARGET_ARCH64 && short_branch (INSN_UID (insn), INSN_UID (operands[3])) && in_same_eh_region (insn, operands[3]) && in_same_eh_region (insn, ins1)"
- "call %a1,%2\;add %%o7,(%l3-.-4),%%o7")
+ "TARGET_ARCH64
+ && short_branch (INSN_UID (insn), INSN_UID (operands[3]))
+ && in_same_eh_region (insn, operands[3])
+ && in_same_eh_region (insn, ins1)"
+ "call\\t%a1, %2\\n\\tadd\\t%%o7, (%l3-.-4), %%o7")
(define_peephole
[(parallel [(call (mem:SI (match_operand:DI 0 "call_operand_address" "ps"))
(match_operand 1 "" ""))
(clobber (reg:DI 15))])
(set (pc) (label_ref (match_operand 2 "" "")))]
- "TARGET_ARCH64 && short_branch (INSN_UID (insn), INSN_UID (operands[2])) && in_same_eh_region (insn, operands[2]) && in_same_eh_region (insn, ins1)"
- "call %a0,%1\;add %%o7,(%l2-.-4),%%o7")
+ "TARGET_ARCH64
+ && short_branch (INSN_UID (insn), INSN_UID (operands[2]))
+ && in_same_eh_region (insn, operands[2])
+ && in_same_eh_region (insn, ins1)"
+ "call\\t%a0, %1\\n\\tadd\\t%%o7, (%l2-.-4), %%o7")
;; After a nonlocal goto, we need to restore the PIC register, but only
;; if we need it. So do nothing much here, but we'll check for this in
;; finalize_pic.
+;; Make sure this unspec_volatile number agrees with finalize_pic.
(define_insn "nonlocal_goto_receiver"
- [(unspec_volatile [(const_int 0)] 4)]
+ [(unspec_volatile [(const_int 0)] 5)]
"flag_pic"
- "")
+ ""
+ [(set_attr "length" "0")])
(define_insn "trap"
[(trap_if (const_int 1) (const_int 5))]
""
- "ta 5"
- [(set_attr "type" "misc")])
+ "ta\\t5"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_expand "conditional_trap"
[(trap_if (match_operator 0 "noov_compare_op"
@@ -6672,13 +8223,14 @@ if (! TARGET_ARCH64)
[(trap_if (match_operator 0 "noov_compare_op" [(reg:CC 100) (const_int 0)])
(match_operand:SI 1 "arith_operand" "rM"))]
""
- "t%C0 %1"
- [(set_attr "type" "misc")])
+ "t%C0\\t%1"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
(define_insn ""
[(trap_if (match_operator 0 "noov_compare_op" [(reg:CCX 100) (const_int 0)])
(match_operand:SI 1 "arith_operand" "rM"))]
"TARGET_V9"
- "t%C0 %%xcc,%1"
- [(set_attr "type" "misc")])
-
+ "t%C0\\t%%xcc, %1"
+ [(set_attr "type" "misc")
+ (set_attr "length" "1")])
diff --git a/contrib/gcc/config/sparc/splet.h b/contrib/gcc/config/sparc/splet.h
index 23c6414..d924e70 100644
--- a/contrib/gcc/config/sparc/splet.h
+++ b/contrib/gcc/config/sparc/splet.h
@@ -29,12 +29,12 @@ Boston, MA 02111-1307, USA. */
/* -mlive-g0 is only supported on the sparclet. */
#undef SUBTARGET_SWITCHES
#define SUBTARGET_SWITCHES \
-{"big-endian", -MASK_LITTLE_ENDIAN}, \
-{"little-endian", MASK_LITTLE_ENDIAN}, \
-{"live-g0", MASK_LIVE_G0}, \
-{"no-live-g0", -MASK_LIVE_G0}, \
-{"broken-saverestore", MASK_BROKEN_SAVERESTORE}, \
-{"no-broken-saverestore", -MASK_BROKEN_SAVERESTORE},
+{"big-endian", -MASK_LITTLE_ENDIAN, "Generate code for big endian" }, \
+{"little-endian", MASK_LITTLE_ENDIAN, "Generate code for little endian" }, \
+{"live-g0", MASK_LIVE_G0, "Use g0 as a normal register" }, \
+{"no-live-g0", -MASK_LIVE_G0, "Register g0 is fixed with a zero value" }, \
+{"broken-saverestore", MASK_BROKEN_SAVERESTORE, "Enable save/restore bug workarounds" }, \
+{"no-broken-saverestore", -MASK_BROKEN_SAVERESTORE, "Disable save/restore bug workarouns" },
#undef ASM_SPEC
#define ASM_SPEC "%{mlittle-endian:-EL} %(asm_cpu)"
@@ -51,3 +51,19 @@ Boston, MA 02111-1307, USA. */
#define BYTES_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN)
#undef WORDS_BIG_ENDIAN
#define WORDS_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN)
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+ do { \
+ if (TARGET_LIVE_G0) \
+ { \
+ warning ("Option '-mlive-g0' deprecated."); \
+ target_flags &= ~MASK_LIVE_G0; \
+ } \
+ else if (TARGET_BROKEN_SAVERESTORE) \
+ { \
+ warning ("Option '-mbroken-saverestore' deprecated."); \
+ target_flags &= ~MASK_BROKEN_SAVERESTORE; \
+ } \
+ } while (0)
+
diff --git a/contrib/gcc/config/sparc/sun4o3.h b/contrib/gcc/config/sparc/sun4o3.h
index 10c7391..d2a53c1 100644
--- a/contrib/gcc/config/sparc/sun4o3.h
+++ b/contrib/gcc/config/sparc/sun4o3.h
@@ -1,9 +1,9 @@
#include "sparc/sparc.h"
-#undef FUNCTION_PROFILER
-#define FUNCTION_PROFILER(FILE, LABELNO) \
- fprintf (FILE, "\tsethi %%hi(LP%d),%%o0\n\tcall .mcount\n\tor %%lo(LP%d),%%o0,%%o0\n", \
- (LABELNO), (LABELNO))
+/* Override the name of the mcount profiling function. */
+
+#undef MCOUNT_FUNCTION
+#define MCOUNT_FUNCTION "*.mcount"
/* LINK_SPEC is needed only for SunOS 4. */
diff --git a/contrib/gcc/config/sparc/sysv4.h b/contrib/gcc/config/sparc/sysv4.h
index 7e90bdd..5f9bba9 100644
--- a/contrib/gcc/config/sparc/sysv4.h
+++ b/contrib/gcc/config/sparc/sysv4.h
@@ -66,7 +66,9 @@ Boston, MA 02111-1307, USA. */
/* The native assembler can't compute differences between symbols in different
sections when generating pic code, so we must put jump tables in the
text section. */
-#define JUMP_TABLES_IN_TEXT_SECTION 1
+/* But we now defer the tables to the end of the function, so we make
+ this 0 to not confuse the branch shortening code. */
+#define JUMP_TABLES_IN_TEXT_SECTION 0
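For context, a hedged example of source that typically produces such a table (not from the patch); with -fPIC the table entries are label differences, which is what the native assembler could not resolve across sections:

    /* A dense switch is normally compiled to a jump table; under -fPIC
       its entries are emitted as label differences.  */
    int classify (int c)
    {
      switch (c)
        {
        case 0: return 1;
        case 1: return 2;
        case 2: return 4;
        case 3: return 8;
        default: return 0;
        }
    }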
/* Pass -K to the assembler when PIC. */
#undef ASM_SPEC
@@ -191,35 +193,13 @@ do { ASM_OUTPUT_ALIGN ((FILE), Pmode == SImode ? 2 : 3); \
#define ASM_OUTPUT_SECTION_NAME(FILE, DECL, NAME, RELOC) \
do { \
if ((DECL) && TREE_CODE (DECL) == FUNCTION_DECL) \
- fprintf (FILE, ".section\t\"%s%s\",#alloc,#execinstr\n", \
- flag_function_sections ? ".text%" : "", (NAME)); \
+ fprintf (FILE, ".section\t\"%s\",#alloc,#execinstr\n", \
+ (NAME)); \
else if ((DECL) && DECL_READONLY_SECTION (DECL, RELOC)) \
fprintf (FILE, ".section\t\"%s\",#alloc\n", (NAME)); \
else \
fprintf (FILE, ".section\t\"%s\",#alloc,#write\n", (NAME)); \
} while (0)
-
-/* Output assembler code to FILE to initialize this source file's
- basic block profiling info, if that has not already been done. */
-
-#undef FUNCTION_BLOCK_PROFILER
-#define FUNCTION_BLOCK_PROFILER(FILE, LABELNO) \
- do { \
- fprintf (FILE, "\tsethi %%hi(.LLPBX0),%%o0\n\tld [%%lo(.LLPBX0)+%%o0],%%o1\n\ttst %%o1\n\tbne LPY%d\n\tadd %%o0,%%lo(.LLPBX0),%%o0\n\tcall __bb_init_func\n\tnop\nLPY%d:\n", \
- (LABELNO), (LABELNO)); \
- } while (0)
-
-/* Output assembler code to FILE to increment the entry-count for
- the BLOCKNO'th basic block in this source file. */
-
-#undef BLOCK_PROFILER
-#define BLOCK_PROFILER(FILE, BLOCKNO) \
-{ \
- int blockn = (BLOCKNO); \
- fprintf (FILE, "\tsethi %%hi(.LLPBX2+%d),%%g1\n\tld [%%lo(.LLPBX2+%d)+%%g1],%%g2\n\
-\tadd %%g2,1,%%g2\n\tst %%g2,[%%lo(.LLPBX2+%d)+%%g1]\n", \
- 4 * blockn, 4 * blockn, 4 * blockn); \
-}
/* A C statement (sans semicolon) to output to the stdio stream
FILE the assembler definition of uninitialized global DECL named
@@ -229,3 +209,8 @@ do { \
#undef ASM_OUTPUT_ALIGNED_BSS
#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* Override the name of the mcount profiling function. */
+
+#undef MCOUNT_FUNCTION
+#define MCOUNT_FUNCTION "*_mcount"
diff --git a/contrib/gcc/config/sparc/t-halos b/contrib/gcc/config/sparc/t-halos
new file mode 100644
index 0000000..0bd5496
--- /dev/null
+++ b/contrib/gcc/config/sparc/t-halos
@@ -0,0 +1,2 @@
+# For a native HALOS compile, we need to set -e1 for the assembler
+AS=as -e1
diff --git a/contrib/gcc/config/sparc/t-linux64 b/contrib/gcc/config/sparc/t-linux64
new file mode 100644
index 0000000..077cf69
--- /dev/null
+++ b/contrib/gcc/config/sparc/t-linux64
@@ -0,0 +1,21 @@
+MULTILIB_OPTIONS = m64/m32
+MULTILIB_DIRNAMES = 64 32
+MULTILIB_MATCHES =
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o
+
+tcrtbeginS.o: crtstuff.c $(GCC_PASSES) $(CONFIG_H) \
+ defaults.h frame.h gbl-ctors.h
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -g0 \
+ -finhibit-size-directive -fno-inline-functions -fno-exceptions $(CRTSTUFF_T_CFLAGS_S) \
+ -c $(srcdir)/crtstuff.c -DCRT_BEGIN -o tcrtbeginS$(objext)
+
+tcrtendS.o: crtstuff.c $(GCC_PASSES) $(CONFIG_H) \
+ defaults.h frame.h gbl-ctors.h
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -g0 \
+ -finhibit-size-directive -fno-inline-functions -fno-exceptions $(CRTSTUFF_T_CFLAGS_S) \
+ -c $(srcdir)/crtstuff.c -DCRT_END -o tcrtendS$(objext)
+
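A hedged note on the multilib variables above (an assumption about the resulting layout, not part of the patch): libgcc and the crt objects are built once per -m64/-m32 combination and installed under the matching MULTILIB_DIRNAMES subdirectories, so -m64 and -m32 links each resolve against their own copy (roughly <libsubdir>/64/libgcc.a and <libsubdir>/32/libgcc.a).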
diff --git a/contrib/gcc/config/sparc/t-sol2 b/contrib/gcc/config/sparc/t-sol2
index d41254a..a9b6ee1 100644
--- a/contrib/gcc/config/sparc/t-sol2
+++ b/contrib/gcc/config/sparc/t-sol2
@@ -6,19 +6,19 @@ CROSS_LIBGCC1 =
LIBGCC1_TEST =
# gmon build rule:
-gmon.o: $(srcdir)/config/sparc/gmon-sol2.c $(GCC_PASSES) $(CONFIG_H) stmp-int-hdrs
- $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) \
- -c $(srcdir)/config/sparc/gmon-sol2.c -o gmon.o
+$(T)gmon.o: $(srcdir)/config/sparc/gmon-sol2.c $(GCC_PASSES) $(CONFIG_H) stmp-int-hdrs
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) \
+ -c $(srcdir)/config/sparc/gmon-sol2.c -o $(T)gmon.o
# Assemble startup files.
-crt1.o: $(srcdir)/config/sparc/sol2-c1.asm $(GCC_PASSES)
- $(GCC_FOR_TARGET) -c -o crt1.o -x assembler $(srcdir)/config/sparc/sol2-c1.asm
-crti.o: $(srcdir)/config/sparc/sol2-ci.asm $(GCC_PASSES)
- $(GCC_FOR_TARGET) -c -o crti.o -x assembler $(srcdir)/config/sparc/sol2-ci.asm
-crtn.o: $(srcdir)/config/sparc/sol2-cn.asm $(GCC_PASSES)
- $(GCC_FOR_TARGET) -c -o crtn.o -x assembler $(srcdir)/config/sparc/sol2-cn.asm
-gcrt1.o: $(srcdir)/config/sparc/sol2-g1.asm $(GCC_PASSES)
- $(GCC_FOR_TARGET) -c -o gcrt1.o -x assembler $(srcdir)/config/sparc/sol2-g1.asm
+$(T)crt1.o: $(srcdir)/config/sparc/sol2-c1.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crt1.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-c1.asm
+$(T)crti.o: $(srcdir)/config/sparc/sol2-ci.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-ci.asm
+$(T)crtn.o: $(srcdir)/config/sparc/sol2-cn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-cn.asm
+$(T)gcrt1.o: $(srcdir)/config/sparc/sol2-c1.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -DGCRT1 -o $(T)gcrt1.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-c1.asm
# We need to use -fPIC when we are using gcc to compile the routines in
# crtstuff.c. This is only really needed when we are going to use gcc/g++
diff --git a/contrib/gcc/config/sparc/t-sol2-64 b/contrib/gcc/config/sparc/t-sol2-64
new file mode 100644
index 0000000..8d42c44
--- /dev/null
+++ b/contrib/gcc/config/sparc/t-sol2-64
@@ -0,0 +1,8 @@
+MULTILIB_OPTIONS = m32/m64
+MULTILIB_DIRNAMES = sparcv7 sparcv9
+MULTILIB_MATCHES =
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o gmon.o crt1.o crti.o crtn.o gcrt1.o
diff --git a/contrib/gcc/config/sparc/t-splet b/contrib/gcc/config/sparc/t-splet
index 3409f5d..3329e0b 100644
--- a/contrib/gcc/config/sparc/t-splet
+++ b/contrib/gcc/config/sparc/t-splet
@@ -16,8 +16,7 @@ fp-bit.c: $(srcdir)/config/fp-bit.c
echo '#define FLOAT' > fp-bit.c
cat $(srcdir)/config/fp-bit.c >> fp-bit.c
-MULTILIB_OPTIONS = mlittle-endian mlive-g0 mbroken-saverestore
-MULTILIB_DIRNAMES = little live-g0 brknsave
-
+MULTILIB_OPTIONS = mlittle-endian mflat
+MULTILIB_DIRNAMES = little flat
LIBGCC = stmp-multilib
INSTALL_LIBGCC = install-multilib
diff --git a/contrib/gcc/config/sparc/xm-sp64.h b/contrib/gcc/config/sparc/xm-sp64.h
index 5954aff..b673161 100644
--- a/contrib/gcc/config/sparc/xm-sp64.h
+++ b/contrib/gcc/config/sparc/xm-sp64.h
@@ -21,5 +21,7 @@ Boston, MA 02111-1307, USA. */
#include <sparc/xm-sparc.h>
/* This describes the machine the compiler is hosted on. */
+#if defined(__arch64__) || defined(__sparc_v9__) || defined(__sparcv9)
#undef HOST_BITS_PER_LONG
#define HOST_BITS_PER_LONG 64
+#endif
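The new guard stops this header from overstating the host when a 32-bit toolchain builds the compiler; a hypothetical compile-time probe of the assumption it encodes (not GCC source):

    #include <limits.h>
    /* When any 64-bit SPARC host macro is defined, the host's long must
       really be 64 bits wide.  */
    #if defined(__arch64__) || defined(__sparc_v9__) || defined(__sparcv9)
    typedef char host_long_is_64_bits[sizeof (long) * CHAR_BIT == 64 ? 1 : -1];
    #endif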
diff --git a/contrib/gcc/config/sparc/xm-sysv4-64.h b/contrib/gcc/config/sparc/xm-sysv4-64.h
new file mode 100644
index 0000000..c506d22
--- /dev/null
+++ b/contrib/gcc/config/sparc/xm-sysv4-64.h
@@ -0,0 +1,27 @@
+/* Configuration for GCC for Sparc v9 running 64-bit native.
+ Copyright (C) 1998 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include <sparc/xm-sysv4.h>
+
+/* This describes the machine the compiler is hosted on. */
+#if defined(__arch64__) || defined(__sparc_v9__) || defined(__sparcv9)
+#undef HOST_BITS_PER_LONG
+#define HOST_BITS_PER_LONG 64
+#endif