37 files changed, 3065 insertions, 2403 deletions
diff --git a/contrib/gcc/config/alpha/alpha.c b/contrib/gcc/config/alpha/alpha.c
index 9657e56..42b57e1 100644
--- a/contrib/gcc/config/alpha/alpha.c
+++ b/contrib/gcc/config/alpha/alpha.c
@@ -2949,12 +2949,8 @@ alpha_expand_mov (mode, operands)
 	 compiled at the end of compilation.  In the meantime, someone can
 	 re-encode-section-info on some symbol changing it e.g. from global
 	 to local-not-small.  If this happens, we'd have emitted a plain
-	 load rather than a high+losum load and not recognize the insn.
-
-	 So if rtl inlining is in effect, we delay the global/not-global
-	 decision until rest_of_compilation by wrapping it in an
-	 UNSPEC_SYMBOL.  */
-      if (TARGET_EXPLICIT_RELOCS && flag_inline_functions
+	 load rather than a high+losum load and not recognize the insn.  */
+      if (TARGET_EXPLICIT_RELOCS
 	  && rtx_equal_function_value_matters
 	  && global_symbolic_operand (operands[1], mode))
 	{
@@ -3336,10 +3332,10 @@ alpha_emit_conditional_branch (code)
 	  if (op1 == const0_rtx)
 	    cmp_code = NIL, branch_code = code;
 
-	  /* We want to use cmpcc/bcc when we can, since there is a zero delay
-	     bypass between logicals and br/cmov on EV5.  But we don't want to
-	     force valid immediate constants into registers needlessly.  */
-	  else if (GET_CODE (op1) == CONST_INT)
+	  /* If the constants doesn't fit into an immediate, but can
+ 	     be generated by lda/ldah, we adjust the argument and
+ 	     compare against zero, so we can use beq/bne directly.  */
+	  else if (GET_CODE (op1) == CONST_INT && (code == EQ || code == NE))
 	    {
 	      HOST_WIDE_INT v = INTVAL (op1), n = -v;
 
@@ -6748,14 +6744,21 @@ alpha_sa_mask (imaskP, fmaskP)
 
   /* We need to restore these for the handler.  */
   if (current_function_calls_eh_return)
-    for (i = 0; ; ++i)
-      {
-	unsigned regno = EH_RETURN_DATA_REGNO (i);
-	if (regno == INVALID_REGNUM)
-	  break;
-	imask |= 1L << regno;
-      }
-     
+    {
+      for (i = 0; ; ++i)
+	{
+	  unsigned regno = EH_RETURN_DATA_REGNO (i);
+	  if (regno == INVALID_REGNUM)
+	    break;
+	  imask |= 1L << regno;
+	}
+
+      /* Glibc likes to use $31 as an unwind stopper for crt0.  To
+	 avoid hackery in unwind-dw2.c, we need to actively store a
+	 zero in the prologue of _Unwind_RaiseException et al.  */
+      imask |= 1UL << 31;
+    }
+
   /* If any register spilled, then spill the return address also.  */
   /* ??? This is required by the Digital stack unwind specification
      and isn't needed if we're doing Dwarf2 unwinding.  */
@@ -7210,7 +7213,7 @@ alpha_expand_prologue ()
 	}
 
       /* Now save any other registers required to be saved.  */
-      for (i = 0; i < 32; i++)
+      for (i = 0; i < 31; i++)
 	if (imask & (1L << i))
 	  {
 	    mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
@@ -7219,7 +7222,25 @@ alpha_expand_prologue ()
 	    reg_offset += 8;
 	  }
 
-      for (i = 0; i < 32; i++)
+      /* Store a zero if requested for unwinding.  */
+      if (imask & (1UL << 31))
+ 	{
+ 	  rtx insn, t;
+ 
+ 	  mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
+ 	  set_mem_alias_set (mem, alpha_sr_alias_set);
+ 	  insn = emit_move_insn (mem, const0_rtx);
+ 
+ 	  RTX_FRAME_RELATED_P (insn) = 1;
+ 	  t = gen_rtx_REG (Pmode, 31);
+ 	  t = gen_rtx_SET (VOIDmode, mem, t);
+ 	  t = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, t, REG_NOTES (insn));
+ 	  REG_NOTES (insn) = t;
+ 
+ 	  reg_offset += 8;
+ 	}
+ 
+      for (i = 0; i < 31; i++)
 	if (fmask & (1L << i))
 	  {
 	    mem = gen_rtx_MEM (DFmode, plus_constant (sa_reg, reg_offset));
@@ -7625,7 +7646,7 @@ alpha_expand_epilogue ()
       reg_offset += 8;
       imask &= ~(1L << REG_RA);
 
-      for (i = 0; i < 32; ++i)
+      for (i = 0; i < 31; ++i)
 	if (imask & (1L << i))
 	  {
 	    if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
@@ -7639,7 +7660,10 @@ alpha_expand_epilogue ()
 	    reg_offset += 8;
 	  }
 
-      for (i = 0; i < 32; ++i)
+      if (imask & (1UL << 31))
+	reg_offset += 8;
+
+      for (i = 0; i < 31; ++i)
 	if (fmask & (1L << i))
 	  {
 	    mem = gen_rtx_MEM (DFmode, plus_constant(sa_reg, reg_offset));
diff --git a/contrib/gcc/config/alpha/alpha.h b/contrib/gcc/config/alpha/alpha.h
index b933ea3..6b52700 100644
--- a/contrib/gcc/config/alpha/alpha.h
+++ b/contrib/gcc/config/alpha/alpha.h
@@ -1276,6 +1276,7 @@ do {						\
 /* Before the prologue, RA lives in $26.  */
 #define INCOMING_RETURN_ADDR_RTX  gen_rtx_REG (Pmode, 26)
 #define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (26)
+#define DWARF_ALT_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (64)
 
 /* Describe how we implement __builtin_eh_return.  */
 #define EH_RETURN_DATA_REGNO(N)	((N) < 4 ? (N) + 16 : INVALID_REGNUM)
diff --git a/contrib/gcc/config/alpha/alpha.md b/contrib/gcc/config/alpha/alpha.md
index a4c0ae6..f7e9fa4 100644
--- a/contrib/gcc/config/alpha/alpha.md
+++ b/contrib/gcc/config/alpha/alpha.md
@@ -5399,7 +5399,7 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(unspec:DI [(match_operand:DI 1 "symbolic_operand" "")]
 		   UNSPEC_SYMBOL))]
-  "TARGET_EXPLICIT_RELOCS && flag_inline_functions"
+  "TARGET_EXPLICIT_RELOCS"
   "#"
   ""
   [(set (match_dup 0) (match_dup 1))]
diff --git a/contrib/gcc/config/alpha/linux.h b/contrib/gcc/config/alpha/linux.h
index 0c53344..3a2940c 100644
--- a/contrib/gcc/config/alpha/linux.h
+++ b/contrib/gcc/config/alpha/linux.h
@@ -61,6 +61,9 @@ Boston, MA 02111-1307, USA.  */
 
 #define TARGET_HAS_F_SETLKW
 
+#define LINK_GCC_C_SEQUENCE_SPEC \
+  "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
 /* Do code reading to identify a signal frame, and set the frame
    state data appropriately.  See unwind-dw2.c for the structs.  */
 
@@ -78,6 +81,8 @@ Boston, MA 02111-1307, USA.  */
     if (pc_[0] != 0x47fe0410		/* mov $30,$16 */		\
         || pc_[2] != 0x00000083		/* callsys */)			\
       break;								\
+    if ((CONTEXT)->cfa == 0)						\
+      break;								\
     if (pc_[1] == 0x201f0067)		/* lda $0,NR_sigreturn */	\
       sc_ = (CONTEXT)->cfa;						\
     else if (pc_[1] == 0x201f015f)	/* lda $0,NR_rt_sigreturn */	\
@@ -106,8 +111,8 @@ Boston, MA 02111-1307, USA.  */
 	(FS)->regs.reg[i_+32].loc.offset				\
 	  = (long)&sc_->sc_fpregs[i_] - new_cfa_;			\
       }									\
-    (FS)->regs.reg[31].how = REG_SAVED_OFFSET;				\
-    (FS)->regs.reg[31].loc.offset = (long)&sc_->sc_pc - new_cfa_;	\
-    (FS)->retaddr_column = 31;						\
+    (FS)->regs.reg[64].how = REG_SAVED_OFFSET;				\
+    (FS)->regs.reg[64].loc.offset = (long)&sc_->sc_pc - new_cfa_;	\
+    (FS)->retaddr_column = 64;						\
     goto SUCCESS;							\
   } while (0)
diff --git a/contrib/gcc/config/arm/linux-elf.h b/contrib/gcc/config/arm/linux-elf.h
index c6d14e7..8cc812f 100644
--- a/contrib/gcc/config/arm/linux-elf.h
+++ b/contrib/gcc/config/arm/linux-elf.h
@@ -123,3 +123,6 @@ Boston, MA 02111-1307, USA.  */
 
 #undef  CC1_SPEC
 #define CC1_SPEC "%{profile:-p}"
+
+#define LINK_GCC_C_SEQUENCE_SPEC \
+  "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
diff --git a/contrib/gcc/config/darwin.h b/contrib/gcc/config/darwin.h
index 90d959c..c4b7526 100644
--- a/contrib/gcc/config/darwin.h
+++ b/contrib/gcc/config/darwin.h
@@ -380,6 +380,10 @@ do { text_section ();							\
         || DECL_INITIAL (DECL))                                         \
       (* targetm.encode_section_info) (DECL, false);			\
     ASM_OUTPUT_LABEL (FILE, xname);                                     \
+    /* Darwin doesn't support zero-size objects, so give them a 	\
+       byte.  */							\
+    if (tree_low_cst (DECL_SIZE_UNIT (DECL), 1) == 0)			\
+      assemble_zeros (1);						\
   } while (0)
 
 #define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL)                     \
@@ -401,6 +405,15 @@ do { text_section ();							\
     machopic_output_possible_stub_label (FILE, xname);			\
   } while (0)
 
+#define ASM_DECLARE_CONSTANT_NAME(FILE, NAME, EXP, SIZE)	\
+  do {								\
+    ASM_OUTPUT_LABEL (FILE, NAME);				\
+    /* Darwin doesn't support zero-size objects, so give them a	\
+       byte.  */						\
+    if ((SIZE) == 0)						\
+      assemble_zeros (1);					\
+  } while (0)
+
 /* Wrap new method names in quotes so the assembler doesn't gag.
    Make Objective-C internal symbols local.  */
 
diff --git a/contrib/gcc/config/i386/emmintrin.h b/contrib/gcc/config/i386/emmintrin.h
new file mode 100644
index 0000000..7007fc5
--- /dev/null
+++ b/contrib/gcc/config/i386/emmintrin.h
@@ -0,0 +1,1499 @@
+/* Copyright (C) 2003 Free Software Foundation, Inc.
+
+   This file is part of GNU CC.
+
+   GNU CC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   GNU CC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GNU CC; see the file COPYING.  If not, write to
+   the Free Software Foundation, 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you include this header file into source
+   files compiled by GCC, this header file does not by itself cause
+   the resulting executable to be covered by the GNU General Public
+   License.  This exception does not however invalidate any other
+   reasons why the executable file might be covered by the GNU General
+   Public License.  */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+   User Guide and Reference, version 8.0.  */
+
+#ifndef _EMMINTRIN_H_INCLUDED
+#define _EMMINTRIN_H_INCLUDED
+
+#ifdef __SSE2__
+#include <xmmintrin.h>
+
+/* SSE2 */
+typedef int __v2df __attribute__ ((mode (V2DF)));
+typedef int __v2di __attribute__ ((mode (V2DI)));
+typedef int __v4si __attribute__ ((mode (V4SI)));
+typedef int __v8hi __attribute__ ((mode (V8HI)));
+typedef int __v16qi __attribute__ ((mode (V16QI)));
+
+/* Create a selector for use with the SHUFPD instruction.  */
+#define _MM_SHUFFLE2(fp1,fp0) \
+ (((fp1) << 1) | (fp0))
+
+#define __m128i __v2di
+#define __m128d __v2df
+
+/* Create a vector with element 0 as *P and the rest zero.  */
+static __inline __m128d
+_mm_load_sd (double const *__P)
+{
+  return (__m128d) __builtin_ia32_loadsd (__P);
+}
+
+/* Create a vector with all two elements equal to *P.  */
+static __inline __m128d
+_mm_load1_pd (double const *__P)
+{
+  __v2df __tmp = __builtin_ia32_loadsd (__P);
+  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0));
+}
+
+static __inline __m128d
+_mm_load_pd1 (double const *__P)
+{
+  return _mm_load1_pd (__P);
+}
+
+/* Load two DPFP values from P.  The address must be 16-byte aligned.  */
+static __inline __m128d
+_mm_load_pd (double const *__P)
+{
+  return (__m128d) __builtin_ia32_loadapd (__P);
+}
+
+/* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
+static __inline __m128d
+_mm_loadu_pd (double const *__P)
+{
+  return (__m128d) __builtin_ia32_loadupd (__P);
+}
+
+/* Load two DPFP values in reverse order.  The address must be aligned.  */
+static __inline __m128d
+_mm_loadr_pd (double const *__P)
+{
+  __v2df __tmp = __builtin_ia32_loadapd (__P);
+  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
+}
+
+/* Create a vector with element 0 as F and the rest zero.  */
+static __inline __m128d
+_mm_set_sd (double __F)
+{
+  return (__m128d) __builtin_ia32_loadsd (&__F);
+}
+
+/* Create a vector with all two elements equal to F.  */
+static __inline __m128d
+_mm_set1_pd (double __F)
+{
+  __v2df __tmp = __builtin_ia32_loadsd (&__F);
+  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0));
+}
+
+static __inline __m128d
+_mm_set_pd1 (double __F)
+{
+  return _mm_set1_pd (__F);
+}
+
+/* Create the vector [Z Y].  */
+static __inline __m128d
+_mm_set_pd (double __Z, double __Y)
+{
+  union {
+    double __a[2];
+    __m128d __v;
+  } __u;
+
+  __u.__a[0] = __Y;
+  __u.__a[1] = __Z;
+
+  return __u.__v;
+}
+
+/* Create the vector [Y Z].  */
+static __inline __m128d
+_mm_setr_pd (double __Z, double __Y)
+{
+  return _mm_set_pd (__Y, __Z);
+}
+
+/* Create a vector of zeros.  */
+static __inline __m128d
+_mm_setzero_pd (void)
+{
+  return (__m128d) __builtin_ia32_setzeropd ();
+}
+
+/* Stores the lower DPFP value.  */
+static __inline void
+_mm_store_sd (double *__P, __m128d __A)
+{
+  __builtin_ia32_storesd (__P, (__v2df)__A);
+}
+
+/* Store the lower DPFP value acrosd two words.  */
+static __inline void
+_mm_store1_pd (double *__P, __m128d __A)
+{
+  __v2df __va = (__v2df)__A;
+  __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,0));
+  __builtin_ia32_storeapd (__P, __tmp);
+}
+
+static __inline void
+_mm_store_pd1 (double *__P, __m128d __A)
+{
+  _mm_store1_pd (__P, __A);
+}
+
+/* Store two DPFP values.  The address must be 16-byte aligned.  */
+static __inline void
+_mm_store_pd (double *__P, __m128d __A)
+{
+  __builtin_ia32_storeapd (__P, (__v2df)__A);
+}
+
+/* Store two DPFP values.  The address need not be 16-byte aligned.  */
+static __inline void
+_mm_storeu_pd (double *__P, __m128d __A)
+{
+  __builtin_ia32_storeupd (__P, (__v2df)__A);
+}
+
+/* Store two DPFP values in reverse order.  The address must be aligned.  */
+static __inline void
+_mm_storer_pd (double *__P, __m128d __A)
+{
+  __v2df __va = (__v2df)__A;
+  __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,1));
+  __builtin_ia32_storeapd (__P, __tmp);
+}
+
+/* Sets the low DPFP value of A from the low value of B.  */
+static __inline __m128d
+_mm_move_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
+}
+
+
+static __inline __m128d
+_mm_add_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_add_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_sub_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_sub_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_mul_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_mul_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_div_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_div_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_sqrt_pd (__m128d __A)
+{
+  return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
+}
+
+/* Return pair {sqrt (A[0), B[1]}.  */
+static __inline __m128d
+_mm_sqrt_sd (__m128d __A, __m128d __B)
+{
+  __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
+  return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
+}
+
+static __inline __m128d
+_mm_min_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_min_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_max_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_max_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_and_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_andnot_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_or_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_xor_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpeq_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmplt_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmple_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpgt_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpge_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpneq_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpnlt_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpnle_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpngt_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpnge_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpord_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpunord_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpeq_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmplt_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmple_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpgt_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+					 (__v2df)
+					 __builtin_ia32_cmpltsd ((__v2df) __B,
+								 (__v2df)
+								 __A));
+}
+
+static __inline __m128d
+_mm_cmpge_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+					 (__v2df)
+					 __builtin_ia32_cmplesd ((__v2df) __B,
+								 (__v2df)
+								 __A));
+}
+
+static __inline __m128d
+_mm_cmpneq_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpnlt_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpnle_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpngt_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+					 (__v2df)
+					 __builtin_ia32_cmpnltsd ((__v2df) __B,
+								  (__v2df)
+								  __A));
+}
+
+static __inline __m128d
+_mm_cmpnge_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+					 (__v2df)
+					 __builtin_ia32_cmpnlesd ((__v2df) __B,
+								  (__v2df)
+								  __A));
+}
+
+static __inline __m128d
+_mm_cmpord_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_cmpunord_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_comieq_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_comilt_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_comile_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_comigt_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_comige_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_comineq_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_ucomieq_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_ucomilt_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_ucomile_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_ucomigt_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_ucomige_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_ucomineq_sd (__m128d __A, __m128d __B)
+{
+  return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
+}
+
+/* Create a vector with element 0 as *P and the rest zero.  */
+
+static __inline __m128i
+_mm_load_si128 (__m128i const *__P)
+{
+  return (__m128i) __builtin_ia32_loaddqa ((char const *)__P);
+}
+
+static __inline __m128i
+_mm_loadu_si128 (__m128i const *__P)
+{
+  return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
+}
+
+static __inline __m128i
+_mm_loadl_epi64 (__m128i const *__P)
+{
+  return (__m128i) __builtin_ia32_movq2dq (*(unsigned long long *)__P);
+}
+
+static __inline void
+_mm_store_si128 (__m128i *__P, __m128i __B)
+{
+  __builtin_ia32_storedqa ((char *)__P, (__v16qi)__B);
+}
+
+static __inline void
+_mm_storeu_si128 (__m128i *__P, __m128i __B)
+{
+  __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
+}
+
+static __inline void
+_mm_storel_epi64 (__m128i *__P, __m128i __B)
+{
+  *(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B);
+}
+
+static __inline __m64
+_mm_movepi64_pi64 (__m128i __B)
+{
+  return (__m64) __builtin_ia32_movdq2q ((__v2di)__B);
+}
+
+static __inline __m128i
+_mm_move_epi64 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_movq ((__v2di)__A);
+}
+
+/* Create a vector of zeros.  */
+static __inline __m128i
+_mm_setzero_si128 (void)
+{
+  return (__m128i) __builtin_ia32_setzero128 ();
+}
+
+static __inline __m128i
+_mm_set_epi64 (__m64 __A,  __m64 __B)
+{
+  __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+  __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B);
+  return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp2, __tmp);
+}
+
+/* Create the vector [Z Y X W].  */
+static __inline __m128i
+_mm_set_epi32 (int __Z, int __Y, int __X, int __W)
+{
+  union {
+    int __a[4];
+    __m128i __v;
+  } __u;
+
+  __u.__a[0] = __W;
+  __u.__a[1] = __X;
+  __u.__a[2] = __Y;
+  __u.__a[3] = __Z;
+
+  return __u.__v;
+}
+
+#ifdef __x86_64__
+/* Create the vector [Z Y].  */
+static __inline __m128i
+_mm_set_epi64x (long long __Z, long long __Y)
+{
+  union {
+    long __a[2];
+    __m128i __v;
+  } __u;
+
+  __u.__a[0] = __Y;
+  __u.__a[1] = __Z;
+
+  return __u.__v;
+}
+#endif
+
+/* Create the vector [S T U V Z Y X W].  */
+static __inline __m128i
+_mm_set_epi16 (short __Z, short __Y, short __X, short __W,
+	       short __V, short __U, short __T, short __S)
+{
+  union {
+    short __a[8];
+    __m128i __v;
+  } __u;
+
+  __u.__a[0] = __S;
+  __u.__a[1] = __T;
+  __u.__a[2] = __U;
+  __u.__a[3] = __V;
+  __u.__a[4] = __W;
+  __u.__a[5] = __X;
+  __u.__a[6] = __Y;
+  __u.__a[7] = __Z;
+
+  return __u.__v;
+}
+
+/* Create the vector [S T U V Z Y X W].  */
+static __inline __m128i
+_mm_set_epi8 (char __Z, char __Y, char __X, char __W,
+	      char __V, char __U, char __T, char __S,
+	      char __Z1, char __Y1, char __X1, char __W1,
+	      char __V1, char __U1, char __T1, char __S1)
+{
+  union {
+    char __a[16];
+    __m128i __v;
+  } __u;
+
+  __u.__a[0] = __S1;
+  __u.__a[1] = __T1;
+  __u.__a[2] = __U1;
+  __u.__a[3] = __V1;
+  __u.__a[4] = __W1;
+  __u.__a[5] = __X1;
+  __u.__a[6] = __Y1;
+  __u.__a[7] = __Z1;
+  __u.__a[8] = __S;
+  __u.__a[9] = __T;
+  __u.__a[10] = __U;
+  __u.__a[11] = __V;
+  __u.__a[12] = __W;
+  __u.__a[13] = __X;
+  __u.__a[14] = __Y;
+  __u.__a[15] = __Z;
+
+  return __u.__v;
+}
+
+static __inline __m128i
+_mm_set1_epi64 (__m64 __A)
+{
+  __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+  return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp);
+}
+
+static __inline __m128i
+_mm_set1_epi32 (int __A)
+{
+  __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__A);
+  return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
+}
+
+#ifdef __x86_64__
+static __inline __m128i
+_mm_set1_epi64x (long long __A)
+{
+  __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+  return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0));
+}
+#endif
+
+static __inline __m128i
+_mm_set1_epi16 (short __A)
+{
+  int __Acopy = (unsigned short)__A;
+  __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy);
+  __tmp = (__v4si)__builtin_ia32_punpcklwd128 ((__v8hi)__tmp, (__v8hi)__tmp);
+  return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
+}
+
+static __inline __m128i
+_mm_set1_epi8 (char __A)
+{
+  int __Acopy = (unsigned char)__A;
+  __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy);
+  __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp);
+  __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp);
+  return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
+}
+
+static __inline __m128i
+_mm_setr_epi64 (__m64 __A,  __m64 __B)
+{
+  __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+  __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B);
+  return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp2);
+}
+
+/* Create the vector [Z Y X W].  */
+static __inline __m128i
+_mm_setr_epi32 (int __W, int __X, int __Y, int __Z)
+{
+  union {
+    int __a[4];
+    __m128i __v;
+  } __u;
+
+  __u.__a[0] = __W;
+  __u.__a[1] = __X;
+  __u.__a[2] = __Y;
+  __u.__a[3] = __Z;
+
+  return __u.__v;
+}
+/* Create the vector [S T U V Z Y X W].  */
+static __inline __m128i
+_mm_setr_epi16 (short __S, short __T, short __U, short __V,
+	        short __W, short __X, short __Y, short __Z)
+{
+  union {
+    short __a[8];
+    __m128i __v;
+  } __u;
+
+  __u.__a[0] = __S;
+  __u.__a[1] = __T;
+  __u.__a[2] = __U;
+  __u.__a[3] = __V;
+  __u.__a[4] = __W;
+  __u.__a[5] = __X;
+  __u.__a[6] = __Y;
+  __u.__a[7] = __Z;
+
+  return __u.__v;
+}
+
+/* Create the vector [S T U V Z Y X W].  */
+static __inline __m128i
+_mm_setr_epi8 (char __S1, char __T1, char __U1, char __V1,
+	       char __W1, char __X1, char __Y1, char __Z1,
+	       char __S, char __T, char __U, char __V,
+	       char __W, char __X, char __Y, char __Z)
+{
+  union {
+    char __a[16];
+    __m128i __v;
+  } __u;
+
+  __u.__a[0] = __S1;
+  __u.__a[1] = __T1;
+  __u.__a[2] = __U1;
+  __u.__a[3] = __V1;
+  __u.__a[4] = __W1;
+  __u.__a[5] = __X1;
+  __u.__a[6] = __Y1;
+  __u.__a[7] = __Z1;
+  __u.__a[8] = __S;
+  __u.__a[9] = __T;
+  __u.__a[10] = __U;
+  __u.__a[11] = __V;
+  __u.__a[12] = __W;
+  __u.__a[13] = __X;
+  __u.__a[14] = __Y;
+  __u.__a[15] = __Z;
+
+  return __u.__v;
+}
+
+static __inline __m128d
+_mm_cvtepi32_pd (__m128i __A)
+{
+  return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
+}
+
+static __inline __m128
+_mm_cvtepi32_ps (__m128i __A)
+{
+  return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
+}
+
+static __inline __m128i
+_mm_cvtpd_epi32 (__m128d __A)
+{
+  return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
+}
+
+static __inline __m64
+_mm_cvtpd_pi32 (__m128d __A)
+{
+  return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
+}
+
+static __inline __m128
+_mm_cvtpd_ps (__m128d __A)
+{
+  return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
+}
+
+static __inline __m128i
+_mm_cvttpd_epi32 (__m128d __A)
+{
+  return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
+}
+
+static __inline __m64
+_mm_cvttpd_pi32 (__m128d __A)
+{
+  return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
+}
+
+static __inline __m128d
+_mm_cvtpi32_pd (__m64 __A)
+{
+  return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
+}
+
+static __inline __m128i
+_mm_cvtps_epi32 (__m128 __A)
+{
+  return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
+}
+
+static __inline __m128i
+_mm_cvttps_epi32 (__m128 __A)
+{
+  return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
+}
+
+static __inline __m128d
+_mm_cvtps_pd (__m128 __A)
+{
+  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
+}
+
+static __inline int
+_mm_cvtsd_si32 (__m128d __A)
+{
+  return __builtin_ia32_cvtsd2si ((__v2df) __A);
+}
+
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsd_si64x (__m128d __A)
+{
+  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+#endif
+
+static __inline int
+_mm_cvttsd_si32 (__m128d __A)
+{
+  return __builtin_ia32_cvttsd2si ((__v2df) __A);
+}
+
+#ifdef __x86_64__
+static __inline long long
+_mm_cvttsd_si64x (__m128d __A)
+{
+  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+#endif
+
+static __inline __m128
+_mm_cvtsd_ss (__m128 __A, __m128d __B)
+{
+  return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
+}
+
+static __inline __m128d
+_mm_cvtsi32_sd (__m128d __A, int __B)
+{
+  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
+}
+
+#ifdef __x86_64__
+static __inline __m128d
+_mm_cvtsi64x_sd (__m128d __A, long long __B)
+{
+  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+#endif
+
+static __inline __m128d
+_mm_cvtss_sd (__m128d __A, __m128 __B)
+{
+  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
+}
+
+#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C)))
+
+static __inline __m128d
+_mm_unpackhi_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_unpacklo_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_loadh_pd (__m128d __A, double const *__B)
+{
+  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B);
+}
+
+static __inline void
+_mm_storeh_pd (double *__A, __m128d __B)
+{
+  __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B);
+}
+
+static __inline __m128d
+_mm_loadl_pd (__m128d __A, double const *__B)
+{
+  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B);
+}
+
+static __inline void
+_mm_storel_pd (double *__A, __m128d __B)
+{
+  __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B);
+}
+
+static __inline int
+_mm_movemask_pd (__m128d __A)
+{
+  return __builtin_ia32_movmskpd ((__v2df)__A);
+}
+
+static __inline __m128i
+_mm_packs_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_packs_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
+}
+
+static __inline __m128i
+_mm_packus_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
+}
+
+static __inline __m128i
+_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
+}
+
+static __inline __m128i
+_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_add_epi8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_add_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_add_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
+}
+
+static __inline __m128i
+_mm_add_epi64 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_adds_epi8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_adds_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_adds_epu8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_adds_epu16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_sub_epi8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_sub_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_sub_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
+}
+
+static __inline __m128i
+_mm_sub_epi64 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_subs_epi8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_subs_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_subs_epu8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_subs_epu16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_madd_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_mulhi_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_mullo_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m64
+_mm_mul_su32 (__m64 __A, __m64 __B)
+{
+  return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
+}
+
+static __inline __m128i
+_mm_mul_epu32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
+}
+
+static __inline __m128i
+_mm_sll_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psllw128 ((__v8hi)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_sll_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pslld128 ((__v4si)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_sll_epi64 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psllq128 ((__v2di)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_sra_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_sra_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_srl_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_srl_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_srl_epi64 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_slli_epi16 (__m128i __A, int __B)
+{
+  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
+}
+
+static __inline __m128i
+_mm_slli_epi32 (__m128i __A, int __B)
+{
+  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
+}
+
+static __inline __m128i
+_mm_slli_epi64 (__m128i __A, int __B)
+{
+  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
+}
+
+static __inline __m128i
+_mm_srai_epi16 (__m128i __A, int __B)
+{
+  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
+}
+
+static __inline __m128i
+_mm_srai_epi32 (__m128i __A, int __B)
+{
+  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
+}
+
+#if 0
+static __m128i __attribute__((__always_inline__))
+_mm_srli_si128 (__m128i __A, const int __B)
+{
+  return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B))
+}
+
+static __m128i __attribute__((__always_inline__))
+_mm_srli_si128 (__m128i __A, const int __B)
+{
+  return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B))
+}
+#endif
+#define _mm_srli_si128(__A, __B) ((__m128i)__builtin_ia32_psrldqi128 (__A, __B))
+#define _mm_slli_si128(__A, __B) ((__m128i)__builtin_ia32_pslldqi128 (__A, __B))
+
+static __inline __m128i
+_mm_srli_epi16 (__m128i __A, int __B)
+{
+  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
+}
+
+static __inline __m128i
+_mm_srli_epi32 (__m128i __A, int __B)
+{
+  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
+}
+
+static __inline __m128i
+_mm_srli_epi64 (__m128i __A, int __B)
+{
+  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
+}
+
+static __inline __m128i
+_mm_and_si128 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_andnot_si128 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_or_si128 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_xor_si128 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
+}
+
+static __inline __m128i
+_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
+}
+
+static __inline __m128i
+_mm_cmplt_epi8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
+}
+
+static __inline __m128i
+_mm_cmplt_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
+}
+
+static __inline __m128i
+_mm_cmplt_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
+}
+
+static __inline __m128i
+_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
+}
+
+#define _mm_extract_epi16(__A, __B) __builtin_ia32_pextrw128 ((__v8hi)__A, __B)
+
+#define _mm_insert_epi16(__A, __B, __C) ((__m128i)__builtin_ia32_pinsrw128 ((__v8hi)__A, __B, __C))
+
+static __inline __m128i
+_mm_max_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_max_epu8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_min_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_min_epu8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline int
+_mm_movemask_epi8 (__m128i __A)
+{
+  return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
+}
+
+static __inline __m128i
+_mm_mulhi_epu16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B))
+#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B))
+#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B))
+
+static __inline void
+_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
+{
+  __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
+}
+
+static __inline __m128i
+_mm_avg_epu8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline __m128i
+_mm_avg_epu16 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+static __inline __m128i
+_mm_sad_epu8 (__m128i __A, __m128i __B)
+{
+  return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+static __inline void
+_mm_stream_si32 (int *__A, int __B)
+{
+  __builtin_ia32_movnti (__A, __B);
+}
+
+static __inline void
+_mm_stream_si128 (__m128i *__A, __m128i __B)
+{
+  __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
+}
+
+static __inline void
+_mm_stream_pd (double *__A, __m128d __B)
+{
+  __builtin_ia32_movntpd (__A, (__v2df)__B);
+}
+
+static __inline __m128i
+_mm_movpi64_epi64 (__m64 __A)
+{
+  return (__m128i)__builtin_ia32_movq2dq ((unsigned long long)__A);
+}
+
+static __inline void
+_mm_clflush (void const *__A)
+{
+  return __builtin_ia32_clflush (__A);
+}
+
+static __inline void
+_mm_lfence (void)
+{
+  __builtin_ia32_lfence ();
+}
+
+static __inline void
+_mm_mfence (void)
+{
+  __builtin_ia32_mfence ();
+}
+
+static __inline __m128i
+_mm_cvtsi32_si128 (int __A)
+{
+  return (__m128i) __builtin_ia32_loadd (&__A);
+}
+
+#ifdef __x86_64__
+static __inline __m128i
+_mm_cvtsi64x_si128 (long long __A)
+{
+  return (__m128i) __builtin_ia32_movq2dq (__A);
+}
+#endif
+
+static __inline int
+_mm_cvtsi128_si32 (__m128i __A)
+{
+  int __tmp;
+  __builtin_ia32_stored (&__tmp, (__v4si)__A);
+  return __tmp;
+}
+
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsi128_si64x (__m128i __A)
+{
+  return __builtin_ia32_movdq2q ((__v2di)__A);
+}
+#endif
+
+#endif /* __SSE2__  */
+
+#endif /* _EMMINTRIN_H_INCLUDED */
diff --git a/contrib/gcc/config/i386/i386.c b/contrib/gcc/config/i386/i386.c
index 8d033b9..3abae59 100644
--- a/contrib/gcc/config/i386/i386.c
+++ b/contrib/gcc/config/i386/i386.c
@@ -1257,6 +1257,14 @@ override_options ()
   if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
     target_flags &= ~MASK_NO_FANCY_MATH_387;
 
+  /* Turn on SSE2 builtins for -mpni.  */
+  if (TARGET_PNI)
+    target_flags |= MASK_SSE2;
+
+  /* Turn on SSE builtins for -msse2.  */
+  if (TARGET_SSE2)
+    target_flags |= MASK_SSE;
+
   if (TARGET_64BIT)
     {
       if (TARGET_ALIGN_DOUBLE)
@@ -2463,23 +2471,55 @@ int
 ix86_return_in_memory (type)
      tree type;
 {
-  int needed_intregs, needed_sseregs;
+  int needed_intregs, needed_sseregs, size;
+  enum machine_mode mode = TYPE_MODE (type);
+
   if (TARGET_64BIT)
+    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
+
+  if (mode == BLKmode)
+    return 1;
+
+  size = int_size_in_bytes (type);
+
+  if (VECTOR_MODE_P (mode) || mode == TImode)
     {
-      return !examine_argument (TYPE_MODE (type), type, 1,
-				&needed_intregs, &needed_sseregs);
-    }
-  else
-    {
-      if (TYPE_MODE (type) == BLKmode
-	  || (VECTOR_MODE_P (TYPE_MODE (type))
-	      && int_size_in_bytes (type) == 8)
-	  || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
-	      && TYPE_MODE (type) != TFmode
-	      && !VECTOR_MODE_P (TYPE_MODE (type))))
+      /* User-created vectors small enough to fit in EAX.  */
+      if (size < 8)
+	return 0;
+
+      /* MMX/3dNow values are returned on the stack, since we've
+	 got to EMMS/FEMMS before returning.  */
+      if (size == 8)
 	return 1;
-      return 0;
+
+      /* SSE values are returned in XMM0.  */
+      /* ??? Except when it doesn't exist?  We have a choice of
+	 either (1) being abi incompatible with a -march switch,
+	 or (2) generating an error here.  Given no good solution,
+	 I think the safest thing is one warning.  The user won't
+	 be able to use -Werror, but...  */
+      if (size == 16)
+	{
+	  static bool warned;
+
+	  if (TARGET_SSE)
+	    return 0;
+
+	  if (!warned)
+	    {
+	      warned = true;
+	      warning ("SSE vector return without SSE enabled changes the ABI");
+	    }
+	  return 1;
+	}
     }
+
+  if (mode == TFmode)
+    return 0;
+  if (size > 12)
+    return 1;
+  return 0;
 }
 
 /* Define how to find the value returned by a library function
@@ -2514,10 +2554,14 @@ static int
 ix86_value_regno (mode)
      enum machine_mode mode;
 {
+  /* Floating point return values in %st(0).  */
   if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
     return FIRST_FLOAT_REG;
-  if (mode == TImode || VECTOR_MODE_P (mode))
+  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
+     we prevent this case when sse is not available.  */
+  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
     return FIRST_SSE_REG;
+  /* Everything else in %eax.  */
   return 0;
 }
 
@@ -4230,7 +4274,7 @@ ix86_setup_frame_addresses ()
   cfun->machine->accesses_prev_frame = 1;
 }
 
-#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
+#if defined(HAVE_GAS_HIDDEN) && (defined(SUPPORTS_ONE_ONLY) && SUPPORTS_ONE_ONLY)
 # define USE_HIDDEN_LINKONCE 1
 #else
 # define USE_HIDDEN_LINKONCE 0
@@ -6701,8 +6745,8 @@ get_some_local_dynamic_name_1 (px, data)
    C -- print opcode suffix for set/cmov insn.
    c -- like C, but print reversed condition
    F,f -- likewise, but for floating-point.
-   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
-        nothing
+   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
+        otherwise nothing
    R -- print the prefix for register names.
    z -- print the opcode suffix for the size of the current operand.
    * -- print a star (in certain assembler syntax)
@@ -6906,7 +6950,7 @@ print_operand (file, x, code)
 	    }
 	  return;
 	case 'O':
-#ifdef CMOV_SUN_AS_SYNTAX
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
 	  if (ASSEMBLER_DIALECT == ASM_ATT)
 	    {
 	      switch (GET_MODE (x))
@@ -6926,7 +6970,7 @@ print_operand (file, x, code)
 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
 	  return;
 	case 'F':
-#ifdef CMOV_SUN_AS_SYNTAX
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
 	  if (ASSEMBLER_DIALECT == ASM_ATT)
 	    putc ('.', file);
 #endif
@@ -6945,7 +6989,7 @@ print_operand (file, x, code)
 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
 	  return;
 	case 'f':
-#ifdef CMOV_SUN_AS_SYNTAX
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
 	  if (ASSEMBLER_DIALECT == ASM_ATT)
 	    putc ('.', file);
 #endif
@@ -11147,10 +11191,15 @@ memory_address_length (addr)
   disp = parts.disp;
   len = 0;
 
+  /* Rule of thumb:
+       - esp as the base always wants an index,
+       - ebp as the base always wants a displacement.  */
+
   /* Register Indirect.  */
   if (base && !index && !disp)
     {
-      /* Special cases: ebp and esp need the two-byte modrm form.  */
+      /* esp (for its index) and ebp (for its displacement) need
+	 the two-byte modrm form.  */
       if (addr == stack_pointer_rtx
 	  || addr == arg_pointer_rtx
 	  || addr == frame_pointer_rtx
@@ -11174,9 +11223,16 @@ memory_address_length (addr)
 	  else
 	    len = 4;
 	}
+      /* ebp always wants a displacement.  */
+      else if (base == hard_frame_pointer_rtx)
+        len = 1;
 
-      /* An index requires the two-byte modrm form.  */
-      if (index)
+      /* An index requires the two-byte modrm form...  */
+      if (index
+	  /* ...like esp, which always wants an index.  */
+	  || base == stack_pointer_rtx
+	  || base == arg_pointer_rtx
+	  || base == frame_pointer_rtx)
 	len += 1;
     }
 
@@ -12066,25 +12122,20 @@ struct builtin_description
   const unsigned int flag;
 };
 
-/* Used for builtins that are enabled both by -msse and -msse2.  */
-#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
-#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
-#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
-
 static const struct builtin_description bdesc_comi[] =
 {
-  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
-  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
-  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
-  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
-  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
-  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
-  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
-  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
-  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
-  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
-  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
-  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
+  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
+  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
+  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
+  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
+  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
+  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
+  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
+  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
+  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
+  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
+  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
+  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
@@ -12102,51 +12153,51 @@ static const struct builtin_description bdesc_comi[] =
 static const struct builtin_description bdesc_2arg[] =
 {
   /* SSE */
-  { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
-
-  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
-  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
-  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
-  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
-  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
-  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
-  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
-  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
-  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
-  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
-  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
-  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
-  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
-  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
-  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
-  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
-  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
-  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
-  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
-  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
-
-  { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
-
-  { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
-
-  { MASK_SSE1, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
+  { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
+  { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
+  { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
+
+  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
+  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
+  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
+  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
+  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
+  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
+  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
+  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
+  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
+  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
+  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
+  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
+  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
+  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
+  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
+  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
+  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
+  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
+  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
+  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
+
+  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
+  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
+
+  { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
+
+  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
 
   /* MMX */
   { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
@@ -12169,15 +12220,15 @@ static const struct builtin_description bdesc_2arg[] =
 
   { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
   { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
-  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
 
   { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
 
-  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
-  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
 
   { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
   { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
@@ -12186,10 +12237,10 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
   { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
 
-  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
-  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
-  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
-  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
 
   { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
@@ -12203,9 +12254,9 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
 
-  { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
-  { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
+  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
+  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
+  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
 
   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
@@ -12226,7 +12277,7 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
   { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
 
-  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
 
   /* SSE2 */
@@ -12356,26 +12407,34 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
-  { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
+  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
   { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
+  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
+
+  /* PNI MMX */
+  { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
+  { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
+  { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
+  { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
+  { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
+  { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
 };
 
 static const struct builtin_description bdesc_1arg[] =
 {
-  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
-  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
 
-  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
-  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
 
-  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
-  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
-  { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
-  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
-  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
-  { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
+  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
+  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
+  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
+  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
+  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
+  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
   { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
@@ -12397,14 +12456,19 @@ static const struct builtin_description bdesc_1arg[] =
 
   { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
   { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
-  { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
-  { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
+  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
+  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
   { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
   { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
+  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
+
+  /* PNI */
+  { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
+  { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
+  { MASK_PNI, CODE_FOR_movddup,  0, IX86_BUILTIN_MOVDDUP, 0, 0 }
 };
 
 void
@@ -12495,6 +12559,13 @@ ix86_init_mmx_sse_builtins ()
     = build_function_type (void_type_node, void_list_node);
   tree void_ftype_unsigned
     = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
+  tree void_ftype_unsigned_unsigned
+    = build_function_type_list (void_type_node, unsigned_type_node,
+				unsigned_type_node, NULL_TREE);
+  tree void_ftype_pcvoid_unsigned_unsigned
+    = build_function_type_list (void_type_node, const_ptr_type_node,
+				unsigned_type_node, unsigned_type_node,
+				NULL_TREE);
   tree unsigned_ftype_void
     = build_function_type (unsigned_type_node, void_list_node);
   tree di_ftype_void
@@ -12813,52 +12884,52 @@ ix86_init_mmx_sse_builtins ()
   def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
   def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
 
-  def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
-  def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
-  def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
-  def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
-  def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
-  def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
-  def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
-  def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
-  def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
-
-  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
-  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
-
-  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
-
-  def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
-
-  def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
-
-  def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
-  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
-  def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
-  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
-
-  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
-
-  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
-
-  def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
-
-  def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
+  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
+  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
+  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
+  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
+  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
+  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
+  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
+  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
+  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
+
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
+
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
+
+  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
+  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
+
+  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
+
+  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
+  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
+
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
+
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
+
+  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
+  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
+  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
+  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
+
+  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
 
   /* Original 3DNow!  */
   def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
@@ -12890,7 +12961,7 @@ ix86_init_mmx_sse_builtins ()
   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
 
-  def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
+  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
 
   /* SSE2 */
   def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
@@ -12941,15 +13012,15 @@ ix86_init_mmx_sse_builtins ()
 
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
   def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
-  def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
-  def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
+  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
+  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
   def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
-  def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
+  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
 
@@ -12973,7 +13044,7 @@ ix86_init_mmx_sse_builtins ()
   def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
   def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
 
-  def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
+  def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
   def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
@@ -13000,6 +13071,26 @@ ix86_init_mmx_sse_builtins ()
   def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
+
+  /* Prescott New Instructions.  */
+  def_builtin (MASK_PNI, "__builtin_ia32_monitor",
+	       void_ftype_pcvoid_unsigned_unsigned,
+	       IX86_BUILTIN_MONITOR);
+  def_builtin (MASK_PNI, "__builtin_ia32_mwait",
+	       void_ftype_unsigned_unsigned,
+	       IX86_BUILTIN_MWAIT);
+  def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
+	       v4sf_ftype_v4sf,
+	       IX86_BUILTIN_MOVSHDUP);
+  def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
+	       v4sf_ftype_v4sf,
+	       IX86_BUILTIN_MOVSLDUP);
+  def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
+	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
+  def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
+	       v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
+  def_builtin (MASK_PNI, "__builtin_ia32_movddup",
+	       v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
 }
 
 /* Errors in the source file can cause expand_expr to return const0_rtx
@@ -13808,6 +13899,41 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     case IX86_BUILTIN_STORED:
       return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
 
+    case IX86_BUILTIN_MONITOR:
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+      if (!REG_P (op0))
+	op0 = copy_to_mode_reg (SImode, op0);
+      if (!REG_P (op1))
+	op1 = copy_to_mode_reg (SImode, op1);
+      if (!REG_P (op2))
+	op2 = copy_to_mode_reg (SImode, op2);
+      emit_insn (gen_monitor (op0, op1, op2));
+      return 0;
+
+    case IX86_BUILTIN_MWAIT:
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      if (!REG_P (op0))
+	op0 = copy_to_mode_reg (SImode, op0);
+      if (!REG_P (op1))
+	op1 = copy_to_mode_reg (SImode, op1);
+      emit_insn (gen_mwait (op0, op1));
+      return 0;
+
+    case IX86_BUILTIN_LOADDDUP:
+      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
+
+    case IX86_BUILTIN_LDDQU:
+      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
+				       1);
+
     default:
       break;
     }
diff --git a/contrib/gcc/config/i386/i386.h b/contrib/gcc/config/i386/i386.h
index ead133b..32bc5d0 100644
--- a/contrib/gcc/config/i386/i386.h
+++ b/contrib/gcc/config/i386/i386.h
@@ -114,10 +114,11 @@ extern int target_flags;
 #define MASK_MMX		0x00002000	/* Support MMX regs/builtins */
 #define MASK_SSE		0x00004000	/* Support SSE regs/builtins */
 #define MASK_SSE2		0x00008000	/* Support SSE2 regs/builtins */
-#define MASK_3DNOW		0x00010000	/* Support 3Dnow builtins */
-#define MASK_3DNOW_A		0x00020000	/* Support Athlon 3Dnow builtins */
-#define MASK_128BIT_LONG_DOUBLE 0x00040000	/* long double size is 128bit */
-#define MASK_64BIT		0x00080000	/* Produce 64bit code */
+#define MASK_PNI		0x00010000	/* Support PNI builtins */
+#define MASK_3DNOW		0x00020000	/* Support 3Dnow builtins */
+#define MASK_3DNOW_A		0x00040000	/* Support Athlon 3Dnow builtins */
+#define MASK_128BIT_LONG_DOUBLE 0x00080000	/* long double size is 128bit */
+#define MASK_64BIT		0x00100000	/* Produce 64bit code */
 
 /* Unused:			0x03f0000	*/
 
@@ -271,8 +272,9 @@ extern int x86_prefetch_sse;
 
 #define ASSEMBLER_DIALECT (ix86_asm_dialect)
 
-#define TARGET_SSE ((target_flags & (MASK_SSE | MASK_SSE2)) != 0)
+#define TARGET_SSE ((target_flags & MASK_SSE) != 0)
 #define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0)
+#define TARGET_PNI ((target_flags & MASK_PNI) != 0)
 #define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
 #define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \
 			     && (ix86_fpmath & FPMATH_387))
@@ -366,6 +368,10 @@ extern int x86_prefetch_sse;
     N_("Support MMX, SSE and SSE2 built-in functions and code generation") }, \
   { "no-sse2",			 -MASK_SSE2,				      \
     N_("Do not support MMX, SSE and SSE2 built-in functions and code generation") },    \
+  { "pni",			 MASK_PNI,				      \
+    N_("Support MMX, SSE, SSE2 and PNI built-in functions and code generation") }, \
+  { "no-pni",			 -MASK_PNI,				      \
+    N_("Do not support MMX, SSE, SSE2 and PNI built-in functions and code generation") }, \
   { "128bit-long-double",	 MASK_128BIT_LONG_DOUBLE,		      \
     N_("sizeof(long double) is 16") },					      \
   { "96bit-long-double",	-MASK_128BIT_LONG_DOUBLE,		      \
@@ -554,6 +560,8 @@ extern int x86_prefetch_sse;
 	builtin_define ("__SSE__");				\
       if (TARGET_SSE2)						\
 	builtin_define ("__SSE2__");				\
+      if (TARGET_PNI)						\
+	builtin_define ("__PNI__");				\
       if (TARGET_SSE_MATH && TARGET_SSE)			\
 	builtin_define ("__SSE_MATH__");			\
       if (TARGET_SSE_MATH && TARGET_SSE2)			\
@@ -2480,6 +2488,22 @@ enum ix86_builtins
   IX86_BUILTIN_MFENCE,
   IX86_BUILTIN_LFENCE,
 
+  /* Prescott New Instructions.  */
+  IX86_BUILTIN_ADDSUBPS,
+  IX86_BUILTIN_HADDPS,
+  IX86_BUILTIN_HSUBPS,
+  IX86_BUILTIN_MOVSHDUP,
+  IX86_BUILTIN_MOVSLDUP,
+  IX86_BUILTIN_ADDSUBPD,
+  IX86_BUILTIN_HADDPD,
+  IX86_BUILTIN_HSUBPD,
+  IX86_BUILTIN_LOADDDUP,
+  IX86_BUILTIN_MOVDDUP,
+  IX86_BUILTIN_LDDQU,
+
+  IX86_BUILTIN_MONITOR,
+  IX86_BUILTIN_MWAIT,
+
   IX86_BUILTIN_MAX
 };
 
diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md
index 1fa2998..e4377a5 100644
--- a/contrib/gcc/config/i386/i386.md
+++ b/contrib/gcc/config/i386/i386.md
@@ -110,6 +110,13 @@
    (UNSPEC_MFENCE		59)
    (UNSPEC_LFENCE		60)
    (UNSPEC_PSADBW		61)
+   (UNSPEC_ADDSUB		71)
+   (UNSPEC_HADD			72)
+   (UNSPEC_HSUB			73)
+   (UNSPEC_MOVSHDUP		74)
+   (UNSPEC_MOVSLDUP		75)
+   (UNSPEC_LDQQU		76)
+   (UNSPEC_MOVDDUP		77)
   ])
 
 (define_constants
@@ -120,6 +127,8 @@
    (UNSPECV_STMXCSR		40)
    (UNSPECV_FEMMS		46)
    (UNSPECV_CLFLUSH		57)
+   (UNSPECV_MONITOR		69)
+   (UNSPECV_MWAIT		70)
   ])
 
 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
@@ -22072,3 +22081,129 @@
   "lfence"
   [(set_attr "type" "sse")
    (set_attr "memory" "unknown")])
+
+;; PNI
+
+(define_insn "mwait"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+		     (match_operand:SI 1 "register_operand" "c")]
+		    UNSPECV_MWAIT)]
+  "TARGET_PNI"
+  "mwait\t%0, %1"
+  [(set_attr "length" "3")])
+
+(define_insn "monitor"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+		     (match_operand:SI 1 "register_operand" "c")
+		     (match_operand:SI 2 "register_operand" "d")]
+		    UNSPECV_MONITOR)]
+  "TARGET_PNI"
+  "monitor\t%0, %1, %2"
+  [(set_attr "length" "3")])
+
+;; PNI arithmetic
+
+(define_insn "addsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
+		     UNSPEC_ADDSUB))]
+  "TARGET_PNI"
+  "addsubps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "addsubv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
+		     UNSPEC_ADDSUB))]
+  "TARGET_PNI"
+  "addsubpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "haddv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
+		     UNSPEC_HADD))]
+  "TARGET_PNI"
+  "haddps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "haddv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
+		     UNSPEC_HADD))]
+  "TARGET_PNI"
+  "haddpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "hsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
+		     UNSPEC_HSUB))]
+  "TARGET_PNI"
+  "hsubps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "hsubv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
+		     UNSPEC_HSUB))]
+  "TARGET_PNI"
+  "hsubpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "movshdup"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF
+	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))]
+  "TARGET_PNI"
+  "movshdup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "movsldup"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF
+	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))]
+  "TARGET_PNI"
+  "movsldup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "lddqu"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+	(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
+		       UNSPEC_LDQQU))]
+  "TARGET_PNI"
+  "lddqu\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+(define_insn "loadddup"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+	(vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))]
+  "TARGET_PNI"
+  "movddup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DF")])
+
+(define_insn "movddup"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+	(vec_duplicate:V2DF
+	 (vec_select:DF (match_operand:V2DF 1 "register_operand" "x")
+			(parallel [(const_int 0)]))))]
+  "TARGET_PNI"
+  "movddup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DF")])
diff --git a/contrib/gcc/config/i386/mmintrin.h b/contrib/gcc/config/i386/mmintrin.h
index 7b4aa01..00e77e4 100644
--- a/contrib/gcc/config/i386/mmintrin.h
+++ b/contrib/gcc/config/i386/mmintrin.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
 
    This file is part of GNU CC.
 
@@ -25,7 +25,7 @@
    Public License.  */
 
 /* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 5.0.  */
+   User Guide and Reference, version 8.0.  */
 
 #ifndef _MMINTRIN_H_INCLUDED
 #define _MMINTRIN_H_INCLUDED
@@ -48,6 +48,12 @@ _mm_empty (void)
   __builtin_ia32_emms ();
 }
 
+static __inline void
+_m_empty (void)
+{
+  _mm_empty ();
+}
+
 /* Convert I to a __m64 object.  The integer is zero-extended to 64-bits.  */
 static __inline __m64 
 _mm_cvtsi32_si64 (int __i)
@@ -56,6 +62,12 @@ _mm_cvtsi32_si64 (int __i)
   return (__m64) __tmp;
 }
 
+static __inline __m64 
+_m_from_int (int __i)
+{
+  return _mm_cvtsi32_si64 (__i);
+}
+
 #ifdef __x86_64__
 /* Convert I to a __m64 object.  */
 static __inline __m64 
@@ -80,6 +92,12 @@ _mm_cvtsi64_si32 (__m64 __i)
   return __tmp;
 }
 
+static __inline int
+_m_to_int (__m64 __i)
+{
+  return _mm_cvtsi64_si32 (__i);
+}
+
 #ifdef __x86_64__
 /* Convert the lower 32 bits of the __m64 object into an integer.  */
 static __inline long long
@@ -98,6 +116,12 @@ _mm_packs_pi16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_packsswb (__m64 __m1, __m64 __m2)
+{
+  return _mm_packs_pi16 (__m1, __m2);
+}
+
 /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
    the result, and the two 32-bit values from M2 into the upper two 16-bit
    values of the result, all with signed saturation.  */
@@ -107,6 +131,12 @@ _mm_packs_pi32 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
 }
 
+static __inline __m64
+_m_packssdw (__m64 __m1, __m64 __m2)
+{
+  return _mm_packs_pi32 (__m1, __m2);
+}
+
 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
    the result, and the four 16-bit values from M2 into the upper four 8-bit
    values of the result, all with unsigned saturation.  */
@@ -116,6 +146,12 @@ _mm_packs_pu16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_packuswb (__m64 __m1, __m64 __m2)
+{
+  return _mm_packs_pu16 (__m1, __m2);
+}
+
 /* Interleave the four 8-bit values from the high half of M1 with the four
    8-bit values from the high half of M2.  */
 static __inline __m64
@@ -124,6 +160,12 @@ _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
 }
 
+static __inline __m64
+_m_punpckhbw (__m64 __m1, __m64 __m2)
+{
+  return _mm_unpackhi_pi8 (__m1, __m2);
+}
+
 /* Interleave the two 16-bit values from the high half of M1 with the two
    16-bit values from the high half of M2.  */
 static __inline __m64
@@ -132,6 +174,12 @@ _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_punpckhwd (__m64 __m1, __m64 __m2)
+{
+  return _mm_unpackhi_pi16 (__m1, __m2);
+}
+
 /* Interleave the 32-bit value from the high half of M1 with the 32-bit
    value from the high half of M2.  */
 static __inline __m64
@@ -140,6 +188,12 @@ _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
 }
 
+static __inline __m64
+_m_punpckhdq (__m64 __m1, __m64 __m2)
+{
+  return _mm_unpackhi_pi32 (__m1, __m2);
+}
+
 /* Interleave the four 8-bit values from the low half of M1 with the four
    8-bit values from the low half of M2.  */
 static __inline __m64
@@ -148,6 +202,12 @@ _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
 }
 
+static __inline __m64
+_m_punpcklbw (__m64 __m1, __m64 __m2)
+{
+  return _mm_unpacklo_pi8 (__m1, __m2);
+}
+
 /* Interleave the two 16-bit values from the low half of M1 with the two
    16-bit values from the low half of M2.  */
 static __inline __m64
@@ -156,6 +216,12 @@ _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_punpcklwd (__m64 __m1, __m64 __m2)
+{
+  return _mm_unpacklo_pi16 (__m1, __m2);
+}
+
 /* Interleave the 32-bit value from the low half of M1 with the 32-bit
    value from the low half of M2.  */
 static __inline __m64
@@ -164,6 +230,12 @@ _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
 }
 
+static __inline __m64
+_m_punpckldq (__m64 __m1, __m64 __m2)
+{
+  return _mm_unpacklo_pi32 (__m1, __m2);
+}
+
 /* Add the 8-bit values in M1 to the 8-bit values in M2.  */
 static __inline __m64
 _mm_add_pi8 (__m64 __m1, __m64 __m2)
@@ -171,6 +243,12 @@ _mm_add_pi8 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
 }
 
+static __inline __m64
+_m_paddb (__m64 __m1, __m64 __m2)
+{
+  return _mm_add_pi8 (__m1, __m2);
+}
+
 /* Add the 16-bit values in M1 to the 16-bit values in M2.  */
 static __inline __m64
 _mm_add_pi16 (__m64 __m1, __m64 __m2)
@@ -178,6 +256,12 @@ _mm_add_pi16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_paddw (__m64 __m1, __m64 __m2)
+{
+  return _mm_add_pi16 (__m1, __m2);
+}
+
 /* Add the 32-bit values in M1 to the 32-bit values in M2.  */
 static __inline __m64
 _mm_add_pi32 (__m64 __m1, __m64 __m2)
@@ -185,6 +269,12 @@ _mm_add_pi32 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
 }
 
+static __inline __m64
+_m_paddd (__m64 __m1, __m64 __m2)
+{
+  return _mm_add_pi32 (__m1, __m2);
+}
+
 /* Add the 64-bit values in M1 to the 64-bit values in M2.  */
 static __inline __m64
 _mm_add_si64 (__m64 __m1, __m64 __m2)
@@ -200,6 +290,12 @@ _mm_adds_pi8 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
 }
 
+static __inline __m64
+_m_paddsb (__m64 __m1, __m64 __m2)
+{
+  return _mm_adds_pi8 (__m1, __m2);
+}
+
 /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
    saturated arithmetic.  */
 static __inline __m64
@@ -208,6 +304,12 @@ _mm_adds_pi16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_paddsw (__m64 __m1, __m64 __m2)
+{
+  return _mm_adds_pi16 (__m1, __m2);
+}
+
 /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
    saturated arithmetic.  */
 static __inline __m64
@@ -216,6 +318,12 @@ _mm_adds_pu8 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
 }
 
+static __inline __m64
+_m_paddusb (__m64 __m1, __m64 __m2)
+{
+  return _mm_adds_pu8 (__m1, __m2);
+}
+
 /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
    saturated arithmetic.  */
 static __inline __m64
@@ -224,6 +332,12 @@ _mm_adds_pu16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_paddusw (__m64 __m1, __m64 __m2)
+{
+  return _mm_adds_pu16 (__m1, __m2);
+}
+
 /* Subtract the 8-bit values in M2 from the 8-bit values in M1.  */
 static __inline __m64
 _mm_sub_pi8 (__m64 __m1, __m64 __m2)
@@ -231,6 +345,12 @@ _mm_sub_pi8 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
 }
 
+static __inline __m64
+_m_psubb (__m64 __m1, __m64 __m2)
+{
+  return _mm_sub_pi8 (__m1, __m2);
+}
+
 /* Subtract the 16-bit values in M2 from the 16-bit values in M1.  */
 static __inline __m64
 _mm_sub_pi16 (__m64 __m1, __m64 __m2)
@@ -238,6 +358,12 @@ _mm_sub_pi16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_psubw (__m64 __m1, __m64 __m2)
+{
+  return _mm_sub_pi16 (__m1, __m2);
+}
+
 /* Subtract the 32-bit values in M2 from the 32-bit values in M1.  */
 static __inline __m64
 _mm_sub_pi32 (__m64 __m1, __m64 __m2)
@@ -245,6 +371,12 @@ _mm_sub_pi32 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
 }
 
+static __inline __m64
+_m_psubd (__m64 __m1, __m64 __m2)
+{
+  return _mm_sub_pi32 (__m1, __m2);
+}
+
 /* Add the 64-bit values in M1 to the 64-bit values in M2.  */
 static __inline __m64
 _mm_sub_si64 (__m64 __m1, __m64 __m2)
@@ -260,6 +392,12 @@ _mm_subs_pi8 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
 }
 
+static __inline __m64
+_m_psubsb (__m64 __m1, __m64 __m2)
+{
+  return _mm_subs_pi8 (__m1, __m2);
+}
+
 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
    signed saturating arithmetic.  */
 static __inline __m64
@@ -268,6 +406,12 @@ _mm_subs_pi16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_psubsw (__m64 __m1, __m64 __m2)
+{
+  return _mm_subs_pi16 (__m1, __m2);
+}
+
 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
    unsigned saturating arithmetic.  */
 static __inline __m64
@@ -276,6 +420,12 @@ _mm_subs_pu8 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
 }
 
+static __inline __m64
+_m_psubusb (__m64 __m1, __m64 __m2)
+{
+  return _mm_subs_pu8 (__m1, __m2);
+}
+
 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
    unsigned saturating arithmetic.  */
 static __inline __m64
@@ -284,6 +434,12 @@ _mm_subs_pu16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_psubusw (__m64 __m1, __m64 __m2)
+{
+  return _mm_subs_pu16 (__m1, __m2);
+}
+
 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
    four 32-bit intermediate results, which are then summed by pairs to
    produce two 32-bit results.  */
@@ -293,6 +449,12 @@ _mm_madd_pi16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_pmaddwd (__m64 __m1, __m64 __m2)
+{
+  return _mm_madd_pi16 (__m1, __m2);
+}
+
 /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
    M2 and produce the high 16 bits of the 32-bit results.  */
 static __inline __m64
@@ -301,6 +463,12 @@ _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_pmulhw (__m64 __m1, __m64 __m2)
+{
+  return _mm_mulhi_pi16 (__m1, __m2);
+}
+
 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
    the low 16 bits of the results.  */
 static __inline __m64
@@ -309,6 +477,12 @@ _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_pmullw (__m64 __m1, __m64 __m2)
+{
+  return _mm_mullo_pi16 (__m1, __m2);
+}
+
 /* Shift four 16-bit values in M left by COUNT.  */
 static __inline __m64
 _mm_sll_pi16 (__m64 __m, __m64 __count)
@@ -317,11 +491,23 @@ _mm_sll_pi16 (__m64 __m, __m64 __count)
 }
 
 static __inline __m64
+_m_psllw (__m64 __m, __m64 __count)
+{
+  return _mm_sll_pi16 (__m, __count);
+}
+
+static __inline __m64
 _mm_slli_pi16 (__m64 __m, int __count)
 {
   return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
 }
 
+static __inline __m64
+_m_psllwi (__m64 __m, int __count)
+{
+  return _mm_slli_pi16 (__m, __count);
+}
+
 /* Shift two 32-bit values in M left by COUNT.  */
 static __inline __m64
 _mm_sll_pi32 (__m64 __m, __m64 __count)
@@ -330,11 +516,23 @@ _mm_sll_pi32 (__m64 __m, __m64 __count)
 }
 
 static __inline __m64
+_m_pslld (__m64 __m, __m64 __count)
+{
+  return _mm_sll_pi32 (__m, __count);
+}
+
+static __inline __m64
 _mm_slli_pi32 (__m64 __m, int __count)
 {
   return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
 }
 
+static __inline __m64
+_m_pslldi (__m64 __m, int __count)
+{
+  return _mm_slli_pi32 (__m, __count);
+}
+
 /* Shift the 64-bit value in M left by COUNT.  */
 static __inline __m64
 _mm_sll_si64 (__m64 __m, __m64 __count)
@@ -343,11 +541,23 @@ _mm_sll_si64 (__m64 __m, __m64 __count)
 }
 
 static __inline __m64
+_m_psllq (__m64 __m, __m64 __count)
+{
+  return _mm_sll_si64 (__m, __count);
+}
+
+static __inline __m64
 _mm_slli_si64 (__m64 __m, int __count)
 {
   return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
 }
 
+static __inline __m64
+_m_psllqi (__m64 __m, int __count)
+{
+  return _mm_slli_si64 (__m, __count);
+}
+
 /* Shift four 16-bit values in M right by COUNT; shift in the sign bit.  */
 static __inline __m64
 _mm_sra_pi16 (__m64 __m, __m64 __count)
@@ -356,11 +566,23 @@ _mm_sra_pi16 (__m64 __m, __m64 __count)
 }
 
 static __inline __m64
+_m_psraw (__m64 __m, __m64 __count)
+{
+  return _mm_sra_pi16 (__m, __count);
+}
+
+static __inline __m64
 _mm_srai_pi16 (__m64 __m, int __count)
 {
   return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
 }
 
+static __inline __m64
+_m_psrawi (__m64 __m, int __count)
+{
+  return _mm_srai_pi16 (__m, __count);
+}
+
 /* Shift two 32-bit values in M right by COUNT; shift in the sign bit.  */
 static __inline __m64
 _mm_sra_pi32 (__m64 __m, __m64 __count)
@@ -369,11 +591,23 @@ _mm_sra_pi32 (__m64 __m, __m64 __count)
 }
 
 static __inline __m64
+_m_psrad (__m64 __m, __m64 __count)
+{
+  return _mm_sra_pi32 (__m, __count);
+}
+
+static __inline __m64
 _mm_srai_pi32 (__m64 __m, int __count)
 {
   return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
 }
 
+static __inline __m64
+_m_psradi (__m64 __m, int __count)
+{
+  return _mm_srai_pi32 (__m, __count);
+}
+
 /* Shift four 16-bit values in M right by COUNT; shift in zeros.  */
 static __inline __m64
 _mm_srl_pi16 (__m64 __m, __m64 __count)
@@ -382,11 +616,23 @@ _mm_srl_pi16 (__m64 __m, __m64 __count)
 }
 
 static __inline __m64
+_m_psrlw (__m64 __m, __m64 __count)
+{
+  return _mm_srl_pi16 (__m, __count);
+}
+
+static __inline __m64
 _mm_srli_pi16 (__m64 __m, int __count)
 {
   return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
 }
 
+static __inline __m64
+_m_psrlwi (__m64 __m, int __count)
+{
+  return _mm_srli_pi16 (__m, __count);
+}
+
 /* Shift two 32-bit values in M right by COUNT; shift in zeros.  */
 static __inline __m64
 _mm_srl_pi32 (__m64 __m, __m64 __count)
@@ -395,11 +641,23 @@ _mm_srl_pi32 (__m64 __m, __m64 __count)
 }
 
 static __inline __m64
+_m_psrld (__m64 __m, __m64 __count)
+{
+  return _mm_srl_pi32 (__m, __count);
+}
+
+static __inline __m64
 _mm_srli_pi32 (__m64 __m, int __count)
 {
   return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
 }
 
+static __inline __m64
+_m_psrldi (__m64 __m, int __count)
+{
+  return _mm_srli_pi32 (__m, __count);
+}
+
 /* Shift the 64-bit value in M left by COUNT; shift in zeros.  */
 static __inline __m64
 _mm_srl_si64 (__m64 __m, __m64 __count)
@@ -408,11 +666,23 @@ _mm_srl_si64 (__m64 __m, __m64 __count)
 }
 
 static __inline __m64
+_m_psrlq (__m64 __m, __m64 __count)
+{
+  return _mm_srl_si64 (__m, __count);
+}
+
+static __inline __m64
 _mm_srli_si64 (__m64 __m, int __count)
 {
   return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
 }
 
+static __inline __m64
+_m_psrlqi (__m64 __m, int __count)
+{
+  return _mm_srli_si64 (__m, __count);
+}
+
 /* Bit-wise AND the 64-bit values in M1 and M2.  */
 static __inline __m64
 _mm_and_si64 (__m64 __m1, __m64 __m2)
@@ -420,6 +690,12 @@ _mm_and_si64 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2);
 }
 
+static __inline __m64
+_m_pand (__m64 __m1, __m64 __m2)
+{
+  return _mm_and_si64 (__m1, __m2);
+}
+
 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
    64-bit value in M2.  */
 static __inline __m64
@@ -428,6 +704,12 @@ _mm_andnot_si64 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2);
 }
 
+static __inline __m64
+_m_pandn (__m64 __m1, __m64 __m2)
+{
+  return _mm_andnot_si64 (__m1, __m2);
+}
+
 /* Bit-wise inclusive OR the 64-bit values in M1 and M2.  */
 static __inline __m64
 _mm_or_si64 (__m64 __m1, __m64 __m2)
@@ -435,6 +717,12 @@ _mm_or_si64 (__m64 __m1, __m64 __m2)
   return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2);
 }
 
+static __inline __m64
+_m_por (__m64 __m1, __m64 __m2)
+{
+  return _mm_or_si64 (__m1, __m2);
+}
+
 /* Bit-wise exclusive OR the 64-bit values in M1 and M2.  */
 static __inline __m64
 _mm_xor_si64 (__m64 __m1, __m64 __m2)
@@ -442,6 +730,12 @@ _mm_xor_si64 (__m64 __m1, __m64 __m2)
   return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2);
 }
 
+static __inline __m64
+_m_pxor (__m64 __m1, __m64 __m2)
+{
+  return _mm_xor_si64 (__m1, __m2);
+}
+
 /* Compare eight 8-bit values.  The result of the comparison is 0xFF if the
    test is true and zero if false.  */
 static __inline __m64
@@ -451,11 +745,23 @@ _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
 }
 
 static __inline __m64
+_m_pcmpeqb (__m64 __m1, __m64 __m2)
+{
+  return _mm_cmpeq_pi8 (__m1, __m2);
+}
+
+static __inline __m64
 _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
 {
   return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
 }
 
+static __inline __m64
+_m_pcmpgtb (__m64 __m1, __m64 __m2)
+{
+  return _mm_cmpgt_pi8 (__m1, __m2);
+}
+
 /* Compare four 16-bit values.  The result of the comparison is 0xFFFF if
    the test is true and zero if false.  */
 static __inline __m64
@@ -465,11 +771,23 @@ _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
 }
 
 static __inline __m64
+_m_pcmpeqw (__m64 __m1, __m64 __m2)
+{
+  return _mm_cmpeq_pi16 (__m1, __m2);
+}
+
+static __inline __m64
 _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
 {
   return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
 }
 
+static __inline __m64
+_m_pcmpgtw (__m64 __m1, __m64 __m2)
+{
+  return _mm_cmpgt_pi16 (__m1, __m2);
+}
+
 /* Compare two 32-bit values.  The result of the comparison is 0xFFFFFFFF if
    the test is true and zero if false.  */
 static __inline __m64
@@ -479,11 +797,23 @@ _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
 }
 
 static __inline __m64
+_m_pcmpeqd (__m64 __m1, __m64 __m2)
+{
+  return _mm_cmpeq_pi32 (__m1, __m2);
+}
+
+static __inline __m64
 _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
 {
   return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
 }
 
+static __inline __m64
+_m_pcmpgtd (__m64 __m1, __m64 __m2)
+{
+  return _mm_cmpgt_pi32 (__m1, __m2);
+}
+
 /* Creates a 64-bit zero.  */
 static __inline __m64
 _mm_setzero_si64 (void)
@@ -574,7 +904,7 @@ _mm_set1_pi16 (short __w)
   return _mm_set1_pi32 (__i);
 }
 
-/* Creates a vector of four 16-bit values, all elements containing B.  */
+/* Creates a vector of eight 8-bit values, all elements containing B.  */
 static __inline __m64
 _mm_set1_pi8 (char __b)
 {
diff --git a/contrib/gcc/config/i386/pmmintrin.h b/contrib/gcc/config/i386/pmmintrin.h
new file mode 100644
index 0000000..5649c00
--- /dev/null
+++ b/contrib/gcc/config/i386/pmmintrin.h
@@ -0,0 +1,132 @@
+/* Copyright (C) 2003 Free Software Foundation, Inc.
+
+   This file is part of GNU CC.
+
+   GNU CC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   GNU CC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GNU CC; see the file COPYING.  If not, write to
+   the Free Software Foundation, 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you include this header file into source
+   files compiled by GCC, this header file does not by itself cause
+   the resulting executable to be covered by the GNU General Public
+   License.  This exception does not however invalidate any other
+   reasons why the executable file might be covered by the GNU General
+   Public License.  */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+   User Guide and Reference, version 8.0.  */
+
+#ifndef _PMMINTRIN_H_INCLUDED
+#define _PMMINTRIN_H_INCLUDED
+
+#ifdef __PNI__
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+/* Additional bits in the MXCSR.  */
+#define _MM_DENORMALS_ZERO_MASK		0x0040
+#define _MM_DENORMALS_ZERO_ON		0x0040
+#define _MM_DENORMALS_ZERO_OFF		0x0000
+
+#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
+  _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))
+#define _MM_GET_DENORMALS_ZERO_MODE() \
+  (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
+
+static __inline __m128
+_mm_addsub_ps (__m128 __X, __m128 __Y)
+{
+  return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+static __inline __m128
+_mm_hadd_ps (__m128 __X, __m128 __Y)
+{
+  return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+static __inline __m128
+_mm_hsub_ps (__m128 __X, __m128 __Y)
+{
+  return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+static __inline __m128
+_mm_movehdup_ps (__m128 __X)
+{
+  return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
+}
+
+static __inline __m128
+_mm_moveldup_ps (__m128 __X)
+{
+  return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
+}
+
+static __inline __m128d
+_mm_addsub_pd (__m128d __X, __m128d __Y)
+{
+  return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
+}
+
+static __inline __m128d
+_mm_hadd_pd (__m128d __X, __m128d __Y)
+{
+  return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
+}
+
+static __inline __m128d
+_mm_hsub_pd (__m128d __X, __m128d __Y)
+{
+  return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
+}
+
+static __inline __m128d
+_mm_loaddup_pd (double const *__P)
+{
+  return (__m128d) __builtin_ia32_loadddup (__P);
+}
+
+static __inline __m128d
+_mm_movedup_pd (__m128d __X)
+{
+  return (__m128d) __builtin_ia32_movddup ((__v2df)__X);
+}
+
+static __inline __m128i
+_mm_lddqu_si128 (__m128i const *__P)
+{
+  return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
+}
+
+#if 0
+static __inline void
+_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
+{
+  __builtin_ia32_monitor (__P, __E, __H);
+}
+
+static __inline void
+_mm_mwait (unsigned int __E, unsigned int __H)
+{
+  __builtin_ia32_mwait (__E, __H);
+}
+#else
+#define _mm_monitor(P, E, H)	__builtin_ia32_monitor ((P), (E), (H))
+#define _mm_mwait(E, H)		__builtin_ia32_mwait ((E), (H))
+#endif
+
+#endif /* __PNI__ */
+
+#endif /* _PMMINTRIN_H_INCLUDED */
diff --git a/contrib/gcc/config/i386/sco5.h b/contrib/gcc/config/i386/sco5.h
index 815e457..2a28e54 100644
--- a/contrib/gcc/config/i386/sco5.h
+++ b/contrib/gcc/config/i386/sco5.h
@@ -1,7 +1,7 @@
 /* Definitions for Intel 386 running SCO Unix System V 3.2 Version 5.
-   Copyright (C) 1992, 1995, 1996, 1997, 1998, 1999, 2000, 2002
+   Copyright (C) 1992, 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2003
    Free Software Foundation, Inc.
-   Contributed by Kean Johnston (hug@netcom.com)
+   Contributed by Kean Johnston (jkj@sco.com)
 
 This file is part of GNU CC.
 
@@ -20,230 +20,36 @@ along with GNU CC; see the file COPYING.  If not, write to
 the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */
 
+#undef TARGET_VERSION
 #define TARGET_VERSION fprintf (stderr, " (i386, SCO OpenServer 5 Syntax)");
 
-#undef LPREFIX
-#define LPREFIX				".L"
-
-#undef ALIGN_ASM_OP
-#define ALIGN_ASM_OP			"\t.align\t"
-
-#undef ASCII_DATA_ASM_OP
-#define ASCII_DATA_ASM_OP		"\t.ascii\t"
-
-#undef IDENT_ASM_OP
-#define IDENT_ASM_OP			"\t.ident\t"
-
-#undef COMMON_ASM_OP
-#define COMMON_ASM_OP			"\t.comm\t"
-
-#undef SET_ASM_OP
-#define SET_ASM_OP			"\t.set\t"
-
-#undef LOCAL_ASM_OP
-#define LOCAL_ASM_OP			"\t.local\t"
-
-#undef ASM_SHORT
-#define ASM_SHORT			"\t.value\t"
-
-#undef ASM_LONG
-#define ASM_LONG			"\t.long\t"
+/* The native link editor does not support linkonce stuff */
+#define SUPPORTS_ONE_ONLY		0
 
 #undef ASM_QUAD
 
-#undef TYPE_ASM_OP
-#define TYPE_ASM_OP			"\t.type\t"
-
-#undef SIZE_ASM_OP
-#define SIZE_ASM_OP			"\t.size\t"
-
-#undef STRING_ASM_OP
-#define STRING_ASM_OP			"\t.string\t"
-
-#undef SKIP_ASM_OP
-#define SKIP_ASM_OP			"\t.zero\t"
-
 #undef GLOBAL_ASM_OP
 #define GLOBAL_ASM_OP			"\t.globl\t"
 
-#undef EH_FRAME_SECTION_ASM_OP
-#define EH_FRAME_SECTION_NAME_COFF	".ehfram"
-#define EH_FRAME_SECTION_NAME_ELF	".eh_frame"
-#define EH_FRAME_SECTION_NAME	\
-  ((TARGET_ELF) ? EH_FRAME_SECTION_NAME_ELF : EH_FRAME_SECTION_NAME_COFF)
-
-/* Avoid problems (long sectino names, forward assembler refs) with DWARF
-   exception unwinding when we're generating COFF */
-#define DWARF2_UNWIND_INFO	\
-  ((TARGET_ELF) ? 1 : 0 )  
-
-#undef READONLY_DATA_SECTION_ASM_OP
-#define READONLY_DATA_SECTION_ASM_OP_COFF	"\t.section\t.rodata, \"x\""
-#define READONLY_DATA_SECTION_ASM_OP_ELF	"\t.section\t.rodata"
-#define READONLY_DATA_SECTION_ASM_OP		\
-  ((TARGET_ELF)					\
-   ? READONLY_DATA_SECTION_ASM_OP_ELF		\
-   : READONLY_DATA_SECTION_ASM_OP_COFF)
-
-#undef INIT_SECTION_ASM_OP
-#define INIT_SECTION_ASM_OP_ELF		"\t.section\t.init"
-/* Rename these for COFF because crt1.o will try to run them.  */
-#define INIT_SECTION_ASM_OP_COFF	"\t.section\t.ctor ,\"x\""
-#define INIT_SECTION_ASM_OP	\
-  ((TARGET_ELF) ? INIT_SECTION_ASM_OP_ELF : INIT_SECTION_ASM_OP_COFF)
-
-#undef CTORS_SECTION_ASM_OP
-#define CTORS_SECTION_ASM_OP_ELF	"\t.section\t.ctors,\"aw\""
-#define CTORS_SECTION_ASM_OP_COFF	INIT_SECTION_ASM_OP_COFF
-#define CTORS_SECTION_ASM_OP	\
- ((TARGET_ELF) ? CTORS_SECTION_ASM_OP_ELF : CTORS_SECTION_ASM_OP_COFF)
-
-#undef DTORS_SECTION_ASM_OP
-#define DTORS_SECTION_ASM_OP_ELF	"\t.section\t.dtors, \"aw\""
-#define DTORS_SECTION_ASM_OP_COFF	FINI_SECTION_ASM_OP_COFF
-#define DTORS_SECTION_ASM_OP	\
- ((TARGET_ELF) ? DTORS_SECTION_ASM_OP_ELF : DTORS_SECTION_ASM_OP_COFF)
-
-#undef FINI_SECTION_ASM_OP
-#define FINI_SECTION_ASM_OP_ELF		"\t.section\t.fini"
-#define FINI_SECTION_ASM_OP_COFF	"\t.section\t.dtor, \"x\""
-#define FINI_SECTION_ASM_OP	\
- ((TARGET_ELF) ? FINI_SECTION_ASM_OP_ELF : FINI_SECTION_ASM_OP_COFF)
-
 #undef BSS_SECTION_ASM_OP
-#define BSS_SECTION_ASM_OP		"\t.data"
-
-#undef TEXT_SECTION_ASM_OP
-#define TEXT_SECTION_ASM_OP		"\t.text"
-
-#undef DATA_SECTION_ASM_OP
-#define DATA_SECTION_ASM_OP		"\t.data"
-
-#undef TYPE_OPERAND_FMT
-#define TYPE_OPERAND_FMT		"@%s"
-
-#undef APPLY_RESULT_SIZE
-#define APPLY_RESULT_SIZE						\
-(TARGET_ELF) ? size : 116
-
-#ifndef ASM_DECLARE_RESULT
-#define ASM_DECLARE_RESULT(FILE, RESULT)
-#endif
-
-#define SCO_DEFAULT_ASM_COFF(FILE,NAME)					\
-do {									\
-      ASM_OUTPUT_LABEL (FILE, NAME);					\
-  } while (0)
-
-#undef ASM_DECLARE_FUNCTION_NAME
-#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL)			\
-  do {									\
-    if (TARGET_ELF) {							\
-      ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function");		\
-      ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL));			\
-      ASM_OUTPUT_LABEL (FILE, NAME);					\
-    } else								\
-      SCO_DEFAULT_ASM_COFF(FILE, NAME);					\
-} while (0)
-
-#undef ASM_DECLARE_FUNCTION_SIZE
-#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL)			\
-  do {									\
-    if (TARGET_ELF && !flag_inhibit_size_directive)			\
-      ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME);				\
-  } while (0)
-
-#undef ASM_DECLARE_OBJECT_NAME
-#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL)			\
-  do {									\
-    if (TARGET_ELF) {							\
-      HOST_WIDE_INT size;						\
-									\
-      ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object");			\
-									\
-      size_directive_output = 0;					\
-      if (!flag_inhibit_size_directive					\
-	  && (DECL) && DECL_SIZE (DECL))				\
-	{								\
-	  size_directive_output = 1;					\
-	  size = int_size_in_bytes (TREE_TYPE (DECL));			\
-	  ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size);			\
-	}								\
-									\
-      ASM_OUTPUT_LABEL (FILE, NAME);					\
-    } else								\
-      SCO_DEFAULT_ASM_COFF(FILE, NAME);					\
-  } while (0)
+#define BSS_SECTION_ASM_OP		"\t.section\t.bss, \"aw\", @nobits"
+  
+/*
+ * NOTE: We really do want CTORS_SECTION_ASM_OP and DTORS_SECTION_ASM_OP.
+ * Here's the reason why. If we dont define them, and we dont define them
+ * to always emit to the same section, the default is to emit to "named"
+ * ctors and dtors sections. This would be great if we could use GNU ld,
+ * but we can't. The native linker could possibly be trained to coalesce
+ * named ctors sections, but that hasn't been done either. So if we don't
+ * define these, many C++ ctors and dtors dont get run, because they never
+ * wind up in the ctors/dtors arrays.
+ */
+#define CTORS_SECTION_ASM_OP		"\t.section\t.ctors, \"aw\""
+#define DTORS_SECTION_ASM_OP		"\t.section\t.dtors, \"aw\""
 
-#undef ASM_FILE_START_1
-#define ASM_FILE_START_1(FILE)
-
-#undef ASM_FILE_START
-#define ASM_FILE_START(FILE)						\
-do {									\
-  output_file_directive((FILE),main_input_filename);			\
-  fprintf ((FILE), "\t.version\t\"01.01\"\n");				\
-} while (0)
-
-#undef ASM_FINISH_DECLARE_OBJECT
-#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END)	 \
-do {									 \
-  if (TARGET_ELF) {							 \
-     const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0);		 \
-     HOST_WIDE_INT size;						 \
-     if (!flag_inhibit_size_directive && DECL_SIZE (DECL)		 \
-         && ! AT_END && TOP_LEVEL					 \
-	 && DECL_INITIAL (DECL) == error_mark_node			 \
-	 && !size_directive_output)					 \
-       {								 \
-	 size_directive_output = 1;					 \
-	 size = int_size_in_bytes (TREE_TYPE (DECL));			 \
-	 ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size);			 \
-       }								 \
-    }									 \
-} while (0)
-
-#undef ASM_GENERATE_INTERNAL_LABEL
-#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM)			\
-do {									\
-  if (TARGET_ELF)							\
-    sprintf (LABEL, "*.%s%ld", (PREFIX), (long)(NUM));			\
-  else									\
-    sprintf (LABEL, ".%s%ld", (PREFIX), (long)(NUM));			\
-} while (0)
-
-#undef ASM_OUTPUT_ALIGNED_COMMON
-#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN)		\
-do {									\
-  fprintf ((FILE), "%s", COMMON_ASM_OP);				\
-  assemble_name ((FILE), (NAME));					\
-  if (TARGET_ELF)							\
-    fprintf ((FILE), ",%u,%u\n", (SIZE), (ALIGN) / BITS_PER_UNIT);	\
-  else									\
-    fprintf ((FILE), ",%u\n", (SIZE));					\
-} while (0)
-
-#undef ASM_OUTPUT_ALIGNED_LOCAL
-#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN)		\
-do {									\
-  if (TARGET_ELF) {							\
-    fprintf ((FILE), "%s", LOCAL_ASM_OP);				\
-    assemble_name ((FILE), (NAME));					\
-    fprintf ((FILE), "\n");						\
-    ASM_OUTPUT_ALIGNED_COMMON (FILE, NAME, SIZE, ALIGN);		\
-  } else {								\
-    int align = exact_log2 (ALIGN);					\
-    if (align > 2) align = 2;						\
-    if (TARGET_SVR3_SHLIB)						\
-      data_section ();							\
-    else								\
-      bss_section ();							\
-    ASM_OUTPUT_ALIGN ((FILE), align == -1 ? 2 : align);			\
-    fprintf ((FILE), "%s\t", "\t.lcomm");				\
-    assemble_name ((FILE), (NAME));					\
-    fprintf ((FILE), ",%u\n", (SIZE));					\
-   }									\
-} while (0)
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+#undef X86_FILE_START_VERSION_DIRECTIVE
+#define X86_FILE_START_VERSION_DIRECTIVE true
 
 /* A C statement (sans semicolon) to output to the stdio stream
    FILE the assembler definition of uninitialized global DECL named
@@ -251,248 +57,23 @@ do {									\
    Try to use asm_output_aligned_bss to implement this macro.  */
 
 #define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
-asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
-
-#undef ESCAPES
-#define ESCAPES \
-"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
-\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\
-\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
-\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
-\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
-\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1"
-
-#undef STRING_LIMIT
-#define STRING_LIMIT	((unsigned) 256)
-
-#undef ASM_OUTPUT_LIMITED_STRING
-#define ASM_OUTPUT_LIMITED_STRING(FILE, STR)				\
-  do									\
-    {									\
-      register const unsigned char *_limited_str =			\
-        (const unsigned char *) (STR);					\
-      register unsigned ch;						\
-      fprintf ((FILE), "%s\"", STRING_ASM_OP);				\
-      for (; (ch = *_limited_str); _limited_str++)			\
-        {								\
-	  register int escape;						\
-	  switch (escape = ESCAPES[ch])					\
-	    {								\
-	    case 0:							\
-	      putc (ch, (FILE));					\
-	      break;							\
-	    case 1:							\
-	      fprintf ((FILE), "\\%03o", ch);				\
-	      break;							\
-	    default:							\
-	      putc ('\\', (FILE));					\
-	      putc (escape, (FILE));					\
-	      break;							\
-	    }								\
-        }								\
-      fprintf ((FILE), "\"\n");						\
-    }									\
-  while (0)
-
-
-#undef ASM_OUTPUT_ASCII
-#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH)				\
-do {									\
-      register const unsigned char *_ascii_bytes =			\
-        (const unsigned char *) (STR);					\
-      register const unsigned char *limit = _ascii_bytes + (LENGTH);	\
-      register unsigned bytes_in_chunk = 0;				\
-      for (; _ascii_bytes < limit; _ascii_bytes++)			\
-        {								\
-	  register unsigned const char *p;				\
-	  if (bytes_in_chunk >= 64)					\
-	    {								\
-	      fputc ('\n', (FILE));					\
-	      bytes_in_chunk = 0;					\
-	    }								\
-	  for (p = _ascii_bytes; p < limit && *p != '\0'; p++)		\
-	    continue;							\
-	  if (p < limit && (p - _ascii_bytes) <= (long) STRING_LIMIT)	\
-	    {								\
-	      if (bytes_in_chunk > 0)					\
-		{							\
-		  fputc ('\n', (FILE));					\
-		  bytes_in_chunk = 0;					\
-		}							\
-	      ASM_OUTPUT_LIMITED_STRING ((FILE), _ascii_bytes);		\
-	      _ascii_bytes = p;						\
-	    }								\
-	  else								\
-	    {								\
-	      if (bytes_in_chunk == 0)					\
-		fputs ("\t.byte\t", (FILE));				\
-	      else							\
-		fputc (',', (FILE));					\
-	      fprintf ((FILE), "0x%02x", *_ascii_bytes);		\
-	      bytes_in_chunk += 5;					\
-	    }								\
-	}								\
-      if (bytes_in_chunk > 0)						\
-        fprintf ((FILE), "\n");						\
-} while (0) 
-
-#undef ASM_OUTPUT_CASE_LABEL
-#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,JUMPTABLE)		\
-do {									\
-  if (TARGET_ELF)							\
-    ASM_OUTPUT_ALIGN ((FILE), 2);					\
-  ASM_OUTPUT_INTERNAL_LABEL((FILE),(PREFIX),(NUM));			\
-} while (0)
-
-#undef ASM_OUTPUT_IDENT
-#define ASM_OUTPUT_IDENT(FILE, NAME) \
-  fprintf (FILE, "%s\"%s\"\n", IDENT_ASM_OP, NAME);
-
-#undef ASM_OUTPUT_EXTERNAL_LIBCALL
-#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN)				\
-  if (TARGET_ELF) (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0))
-
-#undef ASM_OUTPUT_INTERNAL_LABEL
-#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM)			\
-  fprintf (FILE, ".%s%d:\n", PREFIX, NUM)
-
-/* The prefix to add to user-visible assembler symbols.  */
-
-#undef USER_LABEL_PREFIX
-#define USER_LABEL_PREFIX ""
-
-/* 
- * We rename 'gcc_except_table' to the shorter name in preparation
- * for the day when we're ready to do DWARF2 eh unwinding under COFF.
- */
-/* #define EXCEPTION_SECTION()		named_section (NULL, ".gccexc", 1) */
-
-/* Switch into a generic section.  */
-#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section 
-
-#undef ASM_OUTPUT_SKIP
-#define ASM_OUTPUT_SKIP(FILE,SIZE) \
-do {									\
-  if (TARGET_ELF)							\
-    fprintf (FILE, "%s%u\n", SKIP_ASM_OP, (SIZE));			\
-  else									\
-    fprintf ((FILE), "%s.,.+%u\n", SET_ASM_OP, (SIZE));		\
-} while (0)
-
-
-#undef CTOR_LIST_BEGIN
-#define CTOR_LIST_BEGIN							\
-do {									\
-  asm (CTORS_SECTION_ASM_OP);						\
-  if (TARGET_ELF)							\
-    STATIC func_ptr __CTOR_LIST__[1] = { (func_ptr) (-1) };		\
-  else									\
-    asm ("pushl $0");							\
-} while (0)
-
-#undef CTOR_LIST_END
-#define CTOR_LIST_END							\
-do {									\
-  if (TARGET_ELF) {							\
-    asm (CTORS_SECTION_ASM_OP);						\
-    STATIC func_ptr __CTOR_LIST__[1] = { (func_ptr) (0) };		\
-  } else {								\
-    CTOR_LIST_BEGIN;							\
-  }									\
-} while (0)
-
-#undef DBX_BLOCKS_FUNCTION_RELATIVE
-#define DBX_BLOCKS_FUNCTION_RELATIVE 1
-
-#undef DBX_FUNCTION_FIRST
-#define DBX_FUNCTION_FIRST 1
+  asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
 
 #undef DBX_REGISTER_NUMBER
-#define DBX_REGISTER_NUMBER(n) \
-  ((TARGET_ELF) ? svr4_dbx_register_map[n] : dbx_register_map[n])
+#define DBX_REGISTER_NUMBER(n)	svr4_dbx_register_map[n]
 
-#define DWARF2_DEBUGGING_INFO 1
-#define DWARF_DEBUGGING_INFO 1
-#define SDB_DEBUGGING_INFO 1
-#define DBX_DEBUGGING_INFO 1
+#define DWARF2_DEBUGGING_INFO		1
+#define DWARF_DEBUGGING_INFO		1
+#define DBX_DEBUGGING_INFO		1
 
 #undef PREFERRED_DEBUGGING_TYPE
-#define PREFERRED_DEBUGGING_TYPE					\
-  ((TARGET_ELF) ? DWARF2_DEBUG: SDB_DEBUG)
-
-#undef EXTRA_SECTIONS
-#define EXTRA_SECTIONS in_init, in_fini
-
-#undef EXTRA_SECTION_FUNCTIONS
-#define EXTRA_SECTION_FUNCTIONS						\
-  INIT_SECTION_FUNCTION							\
-  FINI_SECTION_FUNCTION
-
-#undef FINI_SECTION_FUNCTION
-#define FINI_SECTION_FUNCTION						\
-void									\
-fini_section ()								\
-{									\
-  if ((!TARGET_ELF) && in_section != in_fini)				\
-    {									\
-      fprintf (asm_out_file, "%s\n", FINI_SECTION_ASM_OP);		\
-      in_section = in_fini;						\
-    }									\
-}
-
-#undef INIT_SECTION_FUNCTION
-#define INIT_SECTION_FUNCTION						\
-void									\
-init_section ()								\
-{									\
-  if ((!TARGET_ELF) && in_section != in_init)				\
-    {									\
-      fprintf (asm_out_file, "%s\n", INIT_SECTION_ASM_OP);		\
-      in_section = in_init;						\
-    }									\
-}
-
-#undef SUBTARGET_FRAME_POINTER_REQUIRED
-#define SUBTARGET_FRAME_POINTER_REQUIRED				\
-  ((TARGET_ELF) ? 0 : 							\
-   (current_function_calls_setjmp || current_function_calls_longjmp))
-
-#undef LOCAL_LABEL_PREFIX
-#define LOCAL_LABEL_PREFIX						\
- ((TARGET_ELF) ? "" : ".")
-
-#undef MD_EXEC_PREFIX
-#undef MD_STARTFILE_PREFIX
-#define MD_EXEC_PREFIX "/usr/ccs/bin/"
-#define MD_STARTFILE_PREFIX "/usr/ccs/lib/"
-
-#undef NON_SAVING_SETJMP
-#define NON_SAVING_SETJMP						\
-  ((TARGET_ELF) ? 0 : 							\
-   (current_function_calls_setjmp && current_function_calls_longjmp))
+#define PREFERRED_DEBUGGING_TYPE	DWARF2_DEBUG
+
+#undef DWARF2_UNWIND_INFO
+#define DWARF2_UNWIND_INFO		1
 
 #undef NO_IMPLICIT_EXTERN_C
-#define NO_IMPLICIT_EXTERN_C 1
-
-/* JKJ FIXME - examine the ramifications of RETURN_IN_MEMORY and
-   RETURN_POPS_ARGS */
-
-#undef RETURN_POPS_ARGS
-#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE) 				\
- ((TARGET_ELF) ?							\
-  (ix86_return_pops_args (FUNDECL, FUNTYPE, SIZE)) : 			\
-  (((FUNDECL) && (TREE_CODE (FUNDECL) == IDENTIFIER_NODE)) ? 0		\
-   : (TARGET_RTD							\
-      && (TYPE_ARG_TYPES (FUNTYPE) == 0					\
-	  || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (FUNTYPE)))		\
-	      == void_type_node))) ? (SIZE)				\
-   : 0))
-
-/* ??? Ignore coff.  */
-#undef	TARGET_ASM_SELECT_SECTION
-#define TARGET_ASM_SELECT_SECTION  default_elf_select_section
+#define NO_IMPLICIT_EXTERN_C		1
 
 #undef SWITCH_TAKES_ARG
 #define SWITCH_TAKES_ARG(CHAR) 						\
@@ -511,17 +92,6 @@ init_section ()								\
 #undef TARGET_SUBTARGET_DEFAULT
 #define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS)
 
-#define HANDLE_SYSV_PRAGMA 1
-
-/* Though OpenServer supports .weak in COFF, we don't use it.
- * G++ will frequently emit a symol as .weak and then (in the same .s 
- * file) declare it global.   The COFF assembler finds this unamusing.
- */
-#define SUPPORTS_WEAK (TARGET_ELF)
-#define ASM_WEAKEN_LABEL(FILE,NAME) \
-  do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME);		\
-	fputc ('\n', FILE); } while (0)
-
 /*
  * Define sizes and types
  */
@@ -529,12 +99,14 @@ init_section ()								\
 #undef PTRDIFF_TYPE
 #undef WCHAR_TYPE
 #undef WCHAR_TYPE_SIZE
+#undef WINT_TYPE
 #undef LONG_DOUBLE_TYPE_SIZE
-#define LONG_DOUBLE_TYPE_SIZE 	96
 #define SIZE_TYPE		"unsigned int"
 #define PTRDIFF_TYPE		"int"
 #define WCHAR_TYPE		"long int"
 #define WCHAR_TYPE_SIZE		BITS_PER_WORD
+#define WINT_TYPE		"long int"
+#define LONG_DOUBLE_TYPE_SIZE 	96
 
 /*
  * New for multilib support. Set the default switches for multilib,
@@ -548,7 +120,7 @@ init_section ()								\
    With SCO Open Server 5.0, you now get the linker and assembler free,
    so that is what these specs are targeted for. These utilities are
    very argument sensitive: a space in the wrong place breaks everything.
-   So RMS, please forgive this mess. It works.
+   So please forgive this mess. It works.
 
    Parameters which can be passed to gcc, and their SCO equivalents:
    GCC Parameter                SCO Equivalent
@@ -564,77 +136,109 @@ init_section ()								\
    does.
 
    SCO also allows you to compile, link and generate either ELF or COFF
-   binaries. With gcc, unlike the SCO compiler, the default is ELF.
-   Specify -mcoff to gcc to produce COFF binaries. -fpic will get the
-   assembler and linker to produce PIC code.
+   binaries. With gcc, we now only support ELF mode.
+
+   GCC also requires that the user has installed OSS646, the Execution
+   Environment Update, or is running release 5.0.7 or later. This has
+   many fixes to the ELF link editor and assembler, and a considerably
+   improved libc and RTLD.
+
+   In terms of tool usage, we want to use the standard link editor always,
+   and either the GNU assembler or the native assembler. With OSS646 the
+   native assembler has grown up quite a bit. Some of the specs below
+   assume that /usr/gnu is the prefix for the GNU tools, because thats
+   where the SCO provided ones go. This is especially important for
+   include and library search path ordering. We want to look in /usr/gnu
+   first, becuase frequently people are linking against -lintl, and they
+   MEAN to link with gettext. What they get is the SCO intl library. Its
+   a REAL pity that GNU gettext chose that name; perhaps in a future
+   version they can be persuaded to change it to -lgnuintl and have a
+   link so that -lintl will work for other systems. The same goes for
+   header files. We want /usr/gnu/include searched for before the system
+   header files. Hence the -isystem /usr/gnu/include in the CPP_SPEC.
+   We get /usr/gnu/lib first by virtue of the MD_STARTFILE_PREFIX below.
 */
 
+#define MD_STARTFILE_PREFIX	"/usr/gnu/lib/"
+#define MD_STARTFILE_PREFIX_1	"/usr/ccs/lib/"
+
+#if USE_GAS
+# define MD_EXEC_PREFIX		"/usr/gnu/bin/"
+#else
+# define MD_EXEC_PREFIX		"/usr/ccs/bin/elf/"
+#endif
+
+/* Always use the system linker, please.  */
+#ifndef DEFAULT_LINKER
+# define DEFAULT_LINKER		"/usr/ccs/bin/elf/ld"
+#endif
+
 /* Set up assembler flags for PIC and ELF compilations */
 #undef ASM_SPEC
 
 #if USE_GAS
-  /* Leave ASM_SPEC undefined so we pick up the master copy from gcc.c 
-   * Undef MD_EXEC_PREFIX because we don't know where GAS is, but it's not
-   * likely in /usr/ccs/bin/ 
-   */
-#undef MD_EXEC_PREFIX 
+  /* Leave ASM_SPEC undefined so we pick up the master copy from gcc.c  */
 #else
-
 #define ASM_SPEC \
-   "-b %{!mcoff:elf}%{mcoff:coff \
-     %{static:%e-static not valid with -mcoff} \
-     %{shared:%e-shared not valid with -mcoff} \
-     %{symbolic:%e-symbolic not valid with -mcoff}} \
-    %{Ym,*} %{Yd,*} %{Wa,*:%*} \
-    %{!mcoff:-E%{Xa:a}%{!Xa:%{Xc:c}%{!Xc:%{Xk:k}%{!Xk:%{Xt:t}%{!Xt:a}}}},%{ansi:ansi}%{!ansi:%{posix:posix}%{!posix:%{Xpg4:xpg4}%{!Xpg4:%{Xpg4plus:XPG4PLUS}%{!Xpg4plus:%{Xods30:ods30}%{!Xods30:XPG4PLUS}}}}},ELF %{Qn:} %{!Qy:-Qn}}"
+   "%{Ym,*} %{Yd,*} %{Wa,*:%*} \
+    -E%{Xa:a}%{!Xa:%{Xc:c}%{!Xc:%{Xk:k}%{!Xk:%{Xt:t}%{!Xt:a}}}},%{ansi:ansi}%{!ansi:%{posix:posix}%{!posix:%{Xpg4:xpg4}%{!Xpg4:%{Xpg4plus:XPG4PLUS}%{!Xpg4plus:%{Xods30:ods30}%{!Xods30:XPG4PLUS}}}}},ELF %{Qn:} %{!Qy:-Qn}"
 #endif
 
-/* Use crt1.o as a startup file and crtn.o as a closing file.  */
+/*
+ * Use crti.o for shared objects, crt1.o for normal executables. Make sure
+ * to recognize both -G and -shared as a valid way of introducing shared
+ * library generation. This is important for backwards compatibility.
+ */
 
 #undef STARTFILE_SPEC
 #define STARTFILE_SPEC \
- "%{shared: %{!mcoff: crti.o%s}} \
-  %{!shared:\
+ "%{pg:%e-pg not supported on this platform} \
+  %{p:%{pp:%e-p and -pp specified - pick one}} \
+ %{!shared:\
    %{!symbolic: \
-    %{pg:gcrt.o%s}%{!pg:%{p:mcrt1.o%s}%{!p:crt1.o%s}}}} \
+    %{!G: \
+     %{pp:pcrt1elf.o%s}%{p:mcrt1.o%s}%{!p:%{!pp:crt1.o%s}}}}} \
+  crti.o%s \
   %{ansi:values-Xc.o%s} \
   %{!ansi: \
-   %{Xa:values-Xa.o%s} \
-    %{!Xa:%{Xc:values-Xc.o%s} \
-     %{!Xc:%{Xk:values-Xk.o%s} \
-      %{!Xk:%{Xt:values-Xt.o%s} \
-       %{!Xt:values-Xa.o%s}}}}} \
-  %{mcoff:crtbeginS.o%s} %{!mcoff:crtbegin.o%s}"
+   %{traditional:values-Xt.o%s} \
+    %{!traditional: \
+     %{Xa:values-Xa.o%s} \
+      %{!Xa:%{Xc:values-Xc.o%s} \
+       %{!Xc:%{Xk:values-Xk.o%s} \
+        %{!Xk:%{Xt:values-Xt.o%s} \
+         %{!Xt:values-Xa.o%s}}}}}} \
+  crtbegin.o%s"
 
 #undef ENDFILE_SPEC
 #define ENDFILE_SPEC \
- "%{!mcoff:crtend.o%s} \
-  %{mcoff:crtendS.o%s} \
-  %{pg:gcrtn.o%s}%{!pg:crtn.o%s}"
-
-#define TARGET_OS_CPP_BUILTINS()		\
-  do						\
-    {						\
-	builtin_define ("__unix");		\
-	builtin_define ("_SCO_DS");		\
-	builtin_define ("_M_I386");		\
-	builtin_define ("_M_XENIX");		\
-	builtin_define ("_M_UNIX");		\
-	builtin_assert ("system=svr3");		\
-	if (flag_iso)				\
-	  cpp_define (pfile, "_STRICT_ANSI");	\
-	if (flag_pic)							\
-	  {								\
-	    builtin_define ("__PIC__");					\
-	    builtin_define ("__pic__");					\
-	  }								\
-    }						\
+ "crtend.o%s crtn.o%s"
+
+#define TARGET_OS_CPP_BUILTINS()				\
+  do								\
+    {								\
+	builtin_define ("__unix");				\
+	builtin_define ("_SCO_DS");				\
+	builtin_define ("_SCO_DS_LL");				\
+	builtin_define ("_SCO_ELF");				\
+	builtin_define ("_M_I386");				\
+	builtin_define ("_M_XENIX");				\
+	builtin_define ("_M_UNIX");				\
+	builtin_assert ("system=svr3");				\
+	if (flag_iso)						\
+	  cpp_define (pfile, "_STRICT_ANSI");			\
+	if (flag_pic)						\
+	  {							\
+	    builtin_define ("__PIC__");				\
+	    builtin_define ("__pic__");				\
+	  }							\
+    }								\
   while (0)
 
 #undef CPP_SPEC
 #define CPP_SPEC "\
-  %{fpic:%{mcoff:%e-fpic is not valid with -mcoff}} \
-  %{fPIC:%{mcoff:%e-fPIC is not valid with -mcoff}} \
+  -isystem /usr/gnu/include \
+  %{pthread:-D_REENTRANT} \
   %{!Xods30:-D_STRICT_NAMES} \
   %{!ansi:%{!posix:%{!Xods30:-D_SCO_XPG_VERS=4}}} \
   %{ansi:-isystem include/ansi%s -isystem /usr/include/ansi} \
@@ -652,8 +256,6 @@ init_section ()								\
                       -DM_BITFIELDS -DM_SYS5 -DM_SYSV -DM_INTERNAT -DM_SYSIII \
                       -DM_WORDSWAP}}}} \
   %{scointl:-DM_INTERNAT -D_M_INTERNAT} \
-  %{!mcoff:-D_SCO_ELF} \
-  %{mcoff:-D_M_COFF -D_SCO_COFF} \
   %{Xa:-D_SCO_C_DIALECT=1} \
   %{!Xa:%{Xc:-D_SCO_C_DIALECT=3} \
    %{!Xc:%{Xk:-D_SCO_C_DIALECT=4} \
@@ -662,109 +264,38 @@ init_section ()								\
 
 #undef LINK_SPEC
 #define LINK_SPEC \
- "-b %{!mcoff:elf}%{mcoff:coff \
-   %{static:%e-static not valid with -mcoff} \
-   %{shared:%e-shared not valid with -mcoff} \
-   %{symbolic:%e-symbolic not valid with -mcoff} \
-   %{fpic:%e-fpic not valid with -mcoff} \
-   %{fPIC:%e-fPIC not valid with -mcoff}} \
-  -R%{Xa:a}%{!Xa:%{Xc:c}%{!Xc:%{Xk:k}%{!Xk:%{Xt:t}%{!Xt:a}}}},%{ansi:ansi}%{!ansi:%{posix:posix}%{!posix:%{Xpg4:xpg4}%{!Xpg4:%{Xpg4plus:XPG4PLUS}%{!Xpg4plus:%{Xods30:ods30}%{!Xods30:XPG4PLUS}}}}},%{mcoff:COFF}%{!mcoff:ELF} \
-  %{Wl,*%*} %{YP,*} %{YL,*} %{YU,*} \
+ "%{!shared:%{!symbolic:%{!G:-E%{Xa:a}%{!Xa:%{Xc:c}%{!Xc:%{Xk:k}%{!Xk:%{Xt:t}%{!Xt:a}}}},%{ansi:ansi}%{!ansi:%{posix:posix}%{!posix:%{Xpg4:xpg4}%{!Xpg4:%{Xpg4plus:XPG4PLUS}%{!Xpg4plus:%{Xods30:ods30}%{!Xods30:XPG4PLUS}}}}},ELF}}} \
+  %{Wl,*:%*} %{YP,*} %{YL,*} %{YU,*} \
   %{!YP,*:%{p:-YP,/usr/ccs/libp:/lib/libp:/usr/lib/libp:/usr/ccs/lib:/lib:/usr/lib} \
    %{!p:-YP,/usr/ccs/lib:/lib:/usr/lib}} \
-  %{h*} %{static:-dn -Bstatic} %{shared:-G -dy %{!z*:-z text}} \
-  %{symbolic:-Bsymbolic -G -dy %{!z*:-z text}} %{z*} %{R*} %{Y*} \
-  %{G:-G} %{!mcoff:%{Qn:} %{!Qy:-Qn}}"
-
-/* The SCO COFF linker gets confused on the difference between "-ofoo"
-   and "-o foo".   So we just always force a single space.  */
-
-#define SWITCHES_NEED_SPACES "o"
+  %{h*} %{static:-dn -Bstatic %{G:%e-G and -static are mutually exclusive}} \
+  %{shared:%{!G:-G}} %{G:%{!shared:-G}} %{shared:%{G:-G}} \
+  %{shared:-dy %{symbolic:-Bsymbolic -G} %{z*}} %{R*} %{Y*} \
+  %{Qn:} %{!Qy:-Qn} -z alt_resolve"
 
 /* Library spec. If we are not building a shared library, provide the
    standard libraries, as per the SCO compiler.  */
 
 #undef LIB_SPEC
 #define LIB_SPEC \
- "%{shared:pic/libgcc.a%s}%{!shared:%{!symbolic:-lcrypt -lgen -lc}}"
+ "%{shared:%{!G:pic/libgcc.a%s}} \
+  %{G:%{!shared:pic/libgcc.a%s}} \
+  %{shared:%{G:pic/libgcc.a%s}} \
+  %{p:%{!pp:-lelfprof -lelf}} %{pp:%{!p:-lelfprof -lelf}} \
+  %{!shared:%{!symbolic:%{!G:-lcrypt -lgen -lc %{pthread:-lpthread}}}}"
 
 #undef LIBGCC_SPEC
 #define LIBGCC_SPEC \
- "%{!shared:-lgcc}"
+ "%{!shared:%{!G:-lgcc}}"
 
-#define MASK_COFF     		010000000000	/* Mask for elf generation */
-#define TARGET_ELF              (1) /* (!(target_flags & MASK_COFF)) */
+/* Here for legacy support only so we still accept -melf flag */
+#define MASK_COFF     		010000000000	/* Mask for COFF generation */
+#define TARGET_ELF              (1)
 
 #undef SUBTARGET_SWITCHES
 #define SUBTARGET_SWITCHES 					\
 	{ "elf", -MASK_COFF, N_("Generate ELF output")  },
 
-#define NO_DOLLAR_IN_LABEL
-
-/* Implicit library calls should use memcpy, not bcopy, etc.  They are 
-   faster on OpenServer libraries.  */
-
-#define TARGET_MEM_FUNCTIONS
-
-/* Biggest alignment supported by the object file format of this
-   machine.  Use this macro to limit the alignment which can be
-   specified using the `__attribute__ ((aligned (N)))' construct.  If
-   not defined, the default value is `BIGGEST_ALIGNMENT'.  */
-
-#define MAX_OFILE_ALIGNMENT (32768*8)
-
-/* Define the `__builtin_va_list' type for the ABI.  On OpenServer, this
-   type is `char *'.  */
-#undef BUILD_VA_LIST_TYPE
-#define BUILD_VA_LIST_TYPE(VALIST) \
-  (VALIST) = build_pointer_type (char_type_node)
-
-
-/*
-Here comes some major hackery to get the crt stuff to compile properly.
-Since we can (and do) compile for both COFF and ELF environments, we
-set things up accordingly, based on the pre-processor defines for ELF
-and COFF. This is insane, but then I guess having one compiler with a
-single back-end supporting two vastly different file format types is
-a little insane too. But it is not impossible and we get a useful
-compiler at the end of the day. Onward we go ...
-*/
-
-#if defined(CRT_BEGIN) || defined(CRT_END) || defined(IN_LIBGCC2)
-# undef OBJECT_FORMAT_ELF
-# undef INIT_SECTION_ASM_OP
-# undef FINI_SECTION_ASM_OP
-# undef CTORS_SECTION_ASM_OP
-# undef DTORS_SECTION_ASM_OP
-# undef EH_FRAME_SECTION_NAME
-# undef CTOR_LIST_BEGIN
-# undef CTOR_LIST_END
-# undef DO_GLOBAL_CTORS_BODY
-
-# if defined (_SCO_ELF)
-#  define OBJECT_FORMAT_ELF
-#  define INIT_SECTION_ASM_OP INIT_SECTION_ASM_OP_ELF
-#  define FINI_SECTION_ASM_OP FINI_SECTION_ASM_OP_ELF
-#  define DTORS_SECTION_ASM_OP DTORS_SECTION_ASM_OP_ELF
-#  define CTORS_SECTION_ASM_OP CTORS_SECTION_ASM_OP_ELF
-#  define EH_FRAME_SECTION_NAME EH_FRAME_SECTION_NAME_ELF
-# else /* ! _SCO_ELF */
-#  define INIT_SECTION_ASM_OP INIT_SECTION_ASM_OP_COFF
-#  define FINI_SECTION_ASM_OP FINI_SECTION_ASM_OP_COFF
-#  define DTORS_SECTION_ASM_OP DTORS_SECTION_ASM_OP_COFF
-#  define CTORS_SECTION_ASM_OP CTORS_SECTION_ASM_OP_COFF
-#  define EH_FRAME_SECTION_NAME EH_FRAME_SECTION_NAME_COFF
-#  define CTOR_LIST_BEGIN asm (INIT_SECTION_ASM_OP); asm ("pushl $0")
-#  define CTOR_LIST_END CTOR_LIST_BEGIN
-#  define DO_GLOBAL_CTORS_BODY						\
-do {									\
-     func_ptr *p, *beg = alloca(0);					\
-     for (p = beg; *p;)							\
-      (*p++) ();							\
-} while (0)
-# endif /* ! _SCO_ELF */
-#endif /* CRT_BEGIN !! CRT_END */
-
 /* Handle special EH pointer encodings.  Absolute, pc-relative, and
    indirect are handled automatically.  */
 #define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
@@ -798,10 +329,3 @@ do {									\
 	   : "=d"(BASE))
 #endif
 
-/* Select a format to encode pointers in exception handling data.  CODE
-   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
-   true if the symbol may be affected by dynamic relocations.  */
-#undef ASM_PREFERRED_EH_DATA_FORMAT
-#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL)			\
-  (flag_pic ? (GLOBAL ? DW_EH_PE_indirect : 0) | DW_EH_PE_datarel	\
-   : DW_EH_PE_absptr)
diff --git a/contrib/gcc/config/i386/sol2.h b/contrib/gcc/config/i386/sol2.h
index fb5a184..9089a1d 100644
--- a/contrib/gcc/config/i386/sol2.h
+++ b/contrib/gcc/config/i386/sol2.h
@@ -1,5 +1,5 @@
 /* Target definitions for GNU compiler for Intel 80386 running Solaris 2
-   Copyright (C) 1993, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+   Copyright (C) 1993, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
    Free Software Foundation, Inc.
    Contributed by Fred Fish (fnf@cygnus.com).
 
@@ -20,8 +20,6 @@ along with GNU CC; see the file COPYING.  If not, write to
 the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */
 
-#define CMOV_SUN_AS_SYNTAX 1
-
 /* The Solaris 2.0 x86 linker botches alignment of code sections.
    It tries to align to a 16 byte boundary by padding with 0x00000090
    ints, rather than 0x90 bytes (nop).  This generates trash in the
diff --git a/contrib/gcc/config/i386/t-sco5 b/contrib/gcc/config/i386/t-sco5
index d0c457e..f92a624 100644
--- a/contrib/gcc/config/i386/t-sco5
+++ b/contrib/gcc/config/i386/t-sco5
@@ -1,18 +1,16 @@
-# We need to use -fPIC when we are using gcc to compile the routines in
-# crtstuff.c.  This is only really needed when we are going to use gcc/g++
-# to produce a shared library, but since we don't know ahead of time when
-# we will be doing that, we just always use -fPIC when compiling the
-# routines in crtstuff.c.  Likewise for libgcc2.c.   This is less painful
-# than multilibbing everything with PIC and PIC-not variants.
-
-# The pushl in CTOR initialization interferes with frame pointer elimination.
-
+# We multilib libgcc for -fPIC, to get real PIC code in it.
+# NOTE: We must use -fPIC on crt{begi,end}.o else we get an RTLD error
+# "cant set protections on segment of length blah at 0x8048000".
 CRTSTUFF_T_CFLAGS   = -fPIC -fno-omit-frame-pointer
-TARGET_LIBGCC2_CFLAGS = -fPIC
 
-crti.o: $(srcdir)/config/i386/sol2-ci.asm $(GCC_PASSES)
-	sed -e '/^!/d' <$(srcdir)/config/i386/sol2-ci.asm >crti.s
-	$(GCC_FOR_TARGET) -c -o crti.o crti.s
+MULTILIB_OPTIONS    = fPIC
+MULTILIB_DIRNAMES   = pic
+MUTLILIB_EXCEPTIONS =
+MULTILIB_MATCHES    = fPIC=fpic
+MULTILIB_EXTRA_OPTS =
+
+LIBGCC=stmp-multilib
+INSTALL_LIBGCC=install-multilib
 
 # See all the declarations.
 FIXPROTO_DEFINES = -D_XOPEN_SOURCE -D_POSIX_C_SOURCE=2
diff --git a/contrib/gcc/config/i386/xmmintrin.h b/contrib/gcc/config/i386/xmmintrin.h
index 43a05c1..1829ab0 100644
--- a/contrib/gcc/config/i386/xmmintrin.h
+++ b/contrib/gcc/config/i386/xmmintrin.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
 
    This file is part of GNU CC.
 
@@ -25,7 +25,7 @@
    Public License.  */
 
 /* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 5.0.  */
+   User Guide and Reference, version 8.0.  */
 
 #ifndef _XMMINTRIN_H_INCLUDED
 #define _XMMINTRIN_H_INCLUDED
@@ -475,6 +475,12 @@ _mm_cvtss_si32 (__m128 __A)
   return __builtin_ia32_cvtss2si ((__v4sf) __A);
 }
 
+static __inline int
+_mm_cvt_ss2si (__m128 __A)
+{
+  return _mm_cvtss_si32 (__A);
+}
+
 #ifdef __x86_64__
 /* Convert the lower SPFP value to a 32-bit integer according to the current
    rounding mode.  */
@@ -493,6 +499,12 @@ _mm_cvtps_pi32 (__m128 __A)
   return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
 }
 
+static __inline __m64
+_mm_cvt_ps2pi (__m128 __A)
+{
+  return _mm_cvtps_pi32 (__A);
+}
+
 /* Truncate the lower SPFP value to a 32-bit integer.  */
 static __inline int
 _mm_cvttss_si32 (__m128 __A)
@@ -500,6 +512,12 @@ _mm_cvttss_si32 (__m128 __A)
   return __builtin_ia32_cvttss2si ((__v4sf) __A);
 }
 
+static __inline int
+_mm_cvtt_ss2si (__m128 __A)
+{
+  return _mm_cvttss_si32 (__A);
+}
+
 #ifdef __x86_64__
 /* Truncate the lower SPFP value to a 32-bit integer.  */
 static __inline long long
@@ -517,6 +535,12 @@ _mm_cvttps_pi32 (__m128 __A)
   return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
 }
 
+static __inline __m64
+_mm_cvtt_ps2pi (__m128 __A)
+{
+  return _mm_cvttps_pi32 (__A);
+}
+
 /* Convert B to a SPFP value and insert it as element zero in A.  */
 static __inline __m128
 _mm_cvtsi32_ss (__m128 __A, int __B)
@@ -524,6 +548,12 @@ _mm_cvtsi32_ss (__m128 __A, int __B)
   return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
 }
 
+static __inline __m128
+_mm_cvt_si2ss (__m128 __A, int __B)
+{
+  return _mm_cvtsi32_ss (__A, __B);
+}
+
 #ifdef __x86_64__
 /* Convert B to a SPFP value and insert it as element zero in A.  */
 static __inline __m128
@@ -541,6 +571,12 @@ _mm_cvtpi32_ps (__m128 __A, __m64 __B)
   return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
 }
 
+static __inline __m128
+_mm_cvt_pi2ps (__m128 __A, __m64 __B)
+{
+  return _mm_cvtpi32_ps (__A, __B);
+}
+
 /* Convert the four signed 16-bit values in A to SPFP form.  */
 static __inline __m128
 _mm_cvtpi16_ps (__m64 __A)
@@ -942,9 +978,16 @@ _mm_extract_pi16 (__m64 __A, int __N)
 {
   return __builtin_ia32_pextrw ((__v4hi)__A, __N);
 }
+
+static __inline int
+_m_pextrw (__m64 __A, int __N)
+{
+  return _mm_extract_pi16 (__A, __N);
+}
 #else
 #define _mm_extract_pi16(A, N) \
   __builtin_ia32_pextrw ((__v4hi)(A), (N))
+#define _m_pextrw(A, N)		_mm_extract_pi16((A), (N))
 #endif
 
 /* Inserts word D into one of four words of A.  The selector N must be
@@ -955,9 +998,16 @@ _mm_insert_pi16 (__m64 __A, int __D, int __N)
 {
   return (__m64)__builtin_ia32_pinsrw ((__v4hi)__A, __D, __N);
 }
+
+static __inline __m64
+_m_pinsrw (__m64 __A, int __D, int __N)
+{
+  return _mm_insert_pi16 (__A, __D, __N);
+}
 #else
 #define _mm_insert_pi16(A, D, N) \
   ((__m64) __builtin_ia32_pinsrw ((__v4hi)(A), (D), (N)))
+#define _m_pinsrw(A, D, N)	 _mm_insert_pi16((A), (D), (N))
 #endif
 
 /* Compute the element-wise maximum of signed 16-bit values.  */
@@ -967,6 +1017,12 @@ _mm_max_pi16 (__m64 __A, __m64 __B)
   return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
 }
 
+static __inline __m64
+_m_pmaxsw (__m64 __A, __m64 __B)
+{
+  return _mm_max_pi16 (__A, __B);
+}
+
 /* Compute the element-wise maximum of unsigned 8-bit values.  */
 static __inline __m64
 _mm_max_pu8 (__m64 __A, __m64 __B)
@@ -974,6 +1030,12 @@ _mm_max_pu8 (__m64 __A, __m64 __B)
   return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
 }
 
+static __inline __m64
+_m_pmaxub (__m64 __A, __m64 __B)
+{
+  return _mm_max_pu8 (__A, __B);
+}
+
 /* Compute the element-wise minimum of signed 16-bit values.  */
 static __inline __m64
 _mm_min_pi16 (__m64 __A, __m64 __B)
@@ -981,6 +1043,12 @@ _mm_min_pi16 (__m64 __A, __m64 __B)
   return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
 }
 
+static __inline __m64
+_m_pminsw (__m64 __A, __m64 __B)
+{
+  return _mm_min_pi16 (__A, __B);
+}
+
 /* Compute the element-wise minimum of unsigned 8-bit values.  */
 static __inline __m64
 _mm_min_pu8 (__m64 __A, __m64 __B)
@@ -988,6 +1056,12 @@ _mm_min_pu8 (__m64 __A, __m64 __B)
   return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
 }
 
+static __inline __m64
+_m_pminub (__m64 __A, __m64 __B)
+{
+  return _mm_min_pu8 (__A, __B);
+}
+
 /* Create an 8-bit mask of the signs of 8-bit values.  */
 static __inline int
 _mm_movemask_pi8 (__m64 __A)
@@ -995,6 +1069,12 @@ _mm_movemask_pi8 (__m64 __A)
   return __builtin_ia32_pmovmskb ((__v8qi)__A);
 }
 
+static __inline int
+_m_pmovmskb (__m64 __A)
+{
+  return _mm_movemask_pi8 (__A);
+}
+
 /* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
    in B and produce the high 16 bits of the 32-bit results.  */
 static __inline __m64
@@ -1003,6 +1083,12 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
   return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
 }
 
+static __inline __m64
+_m_pmulhuw (__m64 __A, __m64 __B)
+{
+  return _mm_mulhi_pu16 (__A, __B);
+}
+
 /* Return a combination of the four 16-bit values in A.  The selector
    must be an immediate.  */
 #if 0
@@ -1011,9 +1097,16 @@ _mm_shuffle_pi16 (__m64 __A, int __N)
 {
   return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
 }
+
+static __inline __m64
+_m_pshufw (__m64 __A, int __N)
+{
+  return _mm_shuffle_pi16 (__A, __N);
+}
 #else
 #define _mm_shuffle_pi16(A, N) \
   ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N)))
+#define _m_pshufw(A, N)		_mm_shuffle_pi16 ((A), (N))
 #endif
 
 /* Conditionally store byte elements of A into P.  The high bit of each
@@ -1025,6 +1118,12 @@ _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
   __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
 }
 
+static __inline void
+_m_maskmovq (__m64 __A, __m64 __N, char *__P)
+{
+  _mm_maskmove_si64 (__A, __N, __P);
+}
+
 /* Compute the rounded averages of the unsigned 8-bit values in A and B.  */
 static __inline __m64
 _mm_avg_pu8 (__m64 __A, __m64 __B)
@@ -1032,6 +1131,12 @@ _mm_avg_pu8 (__m64 __A, __m64 __B)
   return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
 }
 
+static __inline __m64
+_m_pavgb (__m64 __A, __m64 __B)
+{
+  return _mm_avg_pu8 (__A, __B);
+}
+
 /* Compute the rounded averages of the unsigned 16-bit values in A and B.  */
 static __inline __m64
 _mm_avg_pu16 (__m64 __A, __m64 __B)
@@ -1039,6 +1144,12 @@ _mm_avg_pu16 (__m64 __A, __m64 __B)
   return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
 }
 
+static __inline __m64
+_m_pavgw (__m64 __A, __m64 __B)
+{
+  return _mm_avg_pu16 (__A, __B);
+}
+
 /* Compute the sum of the absolute differences of the unsigned 8-bit
    values in A and B.  Return the value in the lower 16-bit word; the
    upper words are cleared.  */
@@ -1048,6 +1159,12 @@ _mm_sad_pu8 (__m64 __A, __m64 __B)
   return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
 }
 
+static __inline __m64
+_m_psadbw (__m64 __A, __m64 __B)
+{
+  return _mm_sad_pu8 (__A, __B);
+}
+
 /* Loads one cache line from address P to a location "closer" to the
    processor.  The selector I specifies the type of prefetch operation.  */
 #if 0
@@ -1106,1469 +1223,8 @@ do {									\
   (row3) = __builtin_ia32_shufps (__t2, __t3, 0xDD);			\
 } while (0)
 
-#ifdef __SSE2__
-/* SSE2 */
-typedef int __v2df __attribute__ ((mode (V2DF)));
-typedef int __v2di __attribute__ ((mode (V2DI)));
-typedef int __v4si __attribute__ ((mode (V4SI)));
-typedef int __v8hi __attribute__ ((mode (V8HI)));
-typedef int __v16qi __attribute__ ((mode (V16QI)));
-
-/* Create a selector for use with the SHUFPD instruction.  */
-#define _MM_SHUFFLE2(fp1,fp0) \
- (((fp1) << 1) | (fp0))
-
-#define __m128i __v2di
-#define __m128d __v2df
-
-/* Create a vector with element 0 as *P and the rest zero.  */
-static __inline __m128d
-_mm_load_sd (double const *__P)
-{
-  return (__m128d) __builtin_ia32_loadsd (__P);
-}
-
-/* Create a vector with all two elements equal to *P.  */
-static __inline __m128d
-_mm_load1_pd (double const *__P)
-{
-  __v2df __tmp = __builtin_ia32_loadsd (__P);
-  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0));
-}
-
-static __inline __m128d
-_mm_load_pd1 (double const *__P)
-{
-  return _mm_load1_pd (__P);
-}
-
-/* Load two DPFP values from P.  The addresd must be 16-byte aligned.  */
-static __inline __m128d
-_mm_load_pd (double const *__P)
-{
-  return (__m128d) __builtin_ia32_loadapd (__P);
-}
-
-/* Load two DPFP values from P.  The addresd need not be 16-byte aligned.  */
-static __inline __m128d
-_mm_loadu_pd (double const *__P)
-{
-  return (__m128d) __builtin_ia32_loadupd (__P);
-}
-
-/* Load two DPFP values in reverse order.  The addresd must be aligned.  */
-static __inline __m128d
-_mm_loadr_pd (double const *__P)
-{
-  __v2df __tmp = __builtin_ia32_loadapd (__P);
-  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
-}
-
-/* Create a vector with element 0 as F and the rest zero.  */
-static __inline __m128d
-_mm_set_sd (double __F)
-{
-  return (__m128d) __builtin_ia32_loadsd (&__F);
-}
-
-/* Create a vector with all two elements equal to F.  */
-static __inline __m128d
-_mm_set1_pd (double __F)
-{
-  __v2df __tmp = __builtin_ia32_loadsd (&__F);
-  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0));
-}
-
-static __inline __m128d
-_mm_set_pd1 (double __F)
-{
-  return _mm_set1_pd (__F);
-}
-
-/* Create the vector [Z Y].  */
-static __inline __m128d
-_mm_set_pd (double __Z, double __Y)
-{
-  union {
-    double __a[2];
-    __m128d __v;
-  } __u;
-
-  __u.__a[0] = __Y;
-  __u.__a[1] = __Z;
-
-  return __u.__v;
-}
-
-/* Create the vector [Y Z].  */
-static __inline __m128d
-_mm_setr_pd (double __Z, double __Y)
-{
-  return _mm_set_pd (__Y, __Z);
-}
-
-/* Create a vector of zeros.  */
-static __inline __m128d
-_mm_setzero_pd (void)
-{
-  return (__m128d) __builtin_ia32_setzeropd ();
-}
-
-/* Stores the lower DPFP value.  */
-static __inline void
-_mm_store_sd (double *__P, __m128d __A)
-{
-  __builtin_ia32_storesd (__P, (__v2df)__A);
-}
-
-/* Store the lower DPFP value acrosd two words.  */
-static __inline void
-_mm_store1_pd (double *__P, __m128d __A)
-{
-  __v2df __va = (__v2df)__A;
-  __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,0));
-  __builtin_ia32_storeapd (__P, __tmp);
-}
-
-static __inline void
-_mm_store_pd1 (double *__P, __m128d __A)
-{
-  _mm_store1_pd (__P, __A);
-}
-
-/* Store two DPFP values.  The addresd must be 16-byte aligned.  */
-static __inline void
-_mm_store_pd (double *__P, __m128d __A)
-{
-  __builtin_ia32_storeapd (__P, (__v2df)__A);
-}
-
-/* Store two DPFP values.  The addresd need not be 16-byte aligned.  */
-static __inline void
-_mm_storeu_pd (double *__P, __m128d __A)
-{
-  __builtin_ia32_storeupd (__P, (__v2df)__A);
-}
-
-/* Store two DPFP values in reverse order.  The addresd must be aligned.  */
-static __inline void
-_mm_storer_pd (double *__P, __m128d __A)
-{
-  __v2df __va = (__v2df)__A;
-  __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,1));
-  __builtin_ia32_storeapd (__P, __tmp);
-}
-
-/* Sets the low DPFP value of A from the low value of B.  */
-static __inline __m128d
-_mm_move_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
-}
-
-
-static __inline __m128d
-_mm_add_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_add_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_sub_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_sub_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_mul_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_mul_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_div_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_div_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_sqrt_pd (__m128d __A)
-{
-  return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
-}
-
-/* Return pair {sqrt (A[0), B[1]}.  */
-static __inline __m128d
-_mm_sqrt_sd (__m128d __A, __m128d __B)
-{
-  __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
-  return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
-}
-
-static __inline __m128d
-_mm_min_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_min_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_max_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_max_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_and_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_andnot_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_or_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_xor_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpeq_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmplt_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmple_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpgt_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpge_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpneq_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpnlt_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpnle_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpngt_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpnge_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpord_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpunord_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpeq_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmplt_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmple_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpgt_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
-					 (__v2df)
-					 __builtin_ia32_cmpltsd ((__v2df) __B,
-								 (__v2df)
-								 __A));
-}
-
-static __inline __m128d
-_mm_cmpge_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
-					 (__v2df)
-					 __builtin_ia32_cmplesd ((__v2df) __B,
-								 (__v2df)
-								 __A));
-}
-
-static __inline __m128d
-_mm_cmpneq_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpnlt_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpnle_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpngt_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
-					 (__v2df)
-					 __builtin_ia32_cmpnltsd ((__v2df) __B,
-								  (__v2df)
-								  __A));
-}
-
-static __inline __m128d
-_mm_cmpnge_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
-					 (__v2df)
-					 __builtin_ia32_cmpnlesd ((__v2df) __B,
-								  (__v2df)
-								  __A));
-}
-
-static __inline __m128d
-_mm_cmpord_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_cmpunord_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_comieq_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_comilt_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_comile_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_comigt_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_comige_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_comineq_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_ucomieq_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_ucomilt_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_ucomile_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_ucomigt_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_ucomige_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_ucomineq_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
-}
-
-/* Create a vector with element 0 as *P and the rest zero.  */
-
-static __inline __m128i
-_mm_load_si128 (__m128i const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddqa ((char const *)__P);
-}
-
-static __inline __m128i
-_mm_loadu_si128 (__m128i const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
-}
-
-static __inline __m128i
-_mm_loadl_epi64 (__m128i const *__P)
-{
-  return (__m128i) __builtin_ia32_movq2dq (*(unsigned long long *)__P);
-}
-
-static __inline void
-_mm_store_si128 (__m128i *__P, __m128i __B)
-{
-  __builtin_ia32_storedqa ((char *)__P, (__v16qi)__B);
-}
-
-static __inline void
-_mm_storeu_si128 (__m128i *__P, __m128i __B)
-{
-  __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
-}
-
-static __inline void
-_mm_storel_epi64 (__m128i *__P, __m128i __B)
-{
-  *(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B);
-}
-
-static __inline __m64
-_mm_movepi64_pi64 (__m128i __B)
-{
-  return (__m64) __builtin_ia32_movdq2q ((__v2di)__B);
-}
-
-static __inline __m128i
-_mm_move_epi64 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_movq ((__v2di)__A);
-}
-
-/* Create a vector of zeros.  */
-static __inline __m128i
-_mm_setzero_si128 (void)
-{
-  return (__m128i) __builtin_ia32_setzero128 ();
-}
-
-static __inline __m128i
-_mm_set_epi64 (__m64 __A,  __m64 __B)
-{
-  __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
-  __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B);
-  return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp2, __tmp);
-}
-
-/* Create the vector [Z Y X W].  */
-static __inline __m128i
-_mm_set_epi32 (int __Z, int __Y, int __X, int __W)
-{
-  union {
-    int __a[4];
-    __m128i __v;
-  } __u;
-
-  __u.__a[0] = __W;
-  __u.__a[1] = __X;
-  __u.__a[2] = __Y;
-  __u.__a[3] = __Z;
-
-  return __u.__v;
-}
-
-#ifdef __x86_64__
-/* Create the vector [Z Y].  */
-static __inline __m128i
-_mm_set_epi64x (long long __Z, long long __Y)
-{
-  union {
-    long __a[2];
-    __m128i __v;
-  } __u;
-
-  __u.__a[0] = __Y;
-  __u.__a[1] = __Z;
-
-  return __u.__v;
-}
-#endif
-
-/* Create the vector [S T U V Z Y X W].  */
-static __inline __m128i
-_mm_set_epi16 (short __Z, short __Y, short __X, short __W,
-	       short __V, short __U, short __T, short __S)
-{
-  union {
-    short __a[8];
-    __m128i __v;
-  } __u;
-
-  __u.__a[0] = __S;
-  __u.__a[1] = __T;
-  __u.__a[2] = __U;
-  __u.__a[3] = __V;
-  __u.__a[4] = __W;
-  __u.__a[5] = __X;
-  __u.__a[6] = __Y;
-  __u.__a[7] = __Z;
-
-  return __u.__v;
-}
-
-/* Create the vector [S T U V Z Y X W].  */
-static __inline __m128i
-_mm_set_epi8 (char __Z, char __Y, char __X, char __W,
-	      char __V, char __U, char __T, char __S,
-	      char __Z1, char __Y1, char __X1, char __W1,
-	      char __V1, char __U1, char __T1, char __S1)
-{
-  union {
-    char __a[16];
-    __m128i __v;
-  } __u;
-
-  __u.__a[0] = __S1;
-  __u.__a[1] = __T1;
-  __u.__a[2] = __U1;
-  __u.__a[3] = __V1;
-  __u.__a[4] = __W1;
-  __u.__a[5] = __X1;
-  __u.__a[6] = __Y1;
-  __u.__a[7] = __Z1;
-  __u.__a[8] = __S;
-  __u.__a[9] = __T;
-  __u.__a[10] = __U;
-  __u.__a[11] = __V;
-  __u.__a[12] = __W;
-  __u.__a[13] = __X;
-  __u.__a[14] = __Y;
-  __u.__a[15] = __Z;
-
-  return __u.__v;
-}
-
-static __inline __m128i
-_mm_set1_epi64 (__m64 __A)
-{
-  __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
-  return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp);
-}
-
-static __inline __m128i
-_mm_set1_epi32 (int __A)
-{
-  __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__A);
-  return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
-}
-
-#ifdef __x86_64__
-static __inline __m128i
-_mm_set1_epi64x (long long __A)
-{
-  __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
-  return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0));
-}
-#endif
-
-static __inline __m128i
-_mm_set1_epi16 (short __A)
-{
-  int __Acopy = (unsigned short)__A;
-  __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy);
-  __tmp = (__v4si)__builtin_ia32_punpcklwd128 ((__v8hi)__tmp, (__v8hi)__tmp);
-  return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
-}
-
-static __inline __m128i
-_mm_set1_epi8 (char __A)
-{
-  int __Acopy = (unsigned char)__A;
-  __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy);
-  __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp);
-  __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp);
-  return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
-}
-
-static __inline __m128i
-_mm_setr_epi64 (__m64 __A,  __m64 __B)
-{
-  __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
-  __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B);
-  return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp2);
-}
-
-/* Create the vector [Z Y X W].  */
-static __inline __m128i
-_mm_setr_epi32 (int __W, int __X, int __Y, int __Z)
-{
-  union {
-    int __a[4];
-    __m128i __v;
-  } __u;
-
-  __u.__a[0] = __W;
-  __u.__a[1] = __X;
-  __u.__a[2] = __Y;
-  __u.__a[3] = __Z;
-
-  return __u.__v;
-}
-/* Create the vector [S T U V Z Y X W].  */
-static __inline __m128i
-_mm_setr_epi16 (short __S, short __T, short __U, short __V,
-	        short __W, short __X, short __Y, short __Z)
-{
-  union {
-    short __a[8];
-    __m128i __v;
-  } __u;
-
-  __u.__a[0] = __S;
-  __u.__a[1] = __T;
-  __u.__a[2] = __U;
-  __u.__a[3] = __V;
-  __u.__a[4] = __W;
-  __u.__a[5] = __X;
-  __u.__a[6] = __Y;
-  __u.__a[7] = __Z;
-
-  return __u.__v;
-}
-
-/* Create the vector [S T U V Z Y X W].  */
-static __inline __m128i
-_mm_setr_epi8 (char __S1, char __T1, char __U1, char __V1,
-	       char __W1, char __X1, char __Y1, char __Z1,
-	       char __S, char __T, char __U, char __V,
-	       char __W, char __X, char __Y, char __Z)
-{
-  union {
-    char __a[16];
-    __m128i __v;
-  } __u;
-
-  __u.__a[0] = __S1;
-  __u.__a[1] = __T1;
-  __u.__a[2] = __U1;
-  __u.__a[3] = __V1;
-  __u.__a[4] = __W1;
-  __u.__a[5] = __X1;
-  __u.__a[6] = __Y1;
-  __u.__a[7] = __Z1;
-  __u.__a[8] = __S;
-  __u.__a[9] = __T;
-  __u.__a[10] = __U;
-  __u.__a[11] = __V;
-  __u.__a[12] = __W;
-  __u.__a[13] = __X;
-  __u.__a[14] = __Y;
-  __u.__a[15] = __Z;
-
-  return __u.__v;
-}
-
-static __inline __m128d
-_mm_cvtepi32_pd (__m128i __A)
-{
-  return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
-}
-
-static __inline __m128
-_mm_cvtepi32_ps (__m128i __A)
-{
-  return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
-}
-
-static __inline __m128i
-_mm_cvtpd_epi32 (__m128d __A)
-{
-  return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
-}
-
-static __inline __m64
-_mm_cvtpd_pi32 (__m128d __A)
-{
-  return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
-}
-
-static __inline __m128
-_mm_cvtpd_ps (__m128d __A)
-{
-  return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
-}
-
-static __inline __m128i
-_mm_cvttpd_epi32 (__m128d __A)
-{
-  return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
-}
-
-static __inline __m64
-_mm_cvttpd_pi32 (__m128d __A)
-{
-  return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
-}
-
-static __inline __m128d
-_mm_cvtpi32_pd (__m64 __A)
-{
-  return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
-}
-
-static __inline __m128i
-_mm_cvtps_epi32 (__m128 __A)
-{
-  return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
-}
-
-static __inline __m128i
-_mm_cvttps_epi32 (__m128 __A)
-{
-  return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
-}
-
-static __inline __m128d
-_mm_cvtps_pd (__m128 __A)
-{
-  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
-}
-
-static __inline int
-_mm_cvtsd_si32 (__m128d __A)
-{
-  return __builtin_ia32_cvtsd2si ((__v2df) __A);
-}
-
-#ifdef __x86_64__
-static __inline long long
-_mm_cvtsd_si64x (__m128d __A)
-{
-  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
-}
-#endif
-
-static __inline int
-_mm_cvttsd_si32 (__m128d __A)
-{
-  return __builtin_ia32_cvttsd2si ((__v2df) __A);
-}
-
-#ifdef __x86_64__
-static __inline long long
-_mm_cvttsd_si64x (__m128d __A)
-{
-  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
-}
-#endif
-
-static __inline __m128
-_mm_cvtsd_ss (__m128 __A, __m128d __B)
-{
-  return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
-}
-
-static __inline __m128d
-_mm_cvtsi32_sd (__m128d __A, int __B)
-{
-  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
-}
-
-#ifdef __x86_64__
-static __inline __m128d
-_mm_cvtsi64x_sd (__m128d __A, long long __B)
-{
-  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
-}
-#endif
-
-static __inline __m128d
-_mm_cvtss_sd (__m128d __A, __m128 __B)
-{
-  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
-}
-
-#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C)))
-
-static __inline __m128d
-_mm_unpackhi_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_unpacklo_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_loadh_pd (__m128d __A, double const *__B)
-{
-  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B);
-}
-
-static __inline void
-_mm_storeh_pd (double *__A, __m128d __B)
-{
-  __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B);
-}
-
-static __inline __m128d
-_mm_loadl_pd (__m128d __A, double const *__B)
-{
-  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B);
-}
-
-static __inline void
-_mm_storel_pd (double *__A, __m128d __B)
-{
-  __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B);
-}
-
-static __inline int
-_mm_movemask_pd (__m128d __A)
-{
-  return __builtin_ia32_movmskpd ((__v2df)__A);
-}
-
-static __inline __m128i
-_mm_packs_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_packs_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
-}
-
-static __inline __m128i
-_mm_packus_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
-}
-
-static __inline __m128i
-_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
-}
-
-static __inline __m128i
-_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_add_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_add_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_add_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
-}
-
-static __inline __m128i
-_mm_add_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_adds_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_adds_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_adds_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_adds_epu16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_sub_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_sub_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_sub_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
-}
-
-static __inline __m128i
-_mm_sub_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_subs_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_subs_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_subs_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_subs_epu16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_madd_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_mulhi_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_mullo_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m64
-_mm_mul_su32 (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
-}
-
-static __inline __m128i
-_mm_mul_epu32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
-}
-
-static __inline __m128i
-_mm_sll_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psllw128 ((__v8hi)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_sll_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pslld128 ((__v4si)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_sll_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psllq128 ((__v2di)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_sra_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_sra_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_srl_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_srl_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_srl_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_slli_epi16 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
-}
-
-static __inline __m128i
-_mm_slli_epi32 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
-}
-
-static __inline __m128i
-_mm_slli_epi64 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
-}
-
-static __inline __m128i
-_mm_srai_epi16 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
-}
-
-static __inline __m128i
-_mm_srai_epi32 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
-}
-
-#if 0
-static __m128i __attribute__((__always_inline__))
-_mm_srli_si128 (__m128i __A, const int __B)
-{
-  return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B))
-}
-
-static __m128i __attribute__((__always_inline__))
-_mm_srli_si128 (__m128i __A, const int __B)
-{
-  return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B))
-}
-#endif
-#define _mm_srli_si128(__A, __B) ((__m128i)__builtin_ia32_psrldqi128 (__A, __B))
-#define _mm_slli_si128(__A, __B) ((__m128i)__builtin_ia32_pslldqi128 (__A, __B))
-
-static __inline __m128i
-_mm_srli_epi16 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
-}
-
-static __inline __m128i
-_mm_srli_epi32 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
-}
-
-static __inline __m128i
-_mm_srli_epi64 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
-}
-
-static __inline __m128i
-_mm_and_si128 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_andnot_si128 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_or_si128 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_xor_si128 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
-}
-
-static __inline __m128i
-_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
-}
-
-static __inline __m128i
-_mm_cmplt_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
-}
-
-static __inline __m128i
-_mm_cmplt_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
-}
-
-static __inline __m128i
-_mm_cmplt_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
-}
-
-static __inline __m128i
-_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
-}
-
-#define _mm_extract_epi16(__A, __B) __builtin_ia32_pextrw128 ((__v8hi)__A, __B)
-
-#define _mm_insert_epi16(__A, __B, __C) ((__m128i)__builtin_ia32_pinsrw128 ((__v8hi)__A, __B, __C))
-
-static __inline __m128i
-_mm_max_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_max_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_min_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_min_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline int
-_mm_movemask_epi8 (__m128i __A)
-{
-  return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
-}
-
-static __inline __m128i
-_mm_mulhi_epu16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B))
-#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B))
-#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B))
-
-static __inline void
-_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
-{
-  __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
-}
-
-static __inline __m128i
-_mm_avg_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline __m128i
-_mm_avg_epu16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-static __inline __m128i
-_mm_sad_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-static __inline void
-_mm_stream_si32 (int *__A, int __B)
-{
-  __builtin_ia32_movnti (__A, __B);
-}
-
-static __inline void
-_mm_stream_si128 (__m128i *__A, __m128i __B)
-{
-  __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
-}
-
-static __inline void
-_mm_stream_pd (double *__A, __m128d __B)
-{
-  __builtin_ia32_movntpd (__A, (__v2df)__B);
-}
-
-static __inline __m128i
-_mm_movpi64_epi64 (__m64 __A)
-{
-  return (__m128i)__builtin_ia32_movq2dq ((unsigned long long)__A);
-}
-
-static __inline void
-_mm_clflush (void const *__A)
-{
-  return __builtin_ia32_clflush (__A);
-}
-
-static __inline void
-_mm_lfence (void)
-{
-  __builtin_ia32_lfence ();
-}
-
-static __inline void
-_mm_mfence (void)
-{
-  __builtin_ia32_mfence ();
-}
-
-static __inline __m128i
-_mm_cvtsi32_si128 (int __A)
-{
-  return (__m128i) __builtin_ia32_loadd (&__A);
-}
-
-#ifdef __x86_64__
-static __inline __m128i
-_mm_cvtsi64x_si128 (long long __A)
-{
-  return (__m128i) __builtin_ia32_movq2dq (__A);
-}
-#endif
-
-static __inline int
-_mm_cvtsi128_si32 (__m128i __A)
-{
-  int __tmp;
-  __builtin_ia32_stored (&__tmp, (__v4si)__A);
-  return __tmp;
-}
-
-#ifdef __x86_64__
-static __inline long long
-_mm_cvtsi128_si64x (__m128i __A)
-{
-  return __builtin_ia32_movdq2q ((__v2di)__A);
-}
-#endif
-
-#endif /* __SSE2__  */
+/* For backward source compatibility.  */
+#include <emmintrin.h>
 
 #endif /* __SSE__ */
 #endif /* _XMMINTRIN_H_INCLUDED */
diff --git a/contrib/gcc/config/ia64/hpux.h b/contrib/gcc/config/ia64/hpux.h
index 56c601b..90303ea 100644
--- a/contrib/gcc/config/ia64/hpux.h
+++ b/contrib/gcc/config/ia64/hpux.h
@@ -49,6 +49,13 @@ do {							\
 	  }						\
 } while (0)
 
+#undef CPP_SPEC
+#define CPP_SPEC \
+  "%{mt|pthread:-D_REENTRANT -D_THREAD_SAFE -D_POSIX_C_SOURCE=199506L}"
+/* aCC defines also -DRWSTD_MULTI_THREAD, -DRW_MULTI_THREAD.  These
+   affect only aCC's C++ library (Rogue Wave-derived) which we do not
+   use, and they violate the user's name space.  */
+
 #undef  ASM_EXTRA_SPEC
 #define ASM_EXTRA_SPEC "%{milp32:-milp32} %{mlp64:-mlp64}"
 
@@ -68,6 +75,7 @@ do {							\
 #undef  LIB_SPEC
 #define LIB_SPEC \
   "%{!shared: \
+     %{mt|pthread:-lpthread} \
      %{p:%{!mlp64:-L/usr/lib/hpux32/libp} \
 	 %{mlp64:-L/usr/lib/hpux64/libp} -lprof} \
      %{pg:%{!mlp64:-L/usr/lib/hpux32/libp} \
@@ -134,6 +142,10 @@ do {								\
 #undef TARGET_HPUX_LD
 #define TARGET_HPUX_LD	1
 
+/* The HPUX dynamic linker objects to weak symbols with no
+   definitions, so do not use them in gthr-posix.h.  */
+#define GTHREAD_USE_WEAK 0
+
 /* Put out the needed function declarations at the end.  */
 
 #define ASM_FILE_END(STREAM) ia64_hpux_asm_file_end(STREAM)
@@ -144,6 +156,10 @@ do {								\
 #undef DTORS_SECTION_ASM_OP
 #define DTORS_SECTION_ASM_OP  "\t.section\t.fini_array,\t\"aw\",\"fini_array\""
 
+/* The init_array/fini_array technique does not permit the use of
+   initialization priorities.  */
+#define SUPPORTS_INIT_PRIORITY 0
+
 #undef READONLY_DATA_SECTION_ASM_OP
 #define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata,\t\"a\",\t\"progbits\""
 
diff --git a/contrib/gcc/config/ia64/ia64-protos.h b/contrib/gcc/config/ia64/ia64-protos.h
index 72c2279..f25bb02 100644
--- a/contrib/gcc/config/ia64/ia64-protos.h
+++ b/contrib/gcc/config/ia64/ia64-protos.h
@@ -135,6 +135,9 @@ extern void ia64_override_options PARAMS((void));
 extern int ia64_dbx_register_number PARAMS((int));
 extern bool ia64_function_ok_for_sibcall PARAMS ((tree));
 
+extern rtx ia64_return_addr_rtx PARAMS ((HOST_WIDE_INT, rtx));
+extern void ia64_split_return_addr_rtx PARAMS ((rtx));
+
 #ifdef SDATA_SECTION_ASM_OP
 extern void sdata_section PARAMS ((void));
 #endif
diff --git a/contrib/gcc/config/ia64/ia64.c b/contrib/gcc/config/ia64/ia64.c
index 0f34d8f..12f3204 100644
--- a/contrib/gcc/config/ia64/ia64.c
+++ b/contrib/gcc/config/ia64/ia64.c
@@ -175,8 +175,9 @@ static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
 					     tree, rtx));
 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
 					     tree, rtx));
-static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
-						 tree, rtx));
+static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode,
+						 enum machine_mode,
+						 int, tree, rtx));
 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
 						  tree, rtx));
 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
@@ -1154,6 +1155,7 @@ ia64_expand_move (op0, op1)
       if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
 	{
 	  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
+	  rtx orig_op0 = op0;
 
 	  switch (tls_kind)
 	    {
@@ -1177,8 +1179,10 @@ ia64_expand_move (op0, op1)
 	      insns = get_insns ();
 	      end_sequence ();
 
+	      if (GET_MODE (op0) != Pmode)
+		op0 = tga_ret;
 	      emit_libcall_block (insns, op0, tga_ret, op1);
-	      return NULL_RTX;
+	      break;
 
 	    case TLS_MODEL_LOCAL_DYNAMIC:
 	      /* ??? This isn't the completely proper way to do local-dynamic
@@ -1206,20 +1210,15 @@ ia64_expand_move (op0, op1)
 	      tmp = gen_reg_rtx (Pmode);
 	      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
 
-	      if (register_operand (op0, Pmode))
-		tga_ret = op0;
-	      else
-		tga_ret = gen_reg_rtx (Pmode);
+	      if (!register_operand (op0, Pmode))
+		op0 = gen_reg_rtx (Pmode);
 	      if (TARGET_TLS64)
 		{
-		  emit_insn (gen_load_dtprel (tga_ret, op1));
-		  emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
+		  emit_insn (gen_load_dtprel (op0, op1));
+		  emit_insn (gen_adddi3 (op0, tmp, op0));
 		}
 	      else
-		emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
-	      if (tga_ret == op0)
-		return NULL_RTX;
-	      op1 = tga_ret;
+		emit_insn (gen_add_dtprel (op0, tmp, op1));
 	      break;
 
 	    case TLS_MODEL_INITIAL_EXEC:
@@ -1229,35 +1228,32 @@ ia64_expand_move (op0, op1)
 	      RTX_UNCHANGING_P (tmp) = 1;
 	      tmp = force_reg (Pmode, tmp);
 
-	      if (register_operand (op0, Pmode))
-		op1 = op0;
-	      else
-		op1 = gen_reg_rtx (Pmode);
-	      emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
-	      if (op1 == op0)
-		return NULL_RTX;
+	      if (!register_operand (op0, Pmode))
+		op0 = gen_reg_rtx (Pmode);
+	      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
 	      break;
 
 	    case TLS_MODEL_LOCAL_EXEC:
-	      if (register_operand (op0, Pmode))
-		tmp = op0;
-	      else
-		tmp = gen_reg_rtx (Pmode);
+	      if (!register_operand (op0, Pmode))
+		op0 = gen_reg_rtx (Pmode);
 	      if (TARGET_TLS64)
 		{
-		  emit_insn (gen_load_tprel (tmp, op1));
-		  emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
+		  emit_insn (gen_load_tprel (op0, op1));
+		  emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
 		}
 	      else
-		emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
-	      if (tmp == op0)
-		return NULL_RTX;
-	      op1 = tmp;
+		emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
 	      break;
 
 	    default:
 	      abort ();
 	    }
+
+	  if (orig_op0 == op0)
+	    return NULL_RTX;
+	  if (GET_MODE (orig_op0) == Pmode)
+	    return op0;
+	  return gen_lowpart (GET_MODE (orig_op0), op0);
 	}
       else if (!TARGET_NO_PIC &&
 	       (symbolic_operand (op1, Pmode) ||
@@ -2015,10 +2011,6 @@ ia64_initial_elimination_offset (from, to)
 	abort ();
       break;
 
-    case RETURN_ADDRESS_POINTER_REGNUM:
-      offset = 0;
-      break;
-
     default:
       abort ();
     }
@@ -2369,17 +2361,6 @@ ia64_expand_prologue ()
       reg_names[current_frame_info.reg_fp] = tmp;
     }
 
-  /* Fix up the return address placeholder.  */
-  /* ??? We can fail if __builtin_return_address is used, and we didn't
-     allocate a register in which to save b0.  I can't think of a way to
-     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
-     then be sure that I got the right one.  Further, reload doesn't seem
-     to care if an eliminable register isn't used, and "eliminates" it
-     anyway.  */
-  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
-      && current_frame_info.reg_save_b0 != 0)
-    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
-
   /* We don't need an alloc instruction if we've used no outputs or locals.  */
   if (current_frame_info.n_local_regs == 0
       && current_frame_info.n_output_regs == 0
@@ -2936,6 +2917,72 @@ ia64_direct_return ()
   return 0;
 }
 
+/* Return the magic cookie that we use to hold the return address
+   during early compilation.  */
+
+rtx
+ia64_return_addr_rtx (count, frame)
+     HOST_WIDE_INT count;
+     rtx frame ATTRIBUTE_UNUSED;
+{
+  if (count != 0)
+    return NULL;
+  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
+}
+
+/* Split this value after reload, now that we know where the return
+   address is saved.  */
+
+void
+ia64_split_return_addr_rtx (dest)
+     rtx dest;
+{
+  rtx src;
+
+  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
+    {
+      if (current_frame_info.reg_save_b0 != 0)
+	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
+      else
+	{
+	  HOST_WIDE_INT off;
+	  unsigned int regno;
+
+	  /* Compute offset from CFA for BR0.  */
+	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
+	  off = (current_frame_info.spill_cfa_off
+		 + current_frame_info.spill_size);
+	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
+	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
+	      off -= 8;
+
+	  /* Convert CFA offset to a register based offset.  */
+	  if (frame_pointer_needed)
+	    src = hard_frame_pointer_rtx;
+	  else
+	    {
+	      src = stack_pointer_rtx;
+	      off += current_frame_info.total_size;
+	    }
+
+	  /* Load address into scratch register.  */
+	  if (CONST_OK_FOR_I (off))
+	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
+	  else
+	    {
+	      emit_move_insn (dest, GEN_INT (off));
+	      emit_insn (gen_adddi3 (dest, src, dest));
+	    }
+
+	  src = gen_rtx_MEM (Pmode, dest);
+	}
+    }
+  else
+    src = gen_rtx_REG (DImode, BR_REG (0));
+
+  emit_move_insn (dest, src);
+}
+
 int
 ia64_hard_regno_rename_ok (from, to)
      int from;
@@ -3085,9 +3132,6 @@ ia64_output_function_epilogue (file, size)
 {
   int i;
 
-  /* Reset from the function's potential modifications.  */
-  XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
-
   if (current_frame_info.reg_fp)
     {
       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
@@ -7060,7 +7104,8 @@ ia64_emit_nops ()
 	    {
 	      while (bundle_pos < 3)
 		{
-		  emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
+		  if (b->t[bundle_pos] != TYPE_L)
+		    emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
 		  bundle_pos++;
 		}
 	      continue;
@@ -7666,11 +7711,16 @@ ia64_init_builtins ()
 				psi_type_node, integer_type_node,
 				integer_type_node, NULL_TREE);
 
-  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
+  /* __sync_val_compare_and_swap_di */
   tree di_ftype_pdi_di_di
     = build_function_type_list (long_integer_type_node,
 				pdi_type_node, long_integer_type_node,
 				long_integer_type_node, NULL_TREE);
+  /* __sync_bool_compare_and_swap_di */
+  tree si_ftype_pdi_di_di
+    = build_function_type_list (integer_type_node,
+				pdi_type_node, long_integer_type_node,
+				long_integer_type_node, NULL_TREE);
   /* __sync_synchronize */
   tree void_ftype_void
     = build_function_type (void_type_node, void_list_node);
@@ -7703,7 +7753,7 @@ ia64_init_builtins ()
 	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
   def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
 	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
-  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
+  def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
 	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
 
   def_builtin ("__sync_synchronize", void_ftype_void,
@@ -7943,7 +7993,8 @@ ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
 */
 
 static rtx
-ia64_expand_compare_and_swap (mode, boolp, arglist, target)
+ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target)
+     enum machine_mode rmode;
      enum machine_mode mode;
      int boolp;
      tree arglist;
@@ -7991,7 +8042,7 @@ ia64_expand_compare_and_swap (mode, boolp, arglist, target)
   if (boolp)
     {
       if (! target)
-	target = gen_reg_rtx (mode);
+	target = gen_reg_rtx (rmode);
       return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
     }
   else
@@ -8066,11 +8117,16 @@ ia64_expand_builtin (exp, target, subtarget, mode, ignore)
   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
   tree arglist = TREE_OPERAND (exp, 1);
+  enum machine_mode rmode = VOIDmode;
 
   switch (fcode)
     {
     case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
     case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
+      mode = SImode;
+      rmode = SImode;
+      break;
+
     case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
     case IA64_BUILTIN_LOCK_RELEASE_SI:
     case IA64_BUILTIN_FETCH_AND_ADD_SI:
@@ -8089,7 +8145,15 @@ ia64_expand_builtin (exp, target, subtarget, mode, ignore)
       break;
 
     case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
+      mode = DImode;
+      rmode = SImode;
+      break;
+
     case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
+      mode = DImode;
+      rmode = DImode;
+      break;
+
     case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
     case IA64_BUILTIN_LOCK_RELEASE_DI:
     case IA64_BUILTIN_FETCH_AND_ADD_DI:
@@ -8115,11 +8179,13 @@ ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     {
     case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
     case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
-      return ia64_expand_compare_and_swap (mode, 1, arglist, target);
+      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
+					   target);
 
     case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
     case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
-      return ia64_expand_compare_and_swap (mode, 0, arglist, target);
+      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
+					   target);
 
     case IA64_BUILTIN_SYNCHRONIZE:
       emit_insn (gen_mf ());
@@ -8368,6 +8434,9 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
   emit_note (NULL, NOTE_INSN_PROLOGUE_END);
 
   this = gen_rtx_REG (Pmode, IN_REG (0));
+  if (TARGET_ILP32)
+    emit_insn (gen_ptr_extend (this,
+			       gen_rtx_REG (ptr_mode, IN_REG (0))));
 
   /* Apply the constant offset, if required.  */
   if (delta)
@@ -8389,7 +8458,14 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
       rtx tmp = gen_rtx_REG (Pmode, 2);
 
-      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
+      if (TARGET_ILP32)
+	{
+	  rtx t = gen_rtx_REG (ptr_mode, 2);
+	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
+	  emit_insn (gen_ptr_extend (tmp, t));
+	}
+      else
+	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
 
       if (!CONST_OK_FOR_J (vcall_offset))
 	{
@@ -8399,7 +8475,11 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
 	}
       emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
 
-      emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
+      if (TARGET_ILP32)
+	emit_move_insn (gen_rtx_REG (ptr_mode, 2), 
+			gen_rtx_MEM (ptr_mode, tmp));
+      else
+	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
 
       emit_insn (gen_adddi3 (this, this, tmp));
     }
diff --git a/contrib/gcc/config/ia64/ia64.h b/contrib/gcc/config/ia64/ia64.h
index 724405a..60d0143 100644
--- a/contrib/gcc/config/ia64/ia64.h
+++ b/contrib/gcc/config/ia64/ia64.h
@@ -436,7 +436,7 @@ while (0)
    64 predicate registers, 8 branch registers, one frame pointer,
    and several "application" registers.  */
 
-#define FIRST_PSEUDO_REGISTER 335
+#define FIRST_PSEUDO_REGISTER 334
 
 /* Ranges for the various kinds of registers.  */
 #define ADDL_REGNO_P(REGNO) ((unsigned HOST_WIDE_INT) (REGNO) <= 3)
@@ -445,9 +445,7 @@ while (0)
 #define PR_REGNO_P(REGNO) ((REGNO) >= 256 && (REGNO) <= 319)
 #define BR_REGNO_P(REGNO) ((REGNO) >= 320 && (REGNO) <= 327)
 #define GENERAL_REGNO_P(REGNO) \
-  (GR_REGNO_P (REGNO)							\
-   || (REGNO) == FRAME_POINTER_REGNUM					\
-   || (REGNO) == RETURN_ADDRESS_POINTER_REGNUM)
+  (GR_REGNO_P (REGNO) || (REGNO) == FRAME_POINTER_REGNUM)
 
 #define GR_REG(REGNO) ((REGNO) + 0)
 #define FR_REG(REGNO) ((REGNO) + 128)
@@ -457,11 +455,11 @@ while (0)
 #define IN_REG(REGNO) ((REGNO) + 112)
 #define LOC_REG(REGNO) ((REGNO) + 32)
 
-#define AR_CCV_REGNUM	330
-#define AR_UNAT_REGNUM  331
-#define AR_PFS_REGNUM	332
-#define AR_LC_REGNUM	333
-#define AR_EC_REGNUM	334
+#define AR_CCV_REGNUM	329
+#define AR_UNAT_REGNUM  330
+#define AR_PFS_REGNUM	331
+#define AR_LC_REGNUM	332
+#define AR_EC_REGNUM	333
 
 #define IN_REGNO_P(REGNO) ((REGNO) >= IN_REG (0) && (REGNO) <= IN_REG (7))
 #define LOC_REGNO_P(REGNO) ((REGNO) >= LOC_REG (0) && (REGNO) <= LOC_REG (79))
@@ -524,8 +522,8 @@ while (0)
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
   /* Branch registers.  */				\
   0, 0, 0, 0, 0, 0, 0, 0,				\
-  /*FP RA CCV UNAT PFS LC EC */				\
-     1, 1,  1,   1,  1, 0, 1				\
+  /*FP CCV UNAT PFS LC EC */				\
+     1,  1,   1,  1, 0, 1				\
  }
 
 /* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered
@@ -559,8 +557,8 @@ while (0)
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
   /* Branch registers.  */				\
   1, 0, 0, 0, 0, 0, 1, 1,				\
-  /*FP RA CCV UNAT PFS LC EC */				\
-     1, 1,  1,   1,  1, 0, 1				\
+  /*FP CCV UNAT PFS LC EC */				\
+     1,  1,   1,  1, 0, 1				\
 }
 
 /* Like `CALL_USED_REGISTERS' but used to overcome a historical
@@ -597,8 +595,8 @@ while (0)
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
   /* Branch registers.  */				\
   1, 0, 0, 0, 0, 0, 1, 1,				\
-  /*FP RA CCV UNAT PFS LC EC */				\
-     0, 0,  1,   0,  1, 0, 0				\
+  /*FP CCV UNAT PFS LC EC */				\
+     0,  1,   0,  1, 0, 0				\
 }
 
 
@@ -744,7 +742,7 @@ while (0)
   /* Special branch registers.  */					   \
   R_BR (0),								   \
   /* Other fixed registers.  */						   \
-  FRAME_POINTER_REGNUM, RETURN_ADDRESS_POINTER_REGNUM,			   \
+  FRAME_POINTER_REGNUM, 						   \
   AR_CCV_REGNUM, AR_UNAT_REGNUM, AR_PFS_REGNUM, AR_LC_REGNUM,		   \
   AR_EC_REGNUM		  						   \
 }
@@ -873,11 +871,11 @@ enum reg_class
   /* AR_M_REGS.  */					\
   { 0x00000000, 0x00000000, 0x00000000, 0x00000000,	\
     0x00000000, 0x00000000, 0x00000000, 0x00000000,	\
-    0x00000000, 0x00000000, 0x0C00 },			\
+    0x00000000, 0x00000000, 0x0600 },			\
   /* AR_I_REGS.  */					\
   { 0x00000000, 0x00000000, 0x00000000, 0x00000000,	\
     0x00000000, 0x00000000, 0x00000000, 0x00000000,	\
-    0x00000000, 0x00000000, 0x7000 },			\
+    0x00000000, 0x00000000, 0x3800 },			\
   /* ADDL_REGS.  */					\
   { 0x0000000F, 0x00000000, 0x00000000, 0x00000000,	\
     0x00000000, 0x00000000, 0x00000000, 0x00000000,	\
@@ -885,7 +883,7 @@ enum reg_class
   /* GR_REGS.  */					\
   { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,	\
     0x00000000, 0x00000000, 0x00000000, 0x00000000,	\
-    0x00000000, 0x00000000, 0x0300 },			\
+    0x00000000, 0x00000000, 0x0100 },			\
   /* FR_REGS.  */					\
   { 0x00000000, 0x00000000, 0x00000000, 0x00000000,	\
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,	\
@@ -893,15 +891,15 @@ enum reg_class
   /* GR_AND_BR_REGS.  */				\
   { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,	\
     0x00000000, 0x00000000, 0x00000000, 0x00000000,	\
-    0x00000000, 0x00000000, 0x03FF },			\
+    0x00000000, 0x00000000, 0x01FF },			\
   /* GR_AND_FR_REGS.  */				\
   { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,	\
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,	\
-    0x00000000, 0x00000000, 0x0300 },			\
+    0x00000000, 0x00000000, 0x0100 },			\
   /* ALL_REGS.  */					\
   { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,	\
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,	\
-    0xFFFFFFFF, 0xFFFFFFFF, 0x7FFF },			\
+    0xFFFFFFFF, 0xFFFFFFFF, 0x3FFF },			\
 }
 
 /* A C expression whose value is a register class containing hard register
@@ -1119,7 +1117,7 @@ enum reg_class
    DYNAMIC_CHAIN_ADDRESS and SETUP_FRAME_ADDRESS (for the reg stack flush).  */
 
 #define RETURN_ADDR_RTX(COUNT, FRAME) \
-  ((COUNT) == 0 ? return_address_pointer_rtx : const0_rtx)
+  ia64_return_addr_rtx (COUNT, FRAME)
 
 /* A C expression whose value is RTL representing the location of the incoming
    return address at the beginning of any function, before the prologue.  This
@@ -1180,13 +1178,6 @@ enum reg_class
       REGNO_POINTER_ALIGN (ARG_POINTER_REGNUM) = 64;	\
   } while (0)
 
-/* The register number for the return address register.  For IA-64, this
-   is not actually a pointer as the name suggests, but that's a name that
-   gen_rtx_REG already takes care to keep unique.  We modify
-   return_address_pointer_rtx in ia64_expand_prologue to reference the
-   final output regnum.  */
-#define RETURN_ADDRESS_POINTER_REGNUM 329
-
 /* Register numbers used for passing a function's static chain pointer.  */
 /* ??? The ABI sez the static chain should be passed as a normal parameter.  */
 #define STATIC_CHAIN_REGNUM 15
@@ -1210,7 +1201,6 @@ enum reg_class
   {ARG_POINTER_REGNUM,	 HARD_FRAME_POINTER_REGNUM},			\
   {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM},				\
   {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM},			\
-  {RETURN_ADDRESS_POINTER_REGNUM, BR_REG (0)},				\
 }
 
 /* A C expression that returns nonzero if the compiler is allowed to try to
@@ -1934,8 +1924,8 @@ do {									\
   "p60", "p61", "p62", "p63",						\
   /* Branch registers.  */						\
   "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",			\
-  /* Frame pointer.  Return address.  */				\
-  "sfp", "retaddr", "ar.ccv", "ar.unat", "ar.pfs", "ar.lc", "ar.ec",	\
+  /* Frame pointer.  Application registers.  */				\
+  "sfp", "ar.ccv", "ar.unat", "ar.pfs", "ar.lc", "ar.ec",	\
 }
 
 /* If defined, a C initializer for an array of structures containing a name and
diff --git a/contrib/gcc/config/ia64/ia64.md b/contrib/gcc/config/ia64/ia64.md
index 4baa5d3..3b88d9f 100644
--- a/contrib/gcc/config/ia64/ia64.md
+++ b/contrib/gcc/config/ia64/ia64.md
@@ -73,6 +73,7 @@
    (UNSPEC_BUNDLE_SELECTOR	23)
    (UNSPEC_ADDP4		24)
    (UNSPEC_PROLOGUE_USE		25)
+   (UNSPEC_RET_ADDR		26)
   ])
 
 (define_constants
@@ -416,6 +417,25 @@
   DONE;
 })
 
+;; This is used as a placeholder for the return address during early
+;; compilation.  We won't know where we've placed this until during
+;; reload, at which point it can wind up in b0, a general register,
+;; or memory.  The only safe destination under these conditions is a
+;; general register.
+
+(define_insn_and_split "*movdi_ret_addr"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(const_int 0)] UNSPEC_RET_ADDR))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  ia64_split_return_addr_rtx (operands[0]);
+  DONE;
+}
+  [(set_attr "itanium_class" "ialu")])
+
 (define_insn "*movdi_internal"
   [(set (match_operand:DI 0 "destination_operand"
 		    "=r,r,r,r, m, r,*f,*f,*f, Q, r,*b, r,*e, r,*d, r,*c")
diff --git a/contrib/gcc/config/ia64/ia64intrin.h b/contrib/gcc/config/ia64/ia64intrin.h
index c7bbd33..262dc20 100644
--- a/contrib/gcc/config/ia64/ia64intrin.h
+++ b/contrib/gcc/config/ia64/ia64intrin.h
@@ -19,13 +19,11 @@ extern long __sync_val_compare_and_swap_di (long *, long, long);
     __sync_val_compare_and_swap_di((long *)(PTR),(long)(OLD),(long)(NEW)))
 
 extern int __sync_bool_compare_and_swap_si (int *, int, int);
-extern long __sync_bool_compare_and_swap_di (long *, long, long);
+extern int __sync_bool_compare_and_swap_di (long *, long, long);
 #define __sync_bool_compare_and_swap(PTR, OLD, NEW)			\
  ((sizeof (*(PTR)) == sizeof(int))					\
-  ? (__typeof__(*(PTR)))						\
-    __sync_bool_compare_and_swap_si((int *)(PTR),(int)(OLD),(int)(NEW))	\
-  : (__typeof__(*(PTR)))						\
-    __sync_bool_compare_and_swap_di((long *)(PTR),(long)(OLD),(long)(NEW)))
+  ? __sync_bool_compare_and_swap_si((int *)(PTR),(int)(OLD),(int)(NEW))	\
+  : __sync_bool_compare_and_swap_di((long *)(PTR),(long)(OLD),(long)(NEW)))
 
 extern void __sync_lock_release_si (int *);
 extern void __sync_lock_release_di (long *);
diff --git a/contrib/gcc/config/ia64/libgcc-ia64.ver b/contrib/gcc/config/ia64/libgcc-ia64.ver
index 2ffb693..cd76990 100644
--- a/contrib/gcc/config/ia64/libgcc-ia64.ver
+++ b/contrib/gcc/config/ia64/libgcc-ia64.ver
@@ -7,3 +7,6 @@ GCC_3.0 {
   __ia64_trampoline
   __ia64_backtrace
 }
+GCC_3.3.2 {
+  _Unwind_GetBSP
+}
diff --git a/contrib/gcc/config/ia64/unwind-ia64.c b/contrib/gcc/config/ia64/unwind-ia64.c
index 12e46ae..88d236b 100644
--- a/contrib/gcc/config/ia64/unwind-ia64.c
+++ b/contrib/gcc/config/ia64/unwind-ia64.c
@@ -1665,6 +1665,14 @@ _Unwind_GetCFA (struct _Unwind_Context *context)
   return (_Unwind_Ptr) context->psp;
 }
 
+/* Get the value of the Backing Store Pointer as saved in CONTEXT.  */
+
+_Unwind_Word
+_Unwind_GetBSP (struct _Unwind_Context *context)
+{
+  return (_Unwind_Ptr) context->bsp;
+}
+
 
 static _Unwind_Reason_Code
 uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs)
@@ -1786,6 +1794,11 @@ uw_update_reg_address (struct _Unwind_Context *context,
 	addr = ia64_rse_skip_regs ((unsigned long *) context->bsp, rval - 32);
       else if (rval >= 2)
 	addr = context->ireg[rval - 2].loc;
+      else if (rval == 0)
+	{
+	  static const unsigned long dummy;
+	  addr = (void *) &dummy;
+	}
       else
 	abort ();
       break;
@@ -1837,6 +1850,11 @@ uw_update_reg_address (struct _Unwind_Context *context,
 	    context->ireg[regno - UNW_REG_R2].nat
 	      = context->ireg[rval - 2].nat;
 	  }
+	else if (rval == 0)
+	  {
+	    context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_NONE;
+	    context->ireg[regno - UNW_REG_R2].nat.off = 0;
+	  }
 	else
 	  abort ();
 	break;
diff --git a/contrib/gcc/config/linux.h b/contrib/gcc/config/linux.h
index 2ddb9c7..297fa92 100644
--- a/contrib/gcc/config/linux.h
+++ b/contrib/gcc/config/linux.h
@@ -108,6 +108,9 @@ Boston, MA 02111-1307, USA.  */
 #define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
 #endif
 
+#define LINK_GCC_C_SEQUENCE_SPEC \
+  "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
 /* Define this so we can compile MS code for use with WINE.  */
 #define HANDLE_PRAGMA_PACK_PUSH_POP
 
diff --git a/contrib/gcc/config/rs6000/linux.h b/contrib/gcc/config/rs6000/linux.h
index 593e961..15db88a 100644
--- a/contrib/gcc/config/rs6000/linux.h
+++ b/contrib/gcc/config/rs6000/linux.h
@@ -48,6 +48,9 @@ Boston, MA 02111-1307, USA.  */
 #undef LINK_SHLIB_SPEC
 #define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}"
 
+#define LINK_GCC_C_SEQUENCE_SPEC \
+  "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
 #undef	LIB_DEFAULT_SPEC
 #define LIB_DEFAULT_SPEC "%(lib_linux)"
 
diff --git a/contrib/gcc/config/rs6000/linux64.h b/contrib/gcc/config/rs6000/linux64.h
index 55065c6..a538d02 100644
--- a/contrib/gcc/config/rs6000/linux64.h
+++ b/contrib/gcc/config/rs6000/linux64.h
@@ -137,6 +137,9 @@ Boston, MA 02111-1307, USA.  */
 #undef  LINK_SHLIB_SPEC
 #define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}"
 
+#define LINK_GCC_C_SEQUENCE_SPEC \
+  "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
 #undef  LIB_DEFAULT_SPEC
 #define LIB_DEFAULT_SPEC "%(lib_linux)"
 
diff --git a/contrib/gcc/config/rs6000/rs6000.c b/contrib/gcc/config/rs6000/rs6000.c
index 3c17f21..13f4ed3 100644
--- a/contrib/gcc/config/rs6000/rs6000.c
+++ b/contrib/gcc/config/rs6000/rs6000.c
@@ -3310,7 +3310,7 @@ function_arg_pass_by_reference (cum, mode, type, named)
 
       return 1;
     }
-  return type && int_size_in_bytes (type) <= 0;
+  return type && int_size_in_bytes (type) < 0;
 }
 
 /* Perform any needed actions needed for a function that is receiving a
@@ -3551,7 +3551,7 @@ rs6000_va_arg (valist, type)
   if (DEFAULT_ABI != ABI_V4)
     {
       /* Variable sized types are passed by reference.  */
-      if (int_size_in_bytes (type) <= 0)
+      if (int_size_in_bytes (type) < 0)
 	{
 	  u = build_pointer_type (type);
 
diff --git a/contrib/gcc/config/rs6000/rs6000.md b/contrib/gcc/config/rs6000/rs6000.md
index 4c95031..4d5ef9d 100644
--- a/contrib/gcc/config/rs6000/rs6000.md
+++ b/contrib/gcc/config/rs6000/rs6000.md
@@ -14308,7 +14308,7 @@
     return \"bdz $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 (define_insn "*ctrsi_internal2"
   [(set (pc)
@@ -14332,7 +14332,7 @@
     return \"{bdn|bdnz} $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 (define_insn "*ctrdi_internal1"
   [(set (pc)
@@ -14356,7 +14356,7 @@
     return \"bdz $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 (define_insn "*ctrdi_internal2"
   [(set (pc)
@@ -14380,7 +14380,7 @@
     return \"{bdn|bdnz} $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 ;; Similar, but we can use GE since we have a REG_NONNEG.
 
@@ -14406,7 +14406,7 @@
     return \"bdz $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 (define_insn "*ctrsi_internal4"
   [(set (pc)
@@ -14430,7 +14430,7 @@
     return \"{bdn|bdnz} $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 (define_insn "*ctrdi_internal3"
   [(set (pc)
@@ -14454,7 +14454,7 @@
     return \"bdz $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 (define_insn "*ctrdi_internal4"
   [(set (pc)
@@ -14478,7 +14478,7 @@
     return \"{bdn|bdnz} $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 ;; Similar but use EQ
 
@@ -14504,7 +14504,7 @@
     return \"{bdn|bdnz} $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 (define_insn "*ctrsi_internal6"
   [(set (pc)
@@ -14528,7 +14528,7 @@
     return \"bdz $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 (define_insn "*ctrdi_internal5"
   [(set (pc)
@@ -14552,7 +14552,7 @@
     return \"{bdn|bdnz} $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 (define_insn "*ctrdi_internal6"
   [(set (pc)
@@ -14576,7 +14576,7 @@
     return \"bdz $+8\;b %l0\";
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4,12,16")])
+   (set_attr "length" "*,12,16")])
 
 ;; Now the splitters if we could not allocate the CTR register
 
diff --git a/contrib/gcc/config/rs6000/sysv4.h b/contrib/gcc/config/rs6000/sysv4.h
index 3da996f..26450e7 100644
--- a/contrib/gcc/config/rs6000/sysv4.h
+++ b/contrib/gcc/config/rs6000/sysv4.h
@@ -383,6 +383,21 @@ do {									\
 #undef	STACK_BOUNDARY
 #define	STACK_BOUNDARY	(TARGET_ALTIVEC_ABI ? 128 : 64)
 
+/* Define this macro if you wish to preserve a certain alignment for
+   the stack pointer, greater than what the hardware enforces.  The
+   definition is a C expression for the desired alignment (measured
+   in bits).  This macro must evaluate to a value equal to or larger
+   than STACK_BOUNDARY.
+   For the SYSV ABI and variants the alignment of the stack pointer
+   is usually controlled manually in rs6000.c. However, to maintain
+   alignment across alloca () in all circumstances,
+   PREFERRED_STACK_BOUNDARY needs to be set as well.
+   This has the additional advantage of allowing a bigger maximum
+   alignment of user objects on the stack.  */
+
+#undef PREFERRED_STACK_BOUNDARY
+#define PREFERRED_STACK_BOUNDARY 128
+
 /* Real stack boundary as mandated by the appropriate ABI.  */
 #define ABI_STACK_BOUNDARY ((TARGET_EABI && !TARGET_ALTIVEC_ABI) ? 64 : 128)
 
diff --git a/contrib/gcc/config/sparc/linux.h b/contrib/gcc/config/sparc/linux.h
index ea16b7eed..e4bd7bb 100644
--- a/contrib/gcc/config/sparc/linux.h
+++ b/contrib/gcc/config/sparc/linux.h
@@ -256,6 +256,10 @@ do {									\
 #undef CTORS_SECTION_ASM_OP
 #undef DTORS_SECTION_ASM_OP
 
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC \
+  "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
 /* Do code reading to identify a signal frame, and set the frame
    state data appropriately.  See unwind-dw2.c for the structs.  */
 
diff --git a/contrib/gcc/config/sparc/linux64.h b/contrib/gcc/config/sparc/linux64.h
index 8fd3a1f..27be1d1 100644
--- a/contrib/gcc/config/sparc/linux64.h
+++ b/contrib/gcc/config/sparc/linux64.h
@@ -319,6 +319,10 @@ do {									\
 #undef CTORS_SECTION_ASM_OP
 #undef DTORS_SECTION_ASM_OP
 
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC \
+  "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
 /* Do code reading to identify a signal frame, and set the frame
    state data appropriately.  See unwind-dw2.c for the structs.  */
 
diff --git a/contrib/gcc/config/sparc/sol2-c1.asm b/contrib/gcc/config/sparc/sol2-c1.asm
index 894a8c3..a1cc68d 100644
--- a/contrib/gcc/config/sparc/sol2-c1.asm
+++ b/contrib/gcc/config/sparc/sol2-c1.asm
@@ -92,6 +92,10 @@ _start:
 	! access those data anyway.  Instead, go straight to main:
 	mov	%l0, %o0	! argc
 	mov	%l1, %o1	! argv
+#ifdef GCRT1
+	setn(___Argv, %o4, %o3)
+	stn	%o1, [%o3]      ! *___Argv
+#endif
 	! Skip argc words past argv, to env:
 	sll	%l0, CPTRSHIFT, %o2
 	add	%o2, CPTRSIZE, %o2
diff --git a/contrib/gcc/config/sparc/sparc.c b/contrib/gcc/config/sparc/sparc.c
index 5b03f05..79022b4 100644
--- a/contrib/gcc/config/sparc/sparc.c
+++ b/contrib/gcc/config/sparc/sparc.c
@@ -8028,6 +8028,10 @@ sparc_v8plus_shift (operands, insn, opcode)
   if (which_alternative != 2)
     operands[3] = operands[0];
 
+  /* We can only shift by constants <= 63. */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+
   if (GET_CODE (operands[1]) == CONST_INT)
     {
       output_asm_insn ("mov\t%1, %3", operands);
diff --git a/contrib/gcc/config/sparc/sparc.md b/contrib/gcc/config/sparc/sparc.md
index ebe9d2b..b53013e 100644
--- a/contrib/gcc/config/sparc/sparc.md
+++ b/contrib/gcc/config/sparc/sparc.md
@@ -148,8 +148,12 @@
 	 (eq_attr "branch_type" "fcc")
 	   (if_then_else (match_operand 0 "fcc0_reg_operand" "")
 	     (if_then_else (eq_attr "empty_delay_slot" "true")
-	       (const_int 2)
-	       (const_int 1))
+	       (if_then_else (eq (symbol_ref "TARGET_V9") (const_int 0))
+		 (const_int 3)
+		 (const_int 2))
+	       (if_then_else (eq (symbol_ref "TARGET_V9") (const_int 0))
+		 (const_int 2)
+		 (const_int 1)))
 	     (if_then_else (lt (pc) (match_dup 2))
 	       (if_then_else (lt (minus (match_dup 2) (pc)) (const_int 260000))
 		 (if_then_else (eq_attr "empty_delay_slot" "true")
@@ -6881,6 +6885,8 @@
 {
   if (operands[2] == const1_rtx)
     return "add\t%1, %1, %0";
+  if (GET_CODE (operands[2]) == CONST_INT)
+    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
   return "sll\t%1, %2, %0";
 }
   [(set (attr "type")
@@ -6910,6 +6916,8 @@
 {
   if (operands[2] == const1_rtx)
     return "add\t%1, %1, %0";
+  if (GET_CODE (operands[2]) == CONST_INT)
+    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
   return "sllx\t%1, %2, %0";
 }
   [(set (attr "type")
@@ -6969,7 +6977,11 @@
 	(ashiftrt:SI (match_operand:SI 1 "register_operand" "r")
 		     (match_operand:SI 2 "arith_operand" "rI")))]
   ""
-  "sra\t%1, %2, %0"
+  {
+     if (GET_CODE (operands[2]) == CONST_INT)
+       operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+     return "sra\t%1, %2, %0";
+  }
   [(set_attr "type" "shift")])
 
 (define_insn "*ashrsi3_extend"
@@ -7016,12 +7028,17 @@
     }
 })
 
-(define_insn ""
+(define_insn "*ashrdi3_sp64"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
 		     (match_operand:SI 2 "arith_operand" "rI")))]
   "TARGET_ARCH64"
-  "srax\t%1, %2, %0"
+  
+  {
+    if (GET_CODE (operands[2]) == CONST_INT)
+      operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+    return "srax\t%1, %2, %0";
+  }
   [(set_attr "type" "shift")])
 
 ;; XXX
@@ -7040,7 +7057,11 @@
 	(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
 		     (match_operand:SI 2 "arith_operand" "rI")))]
   ""
-  "srl\t%1, %2, %0"
+  {
+    if (GET_CODE (operands[2]) == CONST_INT)
+      operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+    return "srl\t%1, %2, %0";
+  }
   [(set_attr "type" "shift")])
 
 ;; This handles the case where
@@ -7097,12 +7118,16 @@
     }
 })
 
-(define_insn ""
+(define_insn "*lshrdi3_sp64"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
 		     (match_operand:SI 2 "arith_operand" "rI")))]
   "TARGET_ARCH64"
-  "srlx\t%1, %2, %0"
+  {
+    if (GET_CODE (operands[2]) == CONST_INT)
+      operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+    return "srlx\t%1, %2, %0";
+  }
   [(set_attr "type" "shift")])
 
 ;; XXX
diff --git a/contrib/gcc/config/t-darwin b/contrib/gcc/config/t-darwin
index 7fe5e93..c823fa5 100644
--- a/contrib/gcc/config/t-darwin
+++ b/contrib/gcc/config/t-darwin
@@ -12,7 +12,7 @@ gt-darwin.h : s-gtype ; @true
 
 # Explain how to build crt2.o
 $(T)crt2$(objext): $(srcdir)/config/darwin-crt2.c $(GCC_PASSES) \
-	$(TCONFIG_H) tsystem.h
+	$(TCONFIG_H) stmp-int-hdrs tsystem.h
 	$(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) \
 	  -c $(srcdir)/config/darwin-crt2.c -o $(T)crt2$(objext)
 
diff --git a/contrib/gcc/config/t-libunwind b/contrib/gcc/config/t-libunwind
new file mode 100644
index 0000000..be50bc4
--- /dev/null
+++ b/contrib/gcc/config/t-libunwind
@@ -0,0 +1 @@
+LIB2ADDEH = $(srcdir)/unwind-libunwind.c $(srcdir)/unwind-sjlj.c