diff options
Diffstat (limited to 'contrib/gcc/config')
37 files changed, 3065 insertions, 2403 deletions
diff --git a/contrib/gcc/config/alpha/alpha.c b/contrib/gcc/config/alpha/alpha.c index 9657e56..42b57e1 100644 --- a/contrib/gcc/config/alpha/alpha.c +++ b/contrib/gcc/config/alpha/alpha.c @@ -2949,12 +2949,8 @@ alpha_expand_mov (mode, operands) compiled at the end of compilation. In the meantime, someone can re-encode-section-info on some symbol changing it e.g. from global to local-not-small. If this happens, we'd have emitted a plain - load rather than a high+losum load and not recognize the insn. - - So if rtl inlining is in effect, we delay the global/not-global - decision until rest_of_compilation by wrapping it in an - UNSPEC_SYMBOL. */ - if (TARGET_EXPLICIT_RELOCS && flag_inline_functions + load rather than a high+losum load and not recognize the insn. */ + if (TARGET_EXPLICIT_RELOCS && rtx_equal_function_value_matters && global_symbolic_operand (operands[1], mode)) { @@ -3336,10 +3332,10 @@ alpha_emit_conditional_branch (code) if (op1 == const0_rtx) cmp_code = NIL, branch_code = code; - /* We want to use cmpcc/bcc when we can, since there is a zero delay - bypass between logicals and br/cmov on EV5. But we don't want to - force valid immediate constants into registers needlessly. */ - else if (GET_CODE (op1) == CONST_INT) + /* If the constants doesn't fit into an immediate, but can + be generated by lda/ldah, we adjust the argument and + compare against zero, so we can use beq/bne directly. */ + else if (GET_CODE (op1) == CONST_INT && (code == EQ || code == NE)) { HOST_WIDE_INT v = INTVAL (op1), n = -v; @@ -6748,14 +6744,21 @@ alpha_sa_mask (imaskP, fmaskP) /* We need to restore these for the handler. */ if (current_function_calls_eh_return) - for (i = 0; ; ++i) - { - unsigned regno = EH_RETURN_DATA_REGNO (i); - if (regno == INVALID_REGNUM) - break; - imask |= 1L << regno; - } - + { + for (i = 0; ; ++i) + { + unsigned regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + imask |= 1L << regno; + } + + /* Glibc likes to use $31 as an unwind stopper for crt0. To + avoid hackery in unwind-dw2.c, we need to actively store a + zero in the prologue of _Unwind_RaiseException et al. */ + imask |= 1UL << 31; + } + /* If any register spilled, then spill the return address also. */ /* ??? This is required by the Digital stack unwind specification and isn't needed if we're doing Dwarf2 unwinding. */ @@ -7210,7 +7213,7 @@ alpha_expand_prologue () } /* Now save any other registers required to be saved. */ - for (i = 0; i < 32; i++) + for (i = 0; i < 31; i++) if (imask & (1L << i)) { mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset)); @@ -7219,7 +7222,25 @@ alpha_expand_prologue () reg_offset += 8; } - for (i = 0; i < 32; i++) + /* Store a zero if requested for unwinding. */ + if (imask & (1UL << 31)) + { + rtx insn, t; + + mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset)); + set_mem_alias_set (mem, alpha_sr_alias_set); + insn = emit_move_insn (mem, const0_rtx); + + RTX_FRAME_RELATED_P (insn) = 1; + t = gen_rtx_REG (Pmode, 31); + t = gen_rtx_SET (VOIDmode, mem, t); + t = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, t, REG_NOTES (insn)); + REG_NOTES (insn) = t; + + reg_offset += 8; + } + + for (i = 0; i < 31; i++) if (fmask & (1L << i)) { mem = gen_rtx_MEM (DFmode, plus_constant (sa_reg, reg_offset)); @@ -7625,7 +7646,7 @@ alpha_expand_epilogue () reg_offset += 8; imask &= ~(1L << REG_RA); - for (i = 0; i < 32; ++i) + for (i = 0; i < 31; ++i) if (imask & (1L << i)) { if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer) @@ -7639,7 +7660,10 @@ alpha_expand_epilogue () reg_offset += 8; } - for (i = 0; i < 32; ++i) + if (imask & (1UL << 31)) + reg_offset += 8; + + for (i = 0; i < 31; ++i) if (fmask & (1L << i)) { mem = gen_rtx_MEM (DFmode, plus_constant(sa_reg, reg_offset)); diff --git a/contrib/gcc/config/alpha/alpha.h b/contrib/gcc/config/alpha/alpha.h index b933ea3..6b52700 100644 --- a/contrib/gcc/config/alpha/alpha.h +++ b/contrib/gcc/config/alpha/alpha.h @@ -1276,6 +1276,7 @@ do { \ /* Before the prologue, RA lives in $26. */ #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 26) #define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (26) +#define DWARF_ALT_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (64) /* Describe how we implement __builtin_eh_return. */ #define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 16 : INVALID_REGNUM) diff --git a/contrib/gcc/config/alpha/alpha.md b/contrib/gcc/config/alpha/alpha.md index a4c0ae6..f7e9fa4 100644 --- a/contrib/gcc/config/alpha/alpha.md +++ b/contrib/gcc/config/alpha/alpha.md @@ -5399,7 +5399,7 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none" [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] UNSPEC_SYMBOL))] - "TARGET_EXPLICIT_RELOCS && flag_inline_functions" + "TARGET_EXPLICIT_RELOCS" "#" "" [(set (match_dup 0) (match_dup 1))] diff --git a/contrib/gcc/config/alpha/linux.h b/contrib/gcc/config/alpha/linux.h index 0c53344..3a2940c 100644 --- a/contrib/gcc/config/alpha/linux.h +++ b/contrib/gcc/config/alpha/linux.h @@ -61,6 +61,9 @@ Boston, MA 02111-1307, USA. */ #define TARGET_HAS_F_SETLKW +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" + /* Do code reading to identify a signal frame, and set the frame state data appropriately. See unwind-dw2.c for the structs. */ @@ -78,6 +81,8 @@ Boston, MA 02111-1307, USA. */ if (pc_[0] != 0x47fe0410 /* mov $30,$16 */ \ || pc_[2] != 0x00000083 /* callsys */) \ break; \ + if ((CONTEXT)->cfa == 0) \ + break; \ if (pc_[1] == 0x201f0067) /* lda $0,NR_sigreturn */ \ sc_ = (CONTEXT)->cfa; \ else if (pc_[1] == 0x201f015f) /* lda $0,NR_rt_sigreturn */ \ @@ -106,8 +111,8 @@ Boston, MA 02111-1307, USA. */ (FS)->regs.reg[i_+32].loc.offset \ = (long)&sc_->sc_fpregs[i_] - new_cfa_; \ } \ - (FS)->regs.reg[31].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[31].loc.offset = (long)&sc_->sc_pc - new_cfa_; \ - (FS)->retaddr_column = 31; \ + (FS)->regs.reg[64].how = REG_SAVED_OFFSET; \ + (FS)->regs.reg[64].loc.offset = (long)&sc_->sc_pc - new_cfa_; \ + (FS)->retaddr_column = 64; \ goto SUCCESS; \ } while (0) diff --git a/contrib/gcc/config/arm/linux-elf.h b/contrib/gcc/config/arm/linux-elf.h index c6d14e7..8cc812f 100644 --- a/contrib/gcc/config/arm/linux-elf.h +++ b/contrib/gcc/config/arm/linux-elf.h @@ -123,3 +123,6 @@ Boston, MA 02111-1307, USA. */ #undef CC1_SPEC #define CC1_SPEC "%{profile:-p}" + +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" diff --git a/contrib/gcc/config/darwin.h b/contrib/gcc/config/darwin.h index 90d959c..c4b7526 100644 --- a/contrib/gcc/config/darwin.h +++ b/contrib/gcc/config/darwin.h @@ -380,6 +380,10 @@ do { text_section (); \ || DECL_INITIAL (DECL)) \ (* targetm.encode_section_info) (DECL, false); \ ASM_OUTPUT_LABEL (FILE, xname); \ + /* Darwin doesn't support zero-size objects, so give them a \ + byte. */ \ + if (tree_low_cst (DECL_SIZE_UNIT (DECL), 1) == 0) \ + assemble_zeros (1); \ } while (0) #define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ @@ -401,6 +405,15 @@ do { text_section (); \ machopic_output_possible_stub_label (FILE, xname); \ } while (0) +#define ASM_DECLARE_CONSTANT_NAME(FILE, NAME, EXP, SIZE) \ + do { \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + /* Darwin doesn't support zero-size objects, so give them a \ + byte. */ \ + if ((SIZE) == 0) \ + assemble_zeros (1); \ + } while (0) + /* Wrap new method names in quotes so the assembler doesn't gag. Make Objective-C internal symbols local. */ diff --git a/contrib/gcc/config/i386/emmintrin.h b/contrib/gcc/config/i386/emmintrin.h new file mode 100644 index 0000000..7007fc5 --- /dev/null +++ b/contrib/gcc/config/i386/emmintrin.h @@ -0,0 +1,1499 @@ +/* Copyright (C) 2003 Free Software Foundation, Inc. + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU CC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 8.0. */ + +#ifndef _EMMINTRIN_H_INCLUDED +#define _EMMINTRIN_H_INCLUDED + +#ifdef __SSE2__ +#include <xmmintrin.h> + +/* SSE2 */ +typedef int __v2df __attribute__ ((mode (V2DF))); +typedef int __v2di __attribute__ ((mode (V2DI))); +typedef int __v4si __attribute__ ((mode (V4SI))); +typedef int __v8hi __attribute__ ((mode (V8HI))); +typedef int __v16qi __attribute__ ((mode (V16QI))); + +/* Create a selector for use with the SHUFPD instruction. */ +#define _MM_SHUFFLE2(fp1,fp0) \ + (((fp1) << 1) | (fp0)) + +#define __m128i __v2di +#define __m128d __v2df + +/* Create a vector with element 0 as *P and the rest zero. */ +static __inline __m128d +_mm_load_sd (double const *__P) +{ + return (__m128d) __builtin_ia32_loadsd (__P); +} + +/* Create a vector with all two elements equal to *P. */ +static __inline __m128d +_mm_load1_pd (double const *__P) +{ + __v2df __tmp = __builtin_ia32_loadsd (__P); + return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0)); +} + +static __inline __m128d +_mm_load_pd1 (double const *__P) +{ + return _mm_load1_pd (__P); +} + +/* Load two DPFP values from P. The address must be 16-byte aligned. */ +static __inline __m128d +_mm_load_pd (double const *__P) +{ + return (__m128d) __builtin_ia32_loadapd (__P); +} + +/* Load two DPFP values from P. The address need not be 16-byte aligned. */ +static __inline __m128d +_mm_loadu_pd (double const *__P) +{ + return (__m128d) __builtin_ia32_loadupd (__P); +} + +/* Load two DPFP values in reverse order. The address must be aligned. */ +static __inline __m128d +_mm_loadr_pd (double const *__P) +{ + __v2df __tmp = __builtin_ia32_loadapd (__P); + return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); +} + +/* Create a vector with element 0 as F and the rest zero. */ +static __inline __m128d +_mm_set_sd (double __F) +{ + return (__m128d) __builtin_ia32_loadsd (&__F); +} + +/* Create a vector with all two elements equal to F. */ +static __inline __m128d +_mm_set1_pd (double __F) +{ + __v2df __tmp = __builtin_ia32_loadsd (&__F); + return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0)); +} + +static __inline __m128d +_mm_set_pd1 (double __F) +{ + return _mm_set1_pd (__F); +} + +/* Create the vector [Z Y]. */ +static __inline __m128d +_mm_set_pd (double __Z, double __Y) +{ + union { + double __a[2]; + __m128d __v; + } __u; + + __u.__a[0] = __Y; + __u.__a[1] = __Z; + + return __u.__v; +} + +/* Create the vector [Y Z]. */ +static __inline __m128d +_mm_setr_pd (double __Z, double __Y) +{ + return _mm_set_pd (__Y, __Z); +} + +/* Create a vector of zeros. */ +static __inline __m128d +_mm_setzero_pd (void) +{ + return (__m128d) __builtin_ia32_setzeropd (); +} + +/* Stores the lower DPFP value. */ +static __inline void +_mm_store_sd (double *__P, __m128d __A) +{ + __builtin_ia32_storesd (__P, (__v2df)__A); +} + +/* Store the lower DPFP value acrosd two words. */ +static __inline void +_mm_store1_pd (double *__P, __m128d __A) +{ + __v2df __va = (__v2df)__A; + __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,0)); + __builtin_ia32_storeapd (__P, __tmp); +} + +static __inline void +_mm_store_pd1 (double *__P, __m128d __A) +{ + _mm_store1_pd (__P, __A); +} + +/* Store two DPFP values. The address must be 16-byte aligned. */ +static __inline void +_mm_store_pd (double *__P, __m128d __A) +{ + __builtin_ia32_storeapd (__P, (__v2df)__A); +} + +/* Store two DPFP values. The address need not be 16-byte aligned. */ +static __inline void +_mm_storeu_pd (double *__P, __m128d __A) +{ + __builtin_ia32_storeupd (__P, (__v2df)__A); +} + +/* Store two DPFP values in reverse order. The address must be aligned. */ +static __inline void +_mm_storer_pd (double *__P, __m128d __A) +{ + __v2df __va = (__v2df)__A; + __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,1)); + __builtin_ia32_storeapd (__P, __tmp); +} + +/* Sets the low DPFP value of A from the low value of B. */ +static __inline __m128d +_mm_move_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); +} + + +static __inline __m128d +_mm_add_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_add_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_sub_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_sub_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_mul_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_mul_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_div_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_div_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_sqrt_pd (__m128d __A) +{ + return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); +} + +/* Return pair {sqrt (A[0), B[1]}. */ +static __inline __m128d +_mm_sqrt_sd (__m128d __A, __m128d __B) +{ + __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); + return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); +} + +static __inline __m128d +_mm_min_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_min_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_max_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_max_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_and_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_andnot_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_or_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_xor_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpeq_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmplt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmple_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpgt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpge_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpneq_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpnlt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpnle_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpngt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpnge_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpord_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpunord_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpeq_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmplt_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmple_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpgt_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmpltsd ((__v2df) __B, + (__v2df) + __A)); +} + +static __inline __m128d +_mm_cmpge_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmplesd ((__v2df) __B, + (__v2df) + __A)); +} + +static __inline __m128d +_mm_cmpneq_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpnlt_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpnle_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpngt_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmpnltsd ((__v2df) __B, + (__v2df) + __A)); +} + +static __inline __m128d +_mm_cmpnge_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmpnlesd ((__v2df) __B, + (__v2df) + __A)); +} + +static __inline __m128d +_mm_cmpord_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpunord_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comieq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comilt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comile_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comigt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comige_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comineq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomieq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomilt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomile_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomigt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomige_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomineq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); +} + +/* Create a vector with element 0 as *P and the rest zero. */ + +static __inline __m128i +_mm_load_si128 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_loaddqa ((char const *)__P); +} + +static __inline __m128i +_mm_loadu_si128 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); +} + +static __inline __m128i +_mm_loadl_epi64 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_movq2dq (*(unsigned long long *)__P); +} + +static __inline void +_mm_store_si128 (__m128i *__P, __m128i __B) +{ + __builtin_ia32_storedqa ((char *)__P, (__v16qi)__B); +} + +static __inline void +_mm_storeu_si128 (__m128i *__P, __m128i __B) +{ + __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); +} + +static __inline void +_mm_storel_epi64 (__m128i *__P, __m128i __B) +{ + *(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B); +} + +static __inline __m64 +_mm_movepi64_pi64 (__m128i __B) +{ + return (__m64) __builtin_ia32_movdq2q ((__v2di)__B); +} + +static __inline __m128i +_mm_move_epi64 (__m128i __A) +{ + return (__m128i) __builtin_ia32_movq ((__v2di)__A); +} + +/* Create a vector of zeros. */ +static __inline __m128i +_mm_setzero_si128 (void) +{ + return (__m128i) __builtin_ia32_setzero128 (); +} + +static __inline __m128i +_mm_set_epi64 (__m64 __A, __m64 __B) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B); + return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp2, __tmp); +} + +/* Create the vector [Z Y X W]. */ +static __inline __m128i +_mm_set_epi32 (int __Z, int __Y, int __X, int __W) +{ + union { + int __a[4]; + __m128i __v; + } __u; + + __u.__a[0] = __W; + __u.__a[1] = __X; + __u.__a[2] = __Y; + __u.__a[3] = __Z; + + return __u.__v; +} + +#ifdef __x86_64__ +/* Create the vector [Z Y]. */ +static __inline __m128i +_mm_set_epi64x (long long __Z, long long __Y) +{ + union { + long __a[2]; + __m128i __v; + } __u; + + __u.__a[0] = __Y; + __u.__a[1] = __Z; + + return __u.__v; +} +#endif + +/* Create the vector [S T U V Z Y X W]. */ +static __inline __m128i +_mm_set_epi16 (short __Z, short __Y, short __X, short __W, + short __V, short __U, short __T, short __S) +{ + union { + short __a[8]; + __m128i __v; + } __u; + + __u.__a[0] = __S; + __u.__a[1] = __T; + __u.__a[2] = __U; + __u.__a[3] = __V; + __u.__a[4] = __W; + __u.__a[5] = __X; + __u.__a[6] = __Y; + __u.__a[7] = __Z; + + return __u.__v; +} + +/* Create the vector [S T U V Z Y X W]. */ +static __inline __m128i +_mm_set_epi8 (char __Z, char __Y, char __X, char __W, + char __V, char __U, char __T, char __S, + char __Z1, char __Y1, char __X1, char __W1, + char __V1, char __U1, char __T1, char __S1) +{ + union { + char __a[16]; + __m128i __v; + } __u; + + __u.__a[0] = __S1; + __u.__a[1] = __T1; + __u.__a[2] = __U1; + __u.__a[3] = __V1; + __u.__a[4] = __W1; + __u.__a[5] = __X1; + __u.__a[6] = __Y1; + __u.__a[7] = __Z1; + __u.__a[8] = __S; + __u.__a[9] = __T; + __u.__a[10] = __U; + __u.__a[11] = __V; + __u.__a[12] = __W; + __u.__a[13] = __X; + __u.__a[14] = __Y; + __u.__a[15] = __Z; + + return __u.__v; +} + +static __inline __m128i +_mm_set1_epi64 (__m64 __A) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp); +} + +static __inline __m128i +_mm_set1_epi32 (int __A) +{ + __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__A); + return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); +} + +#ifdef __x86_64__ +static __inline __m128i +_mm_set1_epi64x (long long __A) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0)); +} +#endif + +static __inline __m128i +_mm_set1_epi16 (short __A) +{ + int __Acopy = (unsigned short)__A; + __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy); + __tmp = (__v4si)__builtin_ia32_punpcklwd128 ((__v8hi)__tmp, (__v8hi)__tmp); + return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); +} + +static __inline __m128i +_mm_set1_epi8 (char __A) +{ + int __Acopy = (unsigned char)__A; + __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy); + __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp); + __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp); + return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); +} + +static __inline __m128i +_mm_setr_epi64 (__m64 __A, __m64 __B) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B); + return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp2); +} + +/* Create the vector [Z Y X W]. */ +static __inline __m128i +_mm_setr_epi32 (int __W, int __X, int __Y, int __Z) +{ + union { + int __a[4]; + __m128i __v; + } __u; + + __u.__a[0] = __W; + __u.__a[1] = __X; + __u.__a[2] = __Y; + __u.__a[3] = __Z; + + return __u.__v; +} +/* Create the vector [S T U V Z Y X W]. */ +static __inline __m128i +_mm_setr_epi16 (short __S, short __T, short __U, short __V, + short __W, short __X, short __Y, short __Z) +{ + union { + short __a[8]; + __m128i __v; + } __u; + + __u.__a[0] = __S; + __u.__a[1] = __T; + __u.__a[2] = __U; + __u.__a[3] = __V; + __u.__a[4] = __W; + __u.__a[5] = __X; + __u.__a[6] = __Y; + __u.__a[7] = __Z; + + return __u.__v; +} + +/* Create the vector [S T U V Z Y X W]. */ +static __inline __m128i +_mm_setr_epi8 (char __S1, char __T1, char __U1, char __V1, + char __W1, char __X1, char __Y1, char __Z1, + char __S, char __T, char __U, char __V, + char __W, char __X, char __Y, char __Z) +{ + union { + char __a[16]; + __m128i __v; + } __u; + + __u.__a[0] = __S1; + __u.__a[1] = __T1; + __u.__a[2] = __U1; + __u.__a[3] = __V1; + __u.__a[4] = __W1; + __u.__a[5] = __X1; + __u.__a[6] = __Y1; + __u.__a[7] = __Z1; + __u.__a[8] = __S; + __u.__a[9] = __T; + __u.__a[10] = __U; + __u.__a[11] = __V; + __u.__a[12] = __W; + __u.__a[13] = __X; + __u.__a[14] = __Y; + __u.__a[15] = __Z; + + return __u.__v; +} + +static __inline __m128d +_mm_cvtepi32_pd (__m128i __A) +{ + return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); +} + +static __inline __m128 +_mm_cvtepi32_ps (__m128i __A) +{ + return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); +} + +static __inline __m128i +_mm_cvtpd_epi32 (__m128d __A) +{ + return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); +} + +static __inline __m64 +_mm_cvtpd_pi32 (__m128d __A) +{ + return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); +} + +static __inline __m128 +_mm_cvtpd_ps (__m128d __A) +{ + return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); +} + +static __inline __m128i +_mm_cvttpd_epi32 (__m128d __A) +{ + return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); +} + +static __inline __m64 +_mm_cvttpd_pi32 (__m128d __A) +{ + return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A); +} + +static __inline __m128d +_mm_cvtpi32_pd (__m64 __A) +{ + return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); +} + +static __inline __m128i +_mm_cvtps_epi32 (__m128 __A) +{ + return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A); +} + +static __inline __m128i +_mm_cvttps_epi32 (__m128 __A) +{ + return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A); +} + +static __inline __m128d +_mm_cvtps_pd (__m128 __A) +{ + return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A); +} + +static __inline int +_mm_cvtsd_si32 (__m128d __A) +{ + return __builtin_ia32_cvtsd2si ((__v2df) __A); +} + +#ifdef __x86_64__ +static __inline long long +_mm_cvtsd_si64x (__m128d __A) +{ + return __builtin_ia32_cvtsd2si64 ((__v2df) __A); +} +#endif + +static __inline int +_mm_cvttsd_si32 (__m128d __A) +{ + return __builtin_ia32_cvttsd2si ((__v2df) __A); +} + +#ifdef __x86_64__ +static __inline long long +_mm_cvttsd_si64x (__m128d __A) +{ + return __builtin_ia32_cvttsd2si64 ((__v2df) __A); +} +#endif + +static __inline __m128 +_mm_cvtsd_ss (__m128 __A, __m128d __B) +{ + return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); +} + +static __inline __m128d +_mm_cvtsi32_sd (__m128d __A, int __B) +{ + return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B); +} + +#ifdef __x86_64__ +static __inline __m128d +_mm_cvtsi64x_sd (__m128d __A, long long __B) +{ + return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); +} +#endif + +static __inline __m128d +_mm_cvtss_sd (__m128d __A, __m128 __B) +{ + return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); +} + +#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C))) + +static __inline __m128d +_mm_unpackhi_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_unpacklo_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_loadh_pd (__m128d __A, double const *__B) +{ + return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B); +} + +static __inline void +_mm_storeh_pd (double *__A, __m128d __B) +{ + __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_loadl_pd (__m128d __A, double const *__B) +{ + return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B); +} + +static __inline void +_mm_storel_pd (double *__A, __m128d __B) +{ + __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B); +} + +static __inline int +_mm_movemask_pd (__m128d __A) +{ + return __builtin_ia32_movmskpd ((__v2df)__A); +} + +static __inline __m128i +_mm_packs_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_packs_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_packus_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_unpackhi_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_unpackhi_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_unpackhi_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_unpackhi_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_unpacklo_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_unpacklo_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_unpacklo_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_unpacklo_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_add_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_add_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_add_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_add_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_adds_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_adds_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_adds_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_adds_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_sub_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_sub_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_sub_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_sub_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_subs_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_subs_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_subs_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_subs_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_madd_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_mulhi_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_mullo_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m64 +_mm_mul_su32 (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B); +} + +static __inline __m128i +_mm_mul_epu32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_sll_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psllw128 ((__v8hi)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_sll_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pslld128 ((__v4si)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_sll_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psllq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_sra_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_sra_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_srl_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_srl_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_srl_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_slli_epi16 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); +} + +static __inline __m128i +_mm_slli_epi32 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); +} + +static __inline __m128i +_mm_slli_epi64 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); +} + +static __inline __m128i +_mm_srai_epi16 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); +} + +static __inline __m128i +_mm_srai_epi32 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); +} + +#if 0 +static __m128i __attribute__((__always_inline__)) +_mm_srli_si128 (__m128i __A, const int __B) +{ + return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) +} + +static __m128i __attribute__((__always_inline__)) +_mm_srli_si128 (__m128i __A, const int __B) +{ + return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) +} +#endif +#define _mm_srli_si128(__A, __B) ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) +#define _mm_slli_si128(__A, __B) ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) + +static __inline __m128i +_mm_srli_epi16 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); +} + +static __inline __m128i +_mm_srli_epi32 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); +} + +static __inline __m128i +_mm_srli_epi64 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); +} + +static __inline __m128i +_mm_and_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_andnot_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_or_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_xor_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_cmpeq_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_cmpeq_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_cmpeq_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_cmplt_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A); +} + +static __inline __m128i +_mm_cmplt_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A); +} + +static __inline __m128i +_mm_cmplt_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A); +} + +static __inline __m128i +_mm_cmpgt_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_cmpgt_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_cmpgt_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B); +} + +#define _mm_extract_epi16(__A, __B) __builtin_ia32_pextrw128 ((__v8hi)__A, __B) + +#define _mm_insert_epi16(__A, __B, __C) ((__m128i)__builtin_ia32_pinsrw128 ((__v8hi)__A, __B, __C)) + +static __inline __m128i +_mm_max_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_max_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_min_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_min_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline int +_mm_movemask_epi8 (__m128i __A) +{ + return __builtin_ia32_pmovmskb128 ((__v16qi)__A); +} + +static __inline __m128i +_mm_mulhi_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); +} + +#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B)) +#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B)) +#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B)) + +static __inline void +_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) +{ + __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C); +} + +static __inline __m128i +_mm_avg_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_avg_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_sad_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline void +_mm_stream_si32 (int *__A, int __B) +{ + __builtin_ia32_movnti (__A, __B); +} + +static __inline void +_mm_stream_si128 (__m128i *__A, __m128i __B) +{ + __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B); +} + +static __inline void +_mm_stream_pd (double *__A, __m128d __B) +{ + __builtin_ia32_movntpd (__A, (__v2df)__B); +} + +static __inline __m128i +_mm_movpi64_epi64 (__m64 __A) +{ + return (__m128i)__builtin_ia32_movq2dq ((unsigned long long)__A); +} + +static __inline void +_mm_clflush (void const *__A) +{ + return __builtin_ia32_clflush (__A); +} + +static __inline void +_mm_lfence (void) +{ + __builtin_ia32_lfence (); +} + +static __inline void +_mm_mfence (void) +{ + __builtin_ia32_mfence (); +} + +static __inline __m128i +_mm_cvtsi32_si128 (int __A) +{ + return (__m128i) __builtin_ia32_loadd (&__A); +} + +#ifdef __x86_64__ +static __inline __m128i +_mm_cvtsi64x_si128 (long long __A) +{ + return (__m128i) __builtin_ia32_movq2dq (__A); +} +#endif + +static __inline int +_mm_cvtsi128_si32 (__m128i __A) +{ + int __tmp; + __builtin_ia32_stored (&__tmp, (__v4si)__A); + return __tmp; +} + +#ifdef __x86_64__ +static __inline long long +_mm_cvtsi128_si64x (__m128i __A) +{ + return __builtin_ia32_movdq2q ((__v2di)__A); +} +#endif + +#endif /* __SSE2__ */ + +#endif /* _EMMINTRIN_H_INCLUDED */ diff --git a/contrib/gcc/config/i386/i386.c b/contrib/gcc/config/i386/i386.c index 8d033b9..3abae59 100644 --- a/contrib/gcc/config/i386/i386.c +++ b/contrib/gcc/config/i386/i386.c @@ -1257,6 +1257,14 @@ override_options () if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) target_flags &= ~MASK_NO_FANCY_MATH_387; + /* Turn on SSE2 builtins for -mpni. */ + if (TARGET_PNI) + target_flags |= MASK_SSE2; + + /* Turn on SSE builtins for -msse2. */ + if (TARGET_SSE2) + target_flags |= MASK_SSE; + if (TARGET_64BIT) { if (TARGET_ALIGN_DOUBLE) @@ -2463,23 +2471,55 @@ int ix86_return_in_memory (type) tree type; { - int needed_intregs, needed_sseregs; + int needed_intregs, needed_sseregs, size; + enum machine_mode mode = TYPE_MODE (type); + if (TARGET_64BIT) + return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs); + + if (mode == BLKmode) + return 1; + + size = int_size_in_bytes (type); + + if (VECTOR_MODE_P (mode) || mode == TImode) { - return !examine_argument (TYPE_MODE (type), type, 1, - &needed_intregs, &needed_sseregs); - } - else - { - if (TYPE_MODE (type) == BLKmode - || (VECTOR_MODE_P (TYPE_MODE (type)) - && int_size_in_bytes (type) == 8) - || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode - && TYPE_MODE (type) != TFmode - && !VECTOR_MODE_P (TYPE_MODE (type)))) + /* User-created vectors small enough to fit in EAX. */ + if (size < 8) + return 0; + + /* MMX/3dNow values are returned on the stack, since we've + got to EMMS/FEMMS before returning. */ + if (size == 8) return 1; - return 0; + + /* SSE values are returned in XMM0. */ + /* ??? Except when it doesn't exist? We have a choice of + either (1) being abi incompatible with a -march switch, + or (2) generating an error here. Given no good solution, + I think the safest thing is one warning. The user won't + be able to use -Werror, but... */ + if (size == 16) + { + static bool warned; + + if (TARGET_SSE) + return 0; + + if (!warned) + { + warned = true; + warning ("SSE vector return without SSE enabled changes the ABI"); + } + return 1; + } } + + if (mode == TFmode) + return 0; + if (size > 12) + return 1; + return 0; } /* Define how to find the value returned by a library function @@ -2514,10 +2554,14 @@ static int ix86_value_regno (mode) enum machine_mode mode; { + /* Floating point return values in %st(0). */ if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387) return FIRST_FLOAT_REG; - if (mode == TImode || VECTOR_MODE_P (mode)) + /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where + we prevent this case when sse is not available. */ + if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) return FIRST_SSE_REG; + /* Everything else in %eax. */ return 0; } @@ -4230,7 +4274,7 @@ ix86_setup_frame_addresses () cfun->machine->accesses_prev_frame = 1; } -#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY) +#if defined(HAVE_GAS_HIDDEN) && (defined(SUPPORTS_ONE_ONLY) && SUPPORTS_ONE_ONLY) # define USE_HIDDEN_LINKONCE 1 #else # define USE_HIDDEN_LINKONCE 0 @@ -6701,8 +6745,8 @@ get_some_local_dynamic_name_1 (px, data) C -- print opcode suffix for set/cmov insn. c -- like C, but print reversed condition F,f -- likewise, but for floating-point. - O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise - nothing + O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", + otherwise nothing R -- print the prefix for register names. z -- print the opcode suffix for the size of the current operand. * -- print a star (in certain assembler syntax) @@ -6906,7 +6950,7 @@ print_operand (file, x, code) } return; case 'O': -#ifdef CMOV_SUN_AS_SYNTAX +#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX if (ASSEMBLER_DIALECT == ASM_ATT) { switch (GET_MODE (x)) @@ -6926,7 +6970,7 @@ print_operand (file, x, code) put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); return; case 'F': -#ifdef CMOV_SUN_AS_SYNTAX +#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX if (ASSEMBLER_DIALECT == ASM_ATT) putc ('.', file); #endif @@ -6945,7 +6989,7 @@ print_operand (file, x, code) put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); return; case 'f': -#ifdef CMOV_SUN_AS_SYNTAX +#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX if (ASSEMBLER_DIALECT == ASM_ATT) putc ('.', file); #endif @@ -11147,10 +11191,15 @@ memory_address_length (addr) disp = parts.disp; len = 0; + /* Rule of thumb: + - esp as the base always wants an index, + - ebp as the base always wants a displacement. */ + /* Register Indirect. */ if (base && !index && !disp) { - /* Special cases: ebp and esp need the two-byte modrm form. */ + /* esp (for its index) and ebp (for its displacement) need + the two-byte modrm form. */ if (addr == stack_pointer_rtx || addr == arg_pointer_rtx || addr == frame_pointer_rtx @@ -11174,9 +11223,16 @@ memory_address_length (addr) else len = 4; } + /* ebp always wants a displacement. */ + else if (base == hard_frame_pointer_rtx) + len = 1; - /* An index requires the two-byte modrm form. */ - if (index) + /* An index requires the two-byte modrm form... */ + if (index + /* ...like esp, which always wants an index. */ + || base == stack_pointer_rtx + || base == arg_pointer_rtx + || base == frame_pointer_rtx) len += 1; } @@ -12066,25 +12122,20 @@ struct builtin_description const unsigned int flag; }; -/* Used for builtins that are enabled both by -msse and -msse2. */ -#define MASK_SSE1 (MASK_SSE | MASK_SSE2) -#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT) -#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT) - static const struct builtin_description bdesc_comi[] = { - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, @@ -12102,51 +12153,51 @@ static const struct builtin_description bdesc_comi[] = static const struct builtin_description bdesc_2arg[] = { /* SSE */ - { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, - - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, - - { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, - - { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, - - { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, + { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, + { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, + { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, + { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, + + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, + { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, + { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, + { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, + { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, + { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, + { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, + + { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, + { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, + + { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, + + { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, /* MMX */ { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, @@ -12169,15 +12220,15 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, @@ -12186,10 +12237,10 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, @@ -12203,9 +12254,9 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, - { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, + { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, + { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, @@ -12226,7 +12277,7 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, /* SSE2 */ @@ -12356,26 +12407,34 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, - { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 } + { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, + + /* PNI MMX */ + { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, + { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, + { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, + { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, + { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, + { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 } }; static const struct builtin_description bdesc_1arg[] = { - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, + { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, + { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, - { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, - { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, + { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, + { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, + { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, + { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, @@ -12397,14 +12456,19 @@ static const struct builtin_description bdesc_1arg[] = { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, - { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, - { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 } + { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }, + + /* PNI */ + { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, + { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, + { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 } }; void @@ -12495,6 +12559,13 @@ ix86_init_mmx_sse_builtins () = build_function_type (void_type_node, void_list_node); tree void_ftype_unsigned = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); + tree void_ftype_unsigned_unsigned + = build_function_type_list (void_type_node, unsigned_type_node, + unsigned_type_node, NULL_TREE); + tree void_ftype_pcvoid_unsigned_unsigned + = build_function_type_list (void_type_node, const_ptr_type_node, + unsigned_type_node, unsigned_type_node, + NULL_TREE); tree unsigned_ftype_void = build_function_type (unsigned_type_node, void_list_node); tree di_ftype_void @@ -12813,52 +12884,52 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); - def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); - def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); - def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); - def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); - def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); - def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); - def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); - def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); - def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); - def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); - def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); - - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); - - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); - - def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); - def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); - def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); - def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); - def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); - def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); - - def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); - def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); - def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); - def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); - - def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); - def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); - - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); - - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); - - def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); - def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); - def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); - def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); - def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); - def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); - - def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); + def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); + def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); + def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); + def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); + def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); + def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); + def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); + def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); + def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); + def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); + def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); + + def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); + def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); + def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); + def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); + def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); + def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); + + def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); + def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); + def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); + def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); + + def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); + def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); + + def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); + def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); + def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); + def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); + def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); + def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); + + def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); /* Original 3DNow! */ def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); @@ -12890,7 +12961,7 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); - def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); + def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); /* SSE2 */ def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128); @@ -12941,15 +13012,15 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); - def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); - def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); + def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); + def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); - def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); + def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); @@ -12973,7 +13044,7 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED); def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ); - def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); + def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128); def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128); @@ -13000,6 +13071,26 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); + + /* Prescott New Instructions. */ + def_builtin (MASK_PNI, "__builtin_ia32_monitor", + void_ftype_pcvoid_unsigned_unsigned, + IX86_BUILTIN_MONITOR); + def_builtin (MASK_PNI, "__builtin_ia32_mwait", + void_ftype_unsigned_unsigned, + IX86_BUILTIN_MWAIT); + def_builtin (MASK_PNI, "__builtin_ia32_movshdup", + v4sf_ftype_v4sf, + IX86_BUILTIN_MOVSHDUP); + def_builtin (MASK_PNI, "__builtin_ia32_movsldup", + v4sf_ftype_v4sf, + IX86_BUILTIN_MOVSLDUP); + def_builtin (MASK_PNI, "__builtin_ia32_lddqu", + v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); + def_builtin (MASK_PNI, "__builtin_ia32_loadddup", + v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP); + def_builtin (MASK_PNI, "__builtin_ia32_movddup", + v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP); } /* Errors in the source file can cause expand_expr to return const0_rtx @@ -13808,6 +13899,41 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_STORED: return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist); + case IX86_BUILTIN_MONITOR: + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + if (!REG_P (op2)) + op2 = copy_to_mode_reg (SImode, op2); + emit_insn (gen_monitor (op0, op1, op2)); + return 0; + + case IX86_BUILTIN_MWAIT: + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + emit_insn (gen_mwait (op0, op1)); + return 0; + + case IX86_BUILTIN_LOADDDUP: + return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1); + + case IX86_BUILTIN_LDDQU: + return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, + 1); + default: break; } diff --git a/contrib/gcc/config/i386/i386.h b/contrib/gcc/config/i386/i386.h index ead133b..32bc5d0 100644 --- a/contrib/gcc/config/i386/i386.h +++ b/contrib/gcc/config/i386/i386.h @@ -114,10 +114,11 @@ extern int target_flags; #define MASK_MMX 0x00002000 /* Support MMX regs/builtins */ #define MASK_SSE 0x00004000 /* Support SSE regs/builtins */ #define MASK_SSE2 0x00008000 /* Support SSE2 regs/builtins */ -#define MASK_3DNOW 0x00010000 /* Support 3Dnow builtins */ -#define MASK_3DNOW_A 0x00020000 /* Support Athlon 3Dnow builtins */ -#define MASK_128BIT_LONG_DOUBLE 0x00040000 /* long double size is 128bit */ -#define MASK_64BIT 0x00080000 /* Produce 64bit code */ +#define MASK_PNI 0x00010000 /* Support PNI builtins */ +#define MASK_3DNOW 0x00020000 /* Support 3Dnow builtins */ +#define MASK_3DNOW_A 0x00040000 /* Support Athlon 3Dnow builtins */ +#define MASK_128BIT_LONG_DOUBLE 0x00080000 /* long double size is 128bit */ +#define MASK_64BIT 0x00100000 /* Produce 64bit code */ /* Unused: 0x03f0000 */ @@ -271,8 +272,9 @@ extern int x86_prefetch_sse; #define ASSEMBLER_DIALECT (ix86_asm_dialect) -#define TARGET_SSE ((target_flags & (MASK_SSE | MASK_SSE2)) != 0) +#define TARGET_SSE ((target_flags & MASK_SSE) != 0) #define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0) +#define TARGET_PNI ((target_flags & MASK_PNI) != 0) #define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0) #define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \ && (ix86_fpmath & FPMATH_387)) @@ -366,6 +368,10 @@ extern int x86_prefetch_sse; N_("Support MMX, SSE and SSE2 built-in functions and code generation") }, \ { "no-sse2", -MASK_SSE2, \ N_("Do not support MMX, SSE and SSE2 built-in functions and code generation") }, \ + { "pni", MASK_PNI, \ + N_("Support MMX, SSE, SSE2 and PNI built-in functions and code generation") }, \ + { "no-pni", -MASK_PNI, \ + N_("Do not support MMX, SSE, SSE2 and PNI built-in functions and code generation") }, \ { "128bit-long-double", MASK_128BIT_LONG_DOUBLE, \ N_("sizeof(long double) is 16") }, \ { "96bit-long-double", -MASK_128BIT_LONG_DOUBLE, \ @@ -554,6 +560,8 @@ extern int x86_prefetch_sse; builtin_define ("__SSE__"); \ if (TARGET_SSE2) \ builtin_define ("__SSE2__"); \ + if (TARGET_PNI) \ + builtin_define ("__PNI__"); \ if (TARGET_SSE_MATH && TARGET_SSE) \ builtin_define ("__SSE_MATH__"); \ if (TARGET_SSE_MATH && TARGET_SSE2) \ @@ -2480,6 +2488,22 @@ enum ix86_builtins IX86_BUILTIN_MFENCE, IX86_BUILTIN_LFENCE, + /* Prescott New Instructions. */ + IX86_BUILTIN_ADDSUBPS, + IX86_BUILTIN_HADDPS, + IX86_BUILTIN_HSUBPS, + IX86_BUILTIN_MOVSHDUP, + IX86_BUILTIN_MOVSLDUP, + IX86_BUILTIN_ADDSUBPD, + IX86_BUILTIN_HADDPD, + IX86_BUILTIN_HSUBPD, + IX86_BUILTIN_LOADDDUP, + IX86_BUILTIN_MOVDDUP, + IX86_BUILTIN_LDDQU, + + IX86_BUILTIN_MONITOR, + IX86_BUILTIN_MWAIT, + IX86_BUILTIN_MAX }; diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md index 1fa2998..e4377a5 100644 --- a/contrib/gcc/config/i386/i386.md +++ b/contrib/gcc/config/i386/i386.md @@ -110,6 +110,13 @@ (UNSPEC_MFENCE 59) (UNSPEC_LFENCE 60) (UNSPEC_PSADBW 61) + (UNSPEC_ADDSUB 71) + (UNSPEC_HADD 72) + (UNSPEC_HSUB 73) + (UNSPEC_MOVSHDUP 74) + (UNSPEC_MOVSLDUP 75) + (UNSPEC_LDQQU 76) + (UNSPEC_MOVDDUP 77) ]) (define_constants @@ -120,6 +127,8 @@ (UNSPECV_STMXCSR 40) (UNSPECV_FEMMS 46) (UNSPECV_CLFLUSH 57) + (UNSPECV_MONITOR 69) + (UNSPECV_MWAIT 70) ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls @@ -22072,3 +22081,129 @@ "lfence" [(set_attr "type" "sse") (set_attr "memory" "unknown")]) + +;; PNI + +(define_insn "mwait" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") + (match_operand:SI 1 "register_operand" "c")] + UNSPECV_MWAIT)] + "TARGET_PNI" + "mwait\t%0, %1" + [(set_attr "length" "3")]) + +(define_insn "monitor" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") + (match_operand:SI 1 "register_operand" "c") + (match_operand:SI 2 "register_operand" "d")] + UNSPECV_MONITOR)] + "TARGET_PNI" + "monitor\t%0, %1, %2" + [(set_attr "length" "3")]) + +;; PNI arithmetic + +(define_insn "addsubv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_ADDSUB))] + "TARGET_PNI" + "addsubps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "addsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_ADDSUB))] + "TARGET_PNI" + "addsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "haddv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_HADD))] + "TARGET_PNI" + "haddps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "haddv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_HADD))] + "TARGET_PNI" + "haddpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "hsubv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_HSUB))] + "TARGET_PNI" + "hsubps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "hsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_HSUB))] + "TARGET_PNI" + "hsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "movshdup" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))] + "TARGET_PNI" + "movshdup\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "movsldup" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))] + "TARGET_PNI" + "movsldup\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "lddqu" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] + UNSPEC_LDQQU))] + "TARGET_PNI" + "lddqu\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "loadddup" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))] + "TARGET_PNI" + "movddup\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "movddup" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_duplicate:V2DF + (vec_select:DF (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_PNI" + "movddup\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) diff --git a/contrib/gcc/config/i386/mmintrin.h b/contrib/gcc/config/i386/mmintrin.h index 7b4aa01..00e77e4 100644 --- a/contrib/gcc/config/i386/mmintrin.h +++ b/contrib/gcc/config/i386/mmintrin.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2002 Free Software Foundation, Inc. +/* Copyright (C) 2002, 2003 Free Software Foundation, Inc. This file is part of GNU CC. @@ -25,7 +25,7 @@ Public License. */ /* Implemented from the specification included in the Intel C++ Compiler - User Guide and Reference, version 5.0. */ + User Guide and Reference, version 8.0. */ #ifndef _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED @@ -48,6 +48,12 @@ _mm_empty (void) __builtin_ia32_emms (); } +static __inline void +_m_empty (void) +{ + _mm_empty (); +} + /* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */ static __inline __m64 _mm_cvtsi32_si64 (int __i) @@ -56,6 +62,12 @@ _mm_cvtsi32_si64 (int __i) return (__m64) __tmp; } +static __inline __m64 +_m_from_int (int __i) +{ + return _mm_cvtsi32_si64 (__i); +} + #ifdef __x86_64__ /* Convert I to a __m64 object. */ static __inline __m64 @@ -80,6 +92,12 @@ _mm_cvtsi64_si32 (__m64 __i) return __tmp; } +static __inline int +_m_to_int (__m64 __i) +{ + return _mm_cvtsi64_si32 (__i); +} + #ifdef __x86_64__ /* Convert the lower 32 bits of the __m64 object into an integer. */ static __inline long long @@ -98,6 +116,12 @@ _mm_packs_pi16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_packsswb (__m64 __m1, __m64 __m2) +{ + return _mm_packs_pi16 (__m1, __m2); +} + /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of the result, and the two 32-bit values from M2 into the upper two 16-bit values of the result, all with signed saturation. */ @@ -107,6 +131,12 @@ _mm_packs_pi32 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2); } +static __inline __m64 +_m_packssdw (__m64 __m1, __m64 __m2) +{ + return _mm_packs_pi32 (__m1, __m2); +} + /* Pack the four 16-bit values from M1 into the lower four 8-bit values of the result, and the four 16-bit values from M2 into the upper four 8-bit values of the result, all with unsigned saturation. */ @@ -116,6 +146,12 @@ _mm_packs_pu16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_packuswb (__m64 __m1, __m64 __m2) +{ + return _mm_packs_pu16 (__m1, __m2); +} + /* Interleave the four 8-bit values from the high half of M1 with the four 8-bit values from the high half of M2. */ static __inline __m64 @@ -124,6 +160,12 @@ _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2); } +static __inline __m64 +_m_punpckhbw (__m64 __m1, __m64 __m2) +{ + return _mm_unpackhi_pi8 (__m1, __m2); +} + /* Interleave the two 16-bit values from the high half of M1 with the two 16-bit values from the high half of M2. */ static __inline __m64 @@ -132,6 +174,12 @@ _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_punpckhwd (__m64 __m1, __m64 __m2) +{ + return _mm_unpackhi_pi16 (__m1, __m2); +} + /* Interleave the 32-bit value from the high half of M1 with the 32-bit value from the high half of M2. */ static __inline __m64 @@ -140,6 +188,12 @@ _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2); } +static __inline __m64 +_m_punpckhdq (__m64 __m1, __m64 __m2) +{ + return _mm_unpackhi_pi32 (__m1, __m2); +} + /* Interleave the four 8-bit values from the low half of M1 with the four 8-bit values from the low half of M2. */ static __inline __m64 @@ -148,6 +202,12 @@ _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2); } +static __inline __m64 +_m_punpcklbw (__m64 __m1, __m64 __m2) +{ + return _mm_unpacklo_pi8 (__m1, __m2); +} + /* Interleave the two 16-bit values from the low half of M1 with the two 16-bit values from the low half of M2. */ static __inline __m64 @@ -156,6 +216,12 @@ _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_punpcklwd (__m64 __m1, __m64 __m2) +{ + return _mm_unpacklo_pi16 (__m1, __m2); +} + /* Interleave the 32-bit value from the low half of M1 with the 32-bit value from the low half of M2. */ static __inline __m64 @@ -164,6 +230,12 @@ _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2); } +static __inline __m64 +_m_punpckldq (__m64 __m1, __m64 __m2) +{ + return _mm_unpacklo_pi32 (__m1, __m2); +} + /* Add the 8-bit values in M1 to the 8-bit values in M2. */ static __inline __m64 _mm_add_pi8 (__m64 __m1, __m64 __m2) @@ -171,6 +243,12 @@ _mm_add_pi8 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2); } +static __inline __m64 +_m_paddb (__m64 __m1, __m64 __m2) +{ + return _mm_add_pi8 (__m1, __m2); +} + /* Add the 16-bit values in M1 to the 16-bit values in M2. */ static __inline __m64 _mm_add_pi16 (__m64 __m1, __m64 __m2) @@ -178,6 +256,12 @@ _mm_add_pi16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_paddw (__m64 __m1, __m64 __m2) +{ + return _mm_add_pi16 (__m1, __m2); +} + /* Add the 32-bit values in M1 to the 32-bit values in M2. */ static __inline __m64 _mm_add_pi32 (__m64 __m1, __m64 __m2) @@ -185,6 +269,12 @@ _mm_add_pi32 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); } +static __inline __m64 +_m_paddd (__m64 __m1, __m64 __m2) +{ + return _mm_add_pi32 (__m1, __m2); +} + /* Add the 64-bit values in M1 to the 64-bit values in M2. */ static __inline __m64 _mm_add_si64 (__m64 __m1, __m64 __m2) @@ -200,6 +290,12 @@ _mm_adds_pi8 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2); } +static __inline __m64 +_m_paddsb (__m64 __m1, __m64 __m2) +{ + return _mm_adds_pi8 (__m1, __m2); +} + /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed saturated arithmetic. */ static __inline __m64 @@ -208,6 +304,12 @@ _mm_adds_pi16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_paddsw (__m64 __m1, __m64 __m2) +{ + return _mm_adds_pi16 (__m1, __m2); +} + /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned saturated arithmetic. */ static __inline __m64 @@ -216,6 +318,12 @@ _mm_adds_pu8 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2); } +static __inline __m64 +_m_paddusb (__m64 __m1, __m64 __m2) +{ + return _mm_adds_pu8 (__m1, __m2); +} + /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned saturated arithmetic. */ static __inline __m64 @@ -224,6 +332,12 @@ _mm_adds_pu16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_paddusw (__m64 __m1, __m64 __m2) +{ + return _mm_adds_pu16 (__m1, __m2); +} + /* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ static __inline __m64 _mm_sub_pi8 (__m64 __m1, __m64 __m2) @@ -231,6 +345,12 @@ _mm_sub_pi8 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2); } +static __inline __m64 +_m_psubb (__m64 __m1, __m64 __m2) +{ + return _mm_sub_pi8 (__m1, __m2); +} + /* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ static __inline __m64 _mm_sub_pi16 (__m64 __m1, __m64 __m2) @@ -238,6 +358,12 @@ _mm_sub_pi16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_psubw (__m64 __m1, __m64 __m2) +{ + return _mm_sub_pi16 (__m1, __m2); +} + /* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ static __inline __m64 _mm_sub_pi32 (__m64 __m1, __m64 __m2) @@ -245,6 +371,12 @@ _mm_sub_pi32 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); } +static __inline __m64 +_m_psubd (__m64 __m1, __m64 __m2) +{ + return _mm_sub_pi32 (__m1, __m2); +} + /* Add the 64-bit values in M1 to the 64-bit values in M2. */ static __inline __m64 _mm_sub_si64 (__m64 __m1, __m64 __m2) @@ -260,6 +392,12 @@ _mm_subs_pi8 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2); } +static __inline __m64 +_m_psubsb (__m64 __m1, __m64 __m2) +{ + return _mm_subs_pi8 (__m1, __m2); +} + /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using signed saturating arithmetic. */ static __inline __m64 @@ -268,6 +406,12 @@ _mm_subs_pi16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_psubsw (__m64 __m1, __m64 __m2) +{ + return _mm_subs_pi16 (__m1, __m2); +} + /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using unsigned saturating arithmetic. */ static __inline __m64 @@ -276,6 +420,12 @@ _mm_subs_pu8 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2); } +static __inline __m64 +_m_psubusb (__m64 __m1, __m64 __m2) +{ + return _mm_subs_pu8 (__m1, __m2); +} + /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using unsigned saturating arithmetic. */ static __inline __m64 @@ -284,6 +434,12 @@ _mm_subs_pu16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_psubusw (__m64 __m1, __m64 __m2) +{ + return _mm_subs_pu16 (__m1, __m2); +} + /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing four 32-bit intermediate results, which are then summed by pairs to produce two 32-bit results. */ @@ -293,6 +449,12 @@ _mm_madd_pi16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_pmaddwd (__m64 __m1, __m64 __m2) +{ + return _mm_madd_pi16 (__m1, __m2); +} + /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in M2 and produce the high 16 bits of the 32-bit results. */ static __inline __m64 @@ -301,6 +463,12 @@ _mm_mulhi_pi16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_pmulhw (__m64 __m1, __m64 __m2) +{ + return _mm_mulhi_pi16 (__m1, __m2); +} + /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce the low 16 bits of the results. */ static __inline __m64 @@ -309,6 +477,12 @@ _mm_mullo_pi16 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_pmullw (__m64 __m1, __m64 __m2) +{ + return _mm_mullo_pi16 (__m1, __m2); +} + /* Shift four 16-bit values in M left by COUNT. */ static __inline __m64 _mm_sll_pi16 (__m64 __m, __m64 __count) @@ -317,11 +491,23 @@ _mm_sll_pi16 (__m64 __m, __m64 __count) } static __inline __m64 +_m_psllw (__m64 __m, __m64 __count) +{ + return _mm_sll_pi16 (__m, __count); +} + +static __inline __m64 _mm_slli_pi16 (__m64 __m, int __count) { return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); } +static __inline __m64 +_m_psllwi (__m64 __m, int __count) +{ + return _mm_slli_pi16 (__m, __count); +} + /* Shift two 32-bit values in M left by COUNT. */ static __inline __m64 _mm_sll_pi32 (__m64 __m, __m64 __count) @@ -330,11 +516,23 @@ _mm_sll_pi32 (__m64 __m, __m64 __count) } static __inline __m64 +_m_pslld (__m64 __m, __m64 __count) +{ + return _mm_sll_pi32 (__m, __count); +} + +static __inline __m64 _mm_slli_pi32 (__m64 __m, int __count) { return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); } +static __inline __m64 +_m_pslldi (__m64 __m, int __count) +{ + return _mm_slli_pi32 (__m, __count); +} + /* Shift the 64-bit value in M left by COUNT. */ static __inline __m64 _mm_sll_si64 (__m64 __m, __m64 __count) @@ -343,11 +541,23 @@ _mm_sll_si64 (__m64 __m, __m64 __count) } static __inline __m64 +_m_psllq (__m64 __m, __m64 __count) +{ + return _mm_sll_si64 (__m, __count); +} + +static __inline __m64 _mm_slli_si64 (__m64 __m, int __count) { return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); } +static __inline __m64 +_m_psllqi (__m64 __m, int __count) +{ + return _mm_slli_si64 (__m, __count); +} + /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ static __inline __m64 _mm_sra_pi16 (__m64 __m, __m64 __count) @@ -356,11 +566,23 @@ _mm_sra_pi16 (__m64 __m, __m64 __count) } static __inline __m64 +_m_psraw (__m64 __m, __m64 __count) +{ + return _mm_sra_pi16 (__m, __count); +} + +static __inline __m64 _mm_srai_pi16 (__m64 __m, int __count) { return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); } +static __inline __m64 +_m_psrawi (__m64 __m, int __count) +{ + return _mm_srai_pi16 (__m, __count); +} + /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */ static __inline __m64 _mm_sra_pi32 (__m64 __m, __m64 __count) @@ -369,11 +591,23 @@ _mm_sra_pi32 (__m64 __m, __m64 __count) } static __inline __m64 +_m_psrad (__m64 __m, __m64 __count) +{ + return _mm_sra_pi32 (__m, __count); +} + +static __inline __m64 _mm_srai_pi32 (__m64 __m, int __count) { return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); } +static __inline __m64 +_m_psradi (__m64 __m, int __count) +{ + return _mm_srai_pi32 (__m, __count); +} + /* Shift four 16-bit values in M right by COUNT; shift in zeros. */ static __inline __m64 _mm_srl_pi16 (__m64 __m, __m64 __count) @@ -382,11 +616,23 @@ _mm_srl_pi16 (__m64 __m, __m64 __count) } static __inline __m64 +_m_psrlw (__m64 __m, __m64 __count) +{ + return _mm_srl_pi16 (__m, __count); +} + +static __inline __m64 _mm_srli_pi16 (__m64 __m, int __count) { return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); } +static __inline __m64 +_m_psrlwi (__m64 __m, int __count) +{ + return _mm_srli_pi16 (__m, __count); +} + /* Shift two 32-bit values in M right by COUNT; shift in zeros. */ static __inline __m64 _mm_srl_pi32 (__m64 __m, __m64 __count) @@ -395,11 +641,23 @@ _mm_srl_pi32 (__m64 __m, __m64 __count) } static __inline __m64 +_m_psrld (__m64 __m, __m64 __count) +{ + return _mm_srl_pi32 (__m, __count); +} + +static __inline __m64 _mm_srli_pi32 (__m64 __m, int __count) { return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); } +static __inline __m64 +_m_psrldi (__m64 __m, int __count) +{ + return _mm_srli_pi32 (__m, __count); +} + /* Shift the 64-bit value in M left by COUNT; shift in zeros. */ static __inline __m64 _mm_srl_si64 (__m64 __m, __m64 __count) @@ -408,11 +666,23 @@ _mm_srl_si64 (__m64 __m, __m64 __count) } static __inline __m64 +_m_psrlq (__m64 __m, __m64 __count) +{ + return _mm_srl_si64 (__m, __count); +} + +static __inline __m64 _mm_srli_si64 (__m64 __m, int __count) { return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); } +static __inline __m64 +_m_psrlqi (__m64 __m, int __count) +{ + return _mm_srli_si64 (__m, __count); +} + /* Bit-wise AND the 64-bit values in M1 and M2. */ static __inline __m64 _mm_and_si64 (__m64 __m1, __m64 __m2) @@ -420,6 +690,12 @@ _mm_and_si64 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2); } +static __inline __m64 +_m_pand (__m64 __m1, __m64 __m2) +{ + return _mm_and_si64 (__m1, __m2); +} + /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the 64-bit value in M2. */ static __inline __m64 @@ -428,6 +704,12 @@ _mm_andnot_si64 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2); } +static __inline __m64 +_m_pandn (__m64 __m1, __m64 __m2) +{ + return _mm_andnot_si64 (__m1, __m2); +} + /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ static __inline __m64 _mm_or_si64 (__m64 __m1, __m64 __m2) @@ -435,6 +717,12 @@ _mm_or_si64 (__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2); } +static __inline __m64 +_m_por (__m64 __m1, __m64 __m2) +{ + return _mm_or_si64 (__m1, __m2); +} + /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ static __inline __m64 _mm_xor_si64 (__m64 __m1, __m64 __m2) @@ -442,6 +730,12 @@ _mm_xor_si64 (__m64 __m1, __m64 __m2) return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2); } +static __inline __m64 +_m_pxor (__m64 __m1, __m64 __m2) +{ + return _mm_xor_si64 (__m1, __m2); +} + /* Compare eight 8-bit values. The result of the comparison is 0xFF if the test is true and zero if false. */ static __inline __m64 @@ -451,11 +745,23 @@ _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) } static __inline __m64 +_m_pcmpeqb (__m64 __m1, __m64 __m2) +{ + return _mm_cmpeq_pi8 (__m1, __m2); +} + +static __inline __m64 _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2); } +static __inline __m64 +_m_pcmpgtb (__m64 __m1, __m64 __m2) +{ + return _mm_cmpgt_pi8 (__m1, __m2); +} + /* Compare four 16-bit values. The result of the comparison is 0xFFFF if the test is true and zero if false. */ static __inline __m64 @@ -465,11 +771,23 @@ _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) } static __inline __m64 +_m_pcmpeqw (__m64 __m1, __m64 __m2) +{ + return _mm_cmpeq_pi16 (__m1, __m2); +} + +static __inline __m64 _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2); } +static __inline __m64 +_m_pcmpgtw (__m64 __m1, __m64 __m2) +{ + return _mm_cmpgt_pi16 (__m1, __m2); +} + /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if the test is true and zero if false. */ static __inline __m64 @@ -479,11 +797,23 @@ _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) } static __inline __m64 +_m_pcmpeqd (__m64 __m1, __m64 __m2) +{ + return _mm_cmpeq_pi32 (__m1, __m2); +} + +static __inline __m64 _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2); } +static __inline __m64 +_m_pcmpgtd (__m64 __m1, __m64 __m2) +{ + return _mm_cmpgt_pi32 (__m1, __m2); +} + /* Creates a 64-bit zero. */ static __inline __m64 _mm_setzero_si64 (void) @@ -574,7 +904,7 @@ _mm_set1_pi16 (short __w) return _mm_set1_pi32 (__i); } -/* Creates a vector of four 16-bit values, all elements containing B. */ +/* Creates a vector of eight 8-bit values, all elements containing B. */ static __inline __m64 _mm_set1_pi8 (char __b) { diff --git a/contrib/gcc/config/i386/pmmintrin.h b/contrib/gcc/config/i386/pmmintrin.h new file mode 100644 index 0000000..5649c00 --- /dev/null +++ b/contrib/gcc/config/i386/pmmintrin.h @@ -0,0 +1,132 @@ +/* Copyright (C) 2003 Free Software Foundation, Inc. + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU CC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 8.0. */ + +#ifndef _PMMINTRIN_H_INCLUDED +#define _PMMINTRIN_H_INCLUDED + +#ifdef __PNI__ +#include <xmmintrin.h> +#include <emmintrin.h> + +/* Additional bits in the MXCSR. */ +#define _MM_DENORMALS_ZERO_MASK 0x0040 +#define _MM_DENORMALS_ZERO_ON 0x0040 +#define _MM_DENORMALS_ZERO_OFF 0x0000 + +#define _MM_SET_DENORMALS_ZERO_MODE(mode) \ + _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode)) +#define _MM_GET_DENORMALS_ZERO_MODE() \ + (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) + +static __inline __m128 +_mm_addsub_ps (__m128 __X, __m128 __Y) +{ + return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y); +} + +static __inline __m128 +_mm_hadd_ps (__m128 __X, __m128 __Y) +{ + return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y); +} + +static __inline __m128 +_mm_hsub_ps (__m128 __X, __m128 __Y) +{ + return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y); +} + +static __inline __m128 +_mm_movehdup_ps (__m128 __X) +{ + return (__m128) __builtin_ia32_movshdup ((__v4sf)__X); +} + +static __inline __m128 +_mm_moveldup_ps (__m128 __X) +{ + return (__m128) __builtin_ia32_movsldup ((__v4sf)__X); +} + +static __inline __m128d +_mm_addsub_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y); +} + +static __inline __m128d +_mm_hadd_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y); +} + +static __inline __m128d +_mm_hsub_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y); +} + +static __inline __m128d +_mm_loaddup_pd (double const *__P) +{ + return (__m128d) __builtin_ia32_loadddup (__P); +} + +static __inline __m128d +_mm_movedup_pd (__m128d __X) +{ + return (__m128d) __builtin_ia32_movddup ((__v2df)__X); +} + +static __inline __m128i +_mm_lddqu_si128 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_lddqu ((char const *)__P); +} + +#if 0 +static __inline void +_mm_monitor (void const * __P, unsigned int __E, unsigned int __H) +{ + __builtin_ia32_monitor (__P, __E, __H); +} + +static __inline void +_mm_mwait (unsigned int __E, unsigned int __H) +{ + __builtin_ia32_mwait (__E, __H); +} +#else +#define _mm_monitor(P, E, H) __builtin_ia32_monitor ((P), (E), (H)) +#define _mm_mwait(E, H) __builtin_ia32_mwait ((E), (H)) +#endif + +#endif /* __PNI__ */ + +#endif /* _PMMINTRIN_H_INCLUDED */ diff --git a/contrib/gcc/config/i386/sco5.h b/contrib/gcc/config/i386/sco5.h index 815e457..2a28e54 100644 --- a/contrib/gcc/config/i386/sco5.h +++ b/contrib/gcc/config/i386/sco5.h @@ -1,7 +1,7 @@ /* Definitions for Intel 386 running SCO Unix System V 3.2 Version 5. - Copyright (C) 1992, 1995, 1996, 1997, 1998, 1999, 2000, 2002 + Copyright (C) 1992, 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc. - Contributed by Kean Johnston (hug@netcom.com) + Contributed by Kean Johnston (jkj@sco.com) This file is part of GNU CC. @@ -20,230 +20,36 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (i386, SCO OpenServer 5 Syntax)"); -#undef LPREFIX -#define LPREFIX ".L" - -#undef ALIGN_ASM_OP -#define ALIGN_ASM_OP "\t.align\t" - -#undef ASCII_DATA_ASM_OP -#define ASCII_DATA_ASM_OP "\t.ascii\t" - -#undef IDENT_ASM_OP -#define IDENT_ASM_OP "\t.ident\t" - -#undef COMMON_ASM_OP -#define COMMON_ASM_OP "\t.comm\t" - -#undef SET_ASM_OP -#define SET_ASM_OP "\t.set\t" - -#undef LOCAL_ASM_OP -#define LOCAL_ASM_OP "\t.local\t" - -#undef ASM_SHORT -#define ASM_SHORT "\t.value\t" - -#undef ASM_LONG -#define ASM_LONG "\t.long\t" +/* The native link editor does not support linkonce stuff */ +#define SUPPORTS_ONE_ONLY 0 #undef ASM_QUAD -#undef TYPE_ASM_OP -#define TYPE_ASM_OP "\t.type\t" - -#undef SIZE_ASM_OP -#define SIZE_ASM_OP "\t.size\t" - -#undef STRING_ASM_OP -#define STRING_ASM_OP "\t.string\t" - -#undef SKIP_ASM_OP -#define SKIP_ASM_OP "\t.zero\t" - #undef GLOBAL_ASM_OP #define GLOBAL_ASM_OP "\t.globl\t" -#undef EH_FRAME_SECTION_ASM_OP -#define EH_FRAME_SECTION_NAME_COFF ".ehfram" -#define EH_FRAME_SECTION_NAME_ELF ".eh_frame" -#define EH_FRAME_SECTION_NAME \ - ((TARGET_ELF) ? EH_FRAME_SECTION_NAME_ELF : EH_FRAME_SECTION_NAME_COFF) - -/* Avoid problems (long sectino names, forward assembler refs) with DWARF - exception unwinding when we're generating COFF */ -#define DWARF2_UNWIND_INFO \ - ((TARGET_ELF) ? 1 : 0 ) - -#undef READONLY_DATA_SECTION_ASM_OP -#define READONLY_DATA_SECTION_ASM_OP_COFF "\t.section\t.rodata, \"x\"" -#define READONLY_DATA_SECTION_ASM_OP_ELF "\t.section\t.rodata" -#define READONLY_DATA_SECTION_ASM_OP \ - ((TARGET_ELF) \ - ? READONLY_DATA_SECTION_ASM_OP_ELF \ - : READONLY_DATA_SECTION_ASM_OP_COFF) - -#undef INIT_SECTION_ASM_OP -#define INIT_SECTION_ASM_OP_ELF "\t.section\t.init" -/* Rename these for COFF because crt1.o will try to run them. */ -#define INIT_SECTION_ASM_OP_COFF "\t.section\t.ctor ,\"x\"" -#define INIT_SECTION_ASM_OP \ - ((TARGET_ELF) ? INIT_SECTION_ASM_OP_ELF : INIT_SECTION_ASM_OP_COFF) - -#undef CTORS_SECTION_ASM_OP -#define CTORS_SECTION_ASM_OP_ELF "\t.section\t.ctors,\"aw\"" -#define CTORS_SECTION_ASM_OP_COFF INIT_SECTION_ASM_OP_COFF -#define CTORS_SECTION_ASM_OP \ - ((TARGET_ELF) ? CTORS_SECTION_ASM_OP_ELF : CTORS_SECTION_ASM_OP_COFF) - -#undef DTORS_SECTION_ASM_OP -#define DTORS_SECTION_ASM_OP_ELF "\t.section\t.dtors, \"aw\"" -#define DTORS_SECTION_ASM_OP_COFF FINI_SECTION_ASM_OP_COFF -#define DTORS_SECTION_ASM_OP \ - ((TARGET_ELF) ? DTORS_SECTION_ASM_OP_ELF : DTORS_SECTION_ASM_OP_COFF) - -#undef FINI_SECTION_ASM_OP -#define FINI_SECTION_ASM_OP_ELF "\t.section\t.fini" -#define FINI_SECTION_ASM_OP_COFF "\t.section\t.dtor, \"x\"" -#define FINI_SECTION_ASM_OP \ - ((TARGET_ELF) ? FINI_SECTION_ASM_OP_ELF : FINI_SECTION_ASM_OP_COFF) - #undef BSS_SECTION_ASM_OP -#define BSS_SECTION_ASM_OP "\t.data" - -#undef TEXT_SECTION_ASM_OP -#define TEXT_SECTION_ASM_OP "\t.text" - -#undef DATA_SECTION_ASM_OP -#define DATA_SECTION_ASM_OP "\t.data" - -#undef TYPE_OPERAND_FMT -#define TYPE_OPERAND_FMT "@%s" - -#undef APPLY_RESULT_SIZE -#define APPLY_RESULT_SIZE \ -(TARGET_ELF) ? size : 116 - -#ifndef ASM_DECLARE_RESULT -#define ASM_DECLARE_RESULT(FILE, RESULT) -#endif - -#define SCO_DEFAULT_ASM_COFF(FILE,NAME) \ -do { \ - ASM_OUTPUT_LABEL (FILE, NAME); \ - } while (0) - -#undef ASM_DECLARE_FUNCTION_NAME -#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ - do { \ - if (TARGET_ELF) { \ - ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ - ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ - ASM_OUTPUT_LABEL (FILE, NAME); \ - } else \ - SCO_DEFAULT_ASM_COFF(FILE, NAME); \ -} while (0) - -#undef ASM_DECLARE_FUNCTION_SIZE -#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ - do { \ - if (TARGET_ELF && !flag_inhibit_size_directive) \ - ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \ - } while (0) - -#undef ASM_DECLARE_OBJECT_NAME -#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ - do { \ - if (TARGET_ELF) { \ - HOST_WIDE_INT size; \ - \ - ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ - \ - size_directive_output = 0; \ - if (!flag_inhibit_size_directive \ - && (DECL) && DECL_SIZE (DECL)) \ - { \ - size_directive_output = 1; \ - size = int_size_in_bytes (TREE_TYPE (DECL)); \ - ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \ - } \ - \ - ASM_OUTPUT_LABEL (FILE, NAME); \ - } else \ - SCO_DEFAULT_ASM_COFF(FILE, NAME); \ - } while (0) +#define BSS_SECTION_ASM_OP "\t.section\t.bss, \"aw\", @nobits" + +/* + * NOTE: We really do want CTORS_SECTION_ASM_OP and DTORS_SECTION_ASM_OP. + * Here's the reason why. If we dont define them, and we dont define them + * to always emit to the same section, the default is to emit to "named" + * ctors and dtors sections. This would be great if we could use GNU ld, + * but we can't. The native linker could possibly be trained to coalesce + * named ctors sections, but that hasn't been done either. So if we don't + * define these, many C++ ctors and dtors dont get run, because they never + * wind up in the ctors/dtors arrays. + */ +#define CTORS_SECTION_ASM_OP "\t.section\t.ctors, \"aw\"" +#define DTORS_SECTION_ASM_OP "\t.section\t.dtors, \"aw\"" -#undef ASM_FILE_START_1 -#define ASM_FILE_START_1(FILE) - -#undef ASM_FILE_START -#define ASM_FILE_START(FILE) \ -do { \ - output_file_directive((FILE),main_input_filename); \ - fprintf ((FILE), "\t.version\t\"01.01\"\n"); \ -} while (0) - -#undef ASM_FINISH_DECLARE_OBJECT -#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \ -do { \ - if (TARGET_ELF) { \ - const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ - HOST_WIDE_INT size; \ - if (!flag_inhibit_size_directive && DECL_SIZE (DECL) \ - && ! AT_END && TOP_LEVEL \ - && DECL_INITIAL (DECL) == error_mark_node \ - && !size_directive_output) \ - { \ - size_directive_output = 1; \ - size = int_size_in_bytes (TREE_TYPE (DECL)); \ - ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \ - } \ - } \ -} while (0) - -#undef ASM_GENERATE_INTERNAL_LABEL -#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ -do { \ - if (TARGET_ELF) \ - sprintf (LABEL, "*.%s%ld", (PREFIX), (long)(NUM)); \ - else \ - sprintf (LABEL, ".%s%ld", (PREFIX), (long)(NUM)); \ -} while (0) - -#undef ASM_OUTPUT_ALIGNED_COMMON -#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ -do { \ - fprintf ((FILE), "%s", COMMON_ASM_OP); \ - assemble_name ((FILE), (NAME)); \ - if (TARGET_ELF) \ - fprintf ((FILE), ",%u,%u\n", (SIZE), (ALIGN) / BITS_PER_UNIT); \ - else \ - fprintf ((FILE), ",%u\n", (SIZE)); \ -} while (0) - -#undef ASM_OUTPUT_ALIGNED_LOCAL -#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ -do { \ - if (TARGET_ELF) { \ - fprintf ((FILE), "%s", LOCAL_ASM_OP); \ - assemble_name ((FILE), (NAME)); \ - fprintf ((FILE), "\n"); \ - ASM_OUTPUT_ALIGNED_COMMON (FILE, NAME, SIZE, ALIGN); \ - } else { \ - int align = exact_log2 (ALIGN); \ - if (align > 2) align = 2; \ - if (TARGET_SVR3_SHLIB) \ - data_section (); \ - else \ - bss_section (); \ - ASM_OUTPUT_ALIGN ((FILE), align == -1 ? 2 : align); \ - fprintf ((FILE), "%s\t", "\t.lcomm"); \ - assemble_name ((FILE), (NAME)); \ - fprintf ((FILE), ",%u\n", (SIZE)); \ - } \ -} while (0) +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true +#undef X86_FILE_START_VERSION_DIRECTIVE +#define X86_FILE_START_VERSION_DIRECTIVE true /* A C statement (sans semicolon) to output to the stdio stream FILE the assembler definition of uninitialized global DECL named @@ -251,248 +57,23 @@ do { \ Try to use asm_output_aligned_bss to implement this macro. */ #define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ -asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) - -#undef ESCAPES -#define ESCAPES \ -"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ -\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\ -\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ -\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ -\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ -\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" - -#undef STRING_LIMIT -#define STRING_LIMIT ((unsigned) 256) - -#undef ASM_OUTPUT_LIMITED_STRING -#define ASM_OUTPUT_LIMITED_STRING(FILE, STR) \ - do \ - { \ - register const unsigned char *_limited_str = \ - (const unsigned char *) (STR); \ - register unsigned ch; \ - fprintf ((FILE), "%s\"", STRING_ASM_OP); \ - for (; (ch = *_limited_str); _limited_str++) \ - { \ - register int escape; \ - switch (escape = ESCAPES[ch]) \ - { \ - case 0: \ - putc (ch, (FILE)); \ - break; \ - case 1: \ - fprintf ((FILE), "\\%03o", ch); \ - break; \ - default: \ - putc ('\\', (FILE)); \ - putc (escape, (FILE)); \ - break; \ - } \ - } \ - fprintf ((FILE), "\"\n"); \ - } \ - while (0) - - -#undef ASM_OUTPUT_ASCII -#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \ -do { \ - register const unsigned char *_ascii_bytes = \ - (const unsigned char *) (STR); \ - register const unsigned char *limit = _ascii_bytes + (LENGTH); \ - register unsigned bytes_in_chunk = 0; \ - for (; _ascii_bytes < limit; _ascii_bytes++) \ - { \ - register unsigned const char *p; \ - if (bytes_in_chunk >= 64) \ - { \ - fputc ('\n', (FILE)); \ - bytes_in_chunk = 0; \ - } \ - for (p = _ascii_bytes; p < limit && *p != '\0'; p++) \ - continue; \ - if (p < limit && (p - _ascii_bytes) <= (long) STRING_LIMIT) \ - { \ - if (bytes_in_chunk > 0) \ - { \ - fputc ('\n', (FILE)); \ - bytes_in_chunk = 0; \ - } \ - ASM_OUTPUT_LIMITED_STRING ((FILE), _ascii_bytes); \ - _ascii_bytes = p; \ - } \ - else \ - { \ - if (bytes_in_chunk == 0) \ - fputs ("\t.byte\t", (FILE)); \ - else \ - fputc (',', (FILE)); \ - fprintf ((FILE), "0x%02x", *_ascii_bytes); \ - bytes_in_chunk += 5; \ - } \ - } \ - if (bytes_in_chunk > 0) \ - fprintf ((FILE), "\n"); \ -} while (0) - -#undef ASM_OUTPUT_CASE_LABEL -#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,JUMPTABLE) \ -do { \ - if (TARGET_ELF) \ - ASM_OUTPUT_ALIGN ((FILE), 2); \ - ASM_OUTPUT_INTERNAL_LABEL((FILE),(PREFIX),(NUM)); \ -} while (0) - -#undef ASM_OUTPUT_IDENT -#define ASM_OUTPUT_IDENT(FILE, NAME) \ - fprintf (FILE, "%s\"%s\"\n", IDENT_ASM_OP, NAME); - -#undef ASM_OUTPUT_EXTERNAL_LIBCALL -#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \ - if (TARGET_ELF) (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0)) - -#undef ASM_OUTPUT_INTERNAL_LABEL -#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ - fprintf (FILE, ".%s%d:\n", PREFIX, NUM) - -/* The prefix to add to user-visible assembler symbols. */ - -#undef USER_LABEL_PREFIX -#define USER_LABEL_PREFIX "" - -/* - * We rename 'gcc_except_table' to the shorter name in preparation - * for the day when we're ready to do DWARF2 eh unwinding under COFF. - */ -/* #define EXCEPTION_SECTION() named_section (NULL, ".gccexc", 1) */ - -/* Switch into a generic section. */ -#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section - -#undef ASM_OUTPUT_SKIP -#define ASM_OUTPUT_SKIP(FILE,SIZE) \ -do { \ - if (TARGET_ELF) \ - fprintf (FILE, "%s%u\n", SKIP_ASM_OP, (SIZE)); \ - else \ - fprintf ((FILE), "%s.,.+%u\n", SET_ASM_OP, (SIZE)); \ -} while (0) - - -#undef CTOR_LIST_BEGIN -#define CTOR_LIST_BEGIN \ -do { \ - asm (CTORS_SECTION_ASM_OP); \ - if (TARGET_ELF) \ - STATIC func_ptr __CTOR_LIST__[1] = { (func_ptr) (-1) }; \ - else \ - asm ("pushl $0"); \ -} while (0) - -#undef CTOR_LIST_END -#define CTOR_LIST_END \ -do { \ - if (TARGET_ELF) { \ - asm (CTORS_SECTION_ASM_OP); \ - STATIC func_ptr __CTOR_LIST__[1] = { (func_ptr) (0) }; \ - } else { \ - CTOR_LIST_BEGIN; \ - } \ -} while (0) - -#undef DBX_BLOCKS_FUNCTION_RELATIVE -#define DBX_BLOCKS_FUNCTION_RELATIVE 1 - -#undef DBX_FUNCTION_FIRST -#define DBX_FUNCTION_FIRST 1 + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) #undef DBX_REGISTER_NUMBER -#define DBX_REGISTER_NUMBER(n) \ - ((TARGET_ELF) ? svr4_dbx_register_map[n] : dbx_register_map[n]) +#define DBX_REGISTER_NUMBER(n) svr4_dbx_register_map[n] -#define DWARF2_DEBUGGING_INFO 1 -#define DWARF_DEBUGGING_INFO 1 -#define SDB_DEBUGGING_INFO 1 -#define DBX_DEBUGGING_INFO 1 +#define DWARF2_DEBUGGING_INFO 1 +#define DWARF_DEBUGGING_INFO 1 +#define DBX_DEBUGGING_INFO 1 #undef PREFERRED_DEBUGGING_TYPE -#define PREFERRED_DEBUGGING_TYPE \ - ((TARGET_ELF) ? DWARF2_DEBUG: SDB_DEBUG) - -#undef EXTRA_SECTIONS -#define EXTRA_SECTIONS in_init, in_fini - -#undef EXTRA_SECTION_FUNCTIONS -#define EXTRA_SECTION_FUNCTIONS \ - INIT_SECTION_FUNCTION \ - FINI_SECTION_FUNCTION - -#undef FINI_SECTION_FUNCTION -#define FINI_SECTION_FUNCTION \ -void \ -fini_section () \ -{ \ - if ((!TARGET_ELF) && in_section != in_fini) \ - { \ - fprintf (asm_out_file, "%s\n", FINI_SECTION_ASM_OP); \ - in_section = in_fini; \ - } \ -} - -#undef INIT_SECTION_FUNCTION -#define INIT_SECTION_FUNCTION \ -void \ -init_section () \ -{ \ - if ((!TARGET_ELF) && in_section != in_init) \ - { \ - fprintf (asm_out_file, "%s\n", INIT_SECTION_ASM_OP); \ - in_section = in_init; \ - } \ -} - -#undef SUBTARGET_FRAME_POINTER_REQUIRED -#define SUBTARGET_FRAME_POINTER_REQUIRED \ - ((TARGET_ELF) ? 0 : \ - (current_function_calls_setjmp || current_function_calls_longjmp)) - -#undef LOCAL_LABEL_PREFIX -#define LOCAL_LABEL_PREFIX \ - ((TARGET_ELF) ? "" : ".") - -#undef MD_EXEC_PREFIX -#undef MD_STARTFILE_PREFIX -#define MD_EXEC_PREFIX "/usr/ccs/bin/" -#define MD_STARTFILE_PREFIX "/usr/ccs/lib/" - -#undef NON_SAVING_SETJMP -#define NON_SAVING_SETJMP \ - ((TARGET_ELF) ? 0 : \ - (current_function_calls_setjmp && current_function_calls_longjmp)) +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +#undef DWARF2_UNWIND_INFO +#define DWARF2_UNWIND_INFO 1 #undef NO_IMPLICIT_EXTERN_C -#define NO_IMPLICIT_EXTERN_C 1 - -/* JKJ FIXME - examine the ramifications of RETURN_IN_MEMORY and - RETURN_POPS_ARGS */ - -#undef RETURN_POPS_ARGS -#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE) \ - ((TARGET_ELF) ? \ - (ix86_return_pops_args (FUNDECL, FUNTYPE, SIZE)) : \ - (((FUNDECL) && (TREE_CODE (FUNDECL) == IDENTIFIER_NODE)) ? 0 \ - : (TARGET_RTD \ - && (TYPE_ARG_TYPES (FUNTYPE) == 0 \ - || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (FUNTYPE))) \ - == void_type_node))) ? (SIZE) \ - : 0)) - -/* ??? Ignore coff. */ -#undef TARGET_ASM_SELECT_SECTION -#define TARGET_ASM_SELECT_SECTION default_elf_select_section +#define NO_IMPLICIT_EXTERN_C 1 #undef SWITCH_TAKES_ARG #define SWITCH_TAKES_ARG(CHAR) \ @@ -511,17 +92,6 @@ init_section () \ #undef TARGET_SUBTARGET_DEFAULT #define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS) -#define HANDLE_SYSV_PRAGMA 1 - -/* Though OpenServer supports .weak in COFF, we don't use it. - * G++ will frequently emit a symol as .weak and then (in the same .s - * file) declare it global. The COFF assembler finds this unamusing. - */ -#define SUPPORTS_WEAK (TARGET_ELF) -#define ASM_WEAKEN_LABEL(FILE,NAME) \ - do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \ - fputc ('\n', FILE); } while (0) - /* * Define sizes and types */ @@ -529,12 +99,14 @@ init_section () \ #undef PTRDIFF_TYPE #undef WCHAR_TYPE #undef WCHAR_TYPE_SIZE +#undef WINT_TYPE #undef LONG_DOUBLE_TYPE_SIZE -#define LONG_DOUBLE_TYPE_SIZE 96 #define SIZE_TYPE "unsigned int" #define PTRDIFF_TYPE "int" #define WCHAR_TYPE "long int" #define WCHAR_TYPE_SIZE BITS_PER_WORD +#define WINT_TYPE "long int" +#define LONG_DOUBLE_TYPE_SIZE 96 /* * New for multilib support. Set the default switches for multilib, @@ -548,7 +120,7 @@ init_section () \ With SCO Open Server 5.0, you now get the linker and assembler free, so that is what these specs are targeted for. These utilities are very argument sensitive: a space in the wrong place breaks everything. - So RMS, please forgive this mess. It works. + So please forgive this mess. It works. Parameters which can be passed to gcc, and their SCO equivalents: GCC Parameter SCO Equivalent @@ -564,77 +136,109 @@ init_section () \ does. SCO also allows you to compile, link and generate either ELF or COFF - binaries. With gcc, unlike the SCO compiler, the default is ELF. - Specify -mcoff to gcc to produce COFF binaries. -fpic will get the - assembler and linker to produce PIC code. + binaries. With gcc, we now only support ELF mode. + + GCC also requires that the user has installed OSS646, the Execution + Environment Update, or is running release 5.0.7 or later. This has + many fixes to the ELF link editor and assembler, and a considerably + improved libc and RTLD. + + In terms of tool usage, we want to use the standard link editor always, + and either the GNU assembler or the native assembler. With OSS646 the + native assembler has grown up quite a bit. Some of the specs below + assume that /usr/gnu is the prefix for the GNU tools, because thats + where the SCO provided ones go. This is especially important for + include and library search path ordering. We want to look in /usr/gnu + first, becuase frequently people are linking against -lintl, and they + MEAN to link with gettext. What they get is the SCO intl library. Its + a REAL pity that GNU gettext chose that name; perhaps in a future + version they can be persuaded to change it to -lgnuintl and have a + link so that -lintl will work for other systems. The same goes for + header files. We want /usr/gnu/include searched for before the system + header files. Hence the -isystem /usr/gnu/include in the CPP_SPEC. + We get /usr/gnu/lib first by virtue of the MD_STARTFILE_PREFIX below. */ +#define MD_STARTFILE_PREFIX "/usr/gnu/lib/" +#define MD_STARTFILE_PREFIX_1 "/usr/ccs/lib/" + +#if USE_GAS +# define MD_EXEC_PREFIX "/usr/gnu/bin/" +#else +# define MD_EXEC_PREFIX "/usr/ccs/bin/elf/" +#endif + +/* Always use the system linker, please. */ +#ifndef DEFAULT_LINKER +# define DEFAULT_LINKER "/usr/ccs/bin/elf/ld" +#endif + /* Set up assembler flags for PIC and ELF compilations */ #undef ASM_SPEC #if USE_GAS - /* Leave ASM_SPEC undefined so we pick up the master copy from gcc.c - * Undef MD_EXEC_PREFIX because we don't know where GAS is, but it's not - * likely in /usr/ccs/bin/ - */ -#undef MD_EXEC_PREFIX + /* Leave ASM_SPEC undefined so we pick up the master copy from gcc.c */ #else - #define ASM_SPEC \ - "-b %{!mcoff:elf}%{mcoff:coff \ - %{static:%e-static not valid with -mcoff} \ - %{shared:%e-shared not valid with -mcoff} \ - %{symbolic:%e-symbolic not valid with -mcoff}} \ - %{Ym,*} %{Yd,*} %{Wa,*:%*} \ - %{!mcoff:-E%{Xa:a}%{!Xa:%{Xc:c}%{!Xc:%{Xk:k}%{!Xk:%{Xt:t}%{!Xt:a}}}},%{ansi:ansi}%{!ansi:%{posix:posix}%{!posix:%{Xpg4:xpg4}%{!Xpg4:%{Xpg4plus:XPG4PLUS}%{!Xpg4plus:%{Xods30:ods30}%{!Xods30:XPG4PLUS}}}}},ELF %{Qn:} %{!Qy:-Qn}}" + "%{Ym,*} %{Yd,*} %{Wa,*:%*} \ + -E%{Xa:a}%{!Xa:%{Xc:c}%{!Xc:%{Xk:k}%{!Xk:%{Xt:t}%{!Xt:a}}}},%{ansi:ansi}%{!ansi:%{posix:posix}%{!posix:%{Xpg4:xpg4}%{!Xpg4:%{Xpg4plus:XPG4PLUS}%{!Xpg4plus:%{Xods30:ods30}%{!Xods30:XPG4PLUS}}}}},ELF %{Qn:} %{!Qy:-Qn}" #endif -/* Use crt1.o as a startup file and crtn.o as a closing file. */ +/* + * Use crti.o for shared objects, crt1.o for normal executables. Make sure + * to recognize both -G and -shared as a valid way of introducing shared + * library generation. This is important for backwards compatibility. + */ #undef STARTFILE_SPEC #define STARTFILE_SPEC \ - "%{shared: %{!mcoff: crti.o%s}} \ - %{!shared:\ + "%{pg:%e-pg not supported on this platform} \ + %{p:%{pp:%e-p and -pp specified - pick one}} \ + %{!shared:\ %{!symbolic: \ - %{pg:gcrt.o%s}%{!pg:%{p:mcrt1.o%s}%{!p:crt1.o%s}}}} \ + %{!G: \ + %{pp:pcrt1elf.o%s}%{p:mcrt1.o%s}%{!p:%{!pp:crt1.o%s}}}}} \ + crti.o%s \ %{ansi:values-Xc.o%s} \ %{!ansi: \ - %{Xa:values-Xa.o%s} \ - %{!Xa:%{Xc:values-Xc.o%s} \ - %{!Xc:%{Xk:values-Xk.o%s} \ - %{!Xk:%{Xt:values-Xt.o%s} \ - %{!Xt:values-Xa.o%s}}}}} \ - %{mcoff:crtbeginS.o%s} %{!mcoff:crtbegin.o%s}" + %{traditional:values-Xt.o%s} \ + %{!traditional: \ + %{Xa:values-Xa.o%s} \ + %{!Xa:%{Xc:values-Xc.o%s} \ + %{!Xc:%{Xk:values-Xk.o%s} \ + %{!Xk:%{Xt:values-Xt.o%s} \ + %{!Xt:values-Xa.o%s}}}}}} \ + crtbegin.o%s" #undef ENDFILE_SPEC #define ENDFILE_SPEC \ - "%{!mcoff:crtend.o%s} \ - %{mcoff:crtendS.o%s} \ - %{pg:gcrtn.o%s}%{!pg:crtn.o%s}" - -#define TARGET_OS_CPP_BUILTINS() \ - do \ - { \ - builtin_define ("__unix"); \ - builtin_define ("_SCO_DS"); \ - builtin_define ("_M_I386"); \ - builtin_define ("_M_XENIX"); \ - builtin_define ("_M_UNIX"); \ - builtin_assert ("system=svr3"); \ - if (flag_iso) \ - cpp_define (pfile, "_STRICT_ANSI"); \ - if (flag_pic) \ - { \ - builtin_define ("__PIC__"); \ - builtin_define ("__pic__"); \ - } \ - } \ + "crtend.o%s crtn.o%s" + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__unix"); \ + builtin_define ("_SCO_DS"); \ + builtin_define ("_SCO_DS_LL"); \ + builtin_define ("_SCO_ELF"); \ + builtin_define ("_M_I386"); \ + builtin_define ("_M_XENIX"); \ + builtin_define ("_M_UNIX"); \ + builtin_assert ("system=svr3"); \ + if (flag_iso) \ + cpp_define (pfile, "_STRICT_ANSI"); \ + if (flag_pic) \ + { \ + builtin_define ("__PIC__"); \ + builtin_define ("__pic__"); \ + } \ + } \ while (0) #undef CPP_SPEC #define CPP_SPEC "\ - %{fpic:%{mcoff:%e-fpic is not valid with -mcoff}} \ - %{fPIC:%{mcoff:%e-fPIC is not valid with -mcoff}} \ + -isystem /usr/gnu/include \ + %{pthread:-D_REENTRANT} \ %{!Xods30:-D_STRICT_NAMES} \ %{!ansi:%{!posix:%{!Xods30:-D_SCO_XPG_VERS=4}}} \ %{ansi:-isystem include/ansi%s -isystem /usr/include/ansi} \ @@ -652,8 +256,6 @@ init_section () \ -DM_BITFIELDS -DM_SYS5 -DM_SYSV -DM_INTERNAT -DM_SYSIII \ -DM_WORDSWAP}}}} \ %{scointl:-DM_INTERNAT -D_M_INTERNAT} \ - %{!mcoff:-D_SCO_ELF} \ - %{mcoff:-D_M_COFF -D_SCO_COFF} \ %{Xa:-D_SCO_C_DIALECT=1} \ %{!Xa:%{Xc:-D_SCO_C_DIALECT=3} \ %{!Xc:%{Xk:-D_SCO_C_DIALECT=4} \ @@ -662,109 +264,38 @@ init_section () \ #undef LINK_SPEC #define LINK_SPEC \ - "-b %{!mcoff:elf}%{mcoff:coff \ - %{static:%e-static not valid with -mcoff} \ - %{shared:%e-shared not valid with -mcoff} \ - %{symbolic:%e-symbolic not valid with -mcoff} \ - %{fpic:%e-fpic not valid with -mcoff} \ - %{fPIC:%e-fPIC not valid with -mcoff}} \ - -R%{Xa:a}%{!Xa:%{Xc:c}%{!Xc:%{Xk:k}%{!Xk:%{Xt:t}%{!Xt:a}}}},%{ansi:ansi}%{!ansi:%{posix:posix}%{!posix:%{Xpg4:xpg4}%{!Xpg4:%{Xpg4plus:XPG4PLUS}%{!Xpg4plus:%{Xods30:ods30}%{!Xods30:XPG4PLUS}}}}},%{mcoff:COFF}%{!mcoff:ELF} \ - %{Wl,*%*} %{YP,*} %{YL,*} %{YU,*} \ + "%{!shared:%{!symbolic:%{!G:-E%{Xa:a}%{!Xa:%{Xc:c}%{!Xc:%{Xk:k}%{!Xk:%{Xt:t}%{!Xt:a}}}},%{ansi:ansi}%{!ansi:%{posix:posix}%{!posix:%{Xpg4:xpg4}%{!Xpg4:%{Xpg4plus:XPG4PLUS}%{!Xpg4plus:%{Xods30:ods30}%{!Xods30:XPG4PLUS}}}}},ELF}}} \ + %{Wl,*:%*} %{YP,*} %{YL,*} %{YU,*} \ %{!YP,*:%{p:-YP,/usr/ccs/libp:/lib/libp:/usr/lib/libp:/usr/ccs/lib:/lib:/usr/lib} \ %{!p:-YP,/usr/ccs/lib:/lib:/usr/lib}} \ - %{h*} %{static:-dn -Bstatic} %{shared:-G -dy %{!z*:-z text}} \ - %{symbolic:-Bsymbolic -G -dy %{!z*:-z text}} %{z*} %{R*} %{Y*} \ - %{G:-G} %{!mcoff:%{Qn:} %{!Qy:-Qn}}" - -/* The SCO COFF linker gets confused on the difference between "-ofoo" - and "-o foo". So we just always force a single space. */ - -#define SWITCHES_NEED_SPACES "o" + %{h*} %{static:-dn -Bstatic %{G:%e-G and -static are mutually exclusive}} \ + %{shared:%{!G:-G}} %{G:%{!shared:-G}} %{shared:%{G:-G}} \ + %{shared:-dy %{symbolic:-Bsymbolic -G} %{z*}} %{R*} %{Y*} \ + %{Qn:} %{!Qy:-Qn} -z alt_resolve" /* Library spec. If we are not building a shared library, provide the standard libraries, as per the SCO compiler. */ #undef LIB_SPEC #define LIB_SPEC \ - "%{shared:pic/libgcc.a%s}%{!shared:%{!symbolic:-lcrypt -lgen -lc}}" + "%{shared:%{!G:pic/libgcc.a%s}} \ + %{G:%{!shared:pic/libgcc.a%s}} \ + %{shared:%{G:pic/libgcc.a%s}} \ + %{p:%{!pp:-lelfprof -lelf}} %{pp:%{!p:-lelfprof -lelf}} \ + %{!shared:%{!symbolic:%{!G:-lcrypt -lgen -lc %{pthread:-lpthread}}}}" #undef LIBGCC_SPEC #define LIBGCC_SPEC \ - "%{!shared:-lgcc}" + "%{!shared:%{!G:-lgcc}}" -#define MASK_COFF 010000000000 /* Mask for elf generation */ -#define TARGET_ELF (1) /* (!(target_flags & MASK_COFF)) */ +/* Here for legacy support only so we still accept -melf flag */ +#define MASK_COFF 010000000000 /* Mask for COFF generation */ +#define TARGET_ELF (1) #undef SUBTARGET_SWITCHES #define SUBTARGET_SWITCHES \ { "elf", -MASK_COFF, N_("Generate ELF output") }, -#define NO_DOLLAR_IN_LABEL - -/* Implicit library calls should use memcpy, not bcopy, etc. They are - faster on OpenServer libraries. */ - -#define TARGET_MEM_FUNCTIONS - -/* Biggest alignment supported by the object file format of this - machine. Use this macro to limit the alignment which can be - specified using the `__attribute__ ((aligned (N)))' construct. If - not defined, the default value is `BIGGEST_ALIGNMENT'. */ - -#define MAX_OFILE_ALIGNMENT (32768*8) - -/* Define the `__builtin_va_list' type for the ABI. On OpenServer, this - type is `char *'. */ -#undef BUILD_VA_LIST_TYPE -#define BUILD_VA_LIST_TYPE(VALIST) \ - (VALIST) = build_pointer_type (char_type_node) - - -/* -Here comes some major hackery to get the crt stuff to compile properly. -Since we can (and do) compile for both COFF and ELF environments, we -set things up accordingly, based on the pre-processor defines for ELF -and COFF. This is insane, but then I guess having one compiler with a -single back-end supporting two vastly different file format types is -a little insane too. But it is not impossible and we get a useful -compiler at the end of the day. Onward we go ... -*/ - -#if defined(CRT_BEGIN) || defined(CRT_END) || defined(IN_LIBGCC2) -# undef OBJECT_FORMAT_ELF -# undef INIT_SECTION_ASM_OP -# undef FINI_SECTION_ASM_OP -# undef CTORS_SECTION_ASM_OP -# undef DTORS_SECTION_ASM_OP -# undef EH_FRAME_SECTION_NAME -# undef CTOR_LIST_BEGIN -# undef CTOR_LIST_END -# undef DO_GLOBAL_CTORS_BODY - -# if defined (_SCO_ELF) -# define OBJECT_FORMAT_ELF -# define INIT_SECTION_ASM_OP INIT_SECTION_ASM_OP_ELF -# define FINI_SECTION_ASM_OP FINI_SECTION_ASM_OP_ELF -# define DTORS_SECTION_ASM_OP DTORS_SECTION_ASM_OP_ELF -# define CTORS_SECTION_ASM_OP CTORS_SECTION_ASM_OP_ELF -# define EH_FRAME_SECTION_NAME EH_FRAME_SECTION_NAME_ELF -# else /* ! _SCO_ELF */ -# define INIT_SECTION_ASM_OP INIT_SECTION_ASM_OP_COFF -# define FINI_SECTION_ASM_OP FINI_SECTION_ASM_OP_COFF -# define DTORS_SECTION_ASM_OP DTORS_SECTION_ASM_OP_COFF -# define CTORS_SECTION_ASM_OP CTORS_SECTION_ASM_OP_COFF -# define EH_FRAME_SECTION_NAME EH_FRAME_SECTION_NAME_COFF -# define CTOR_LIST_BEGIN asm (INIT_SECTION_ASM_OP); asm ("pushl $0") -# define CTOR_LIST_END CTOR_LIST_BEGIN -# define DO_GLOBAL_CTORS_BODY \ -do { \ - func_ptr *p, *beg = alloca(0); \ - for (p = beg; *p;) \ - (*p++) (); \ -} while (0) -# endif /* ! _SCO_ELF */ -#endif /* CRT_BEGIN !! CRT_END */ - /* Handle special EH pointer encodings. Absolute, pc-relative, and indirect are handled automatically. */ #define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \ @@ -798,10 +329,3 @@ do { \ : "=d"(BASE)) #endif -/* Select a format to encode pointers in exception handling data. CODE - is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is - true if the symbol may be affected by dynamic relocations. */ -#undef ASM_PREFERRED_EH_DATA_FORMAT -#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ - (flag_pic ? (GLOBAL ? DW_EH_PE_indirect : 0) | DW_EH_PE_datarel \ - : DW_EH_PE_absptr) diff --git a/contrib/gcc/config/i386/sol2.h b/contrib/gcc/config/i386/sol2.h index fb5a184..9089a1d 100644 --- a/contrib/gcc/config/i386/sol2.h +++ b/contrib/gcc/config/i386/sol2.h @@ -1,5 +1,5 @@ /* Target definitions for GNU compiler for Intel 80386 running Solaris 2 - Copyright (C) 1993, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 + Copyright (C) 1993, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. Contributed by Fred Fish (fnf@cygnus.com). @@ -20,8 +20,6 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#define CMOV_SUN_AS_SYNTAX 1 - /* The Solaris 2.0 x86 linker botches alignment of code sections. It tries to align to a 16 byte boundary by padding with 0x00000090 ints, rather than 0x90 bytes (nop). This generates trash in the diff --git a/contrib/gcc/config/i386/t-sco5 b/contrib/gcc/config/i386/t-sco5 index d0c457e..f92a624 100644 --- a/contrib/gcc/config/i386/t-sco5 +++ b/contrib/gcc/config/i386/t-sco5 @@ -1,18 +1,16 @@ -# We need to use -fPIC when we are using gcc to compile the routines in -# crtstuff.c. This is only really needed when we are going to use gcc/g++ -# to produce a shared library, but since we don't know ahead of time when -# we will be doing that, we just always use -fPIC when compiling the -# routines in crtstuff.c. Likewise for libgcc2.c. This is less painful -# than multilibbing everything with PIC and PIC-not variants. - -# The pushl in CTOR initialization interferes with frame pointer elimination. - +# We multilib libgcc for -fPIC, to get real PIC code in it. +# NOTE: We must use -fPIC on crt{begi,end}.o else we get an RTLD error +# "cant set protections on segment of length blah at 0x8048000". CRTSTUFF_T_CFLAGS = -fPIC -fno-omit-frame-pointer -TARGET_LIBGCC2_CFLAGS = -fPIC -crti.o: $(srcdir)/config/i386/sol2-ci.asm $(GCC_PASSES) - sed -e '/^!/d' <$(srcdir)/config/i386/sol2-ci.asm >crti.s - $(GCC_FOR_TARGET) -c -o crti.o crti.s +MULTILIB_OPTIONS = fPIC +MULTILIB_DIRNAMES = pic +MUTLILIB_EXCEPTIONS = +MULTILIB_MATCHES = fPIC=fpic +MULTILIB_EXTRA_OPTS = + +LIBGCC=stmp-multilib +INSTALL_LIBGCC=install-multilib # See all the declarations. FIXPROTO_DEFINES = -D_XOPEN_SOURCE -D_POSIX_C_SOURCE=2 diff --git a/contrib/gcc/config/i386/xmmintrin.h b/contrib/gcc/config/i386/xmmintrin.h index 43a05c1..1829ab0 100644 --- a/contrib/gcc/config/i386/xmmintrin.h +++ b/contrib/gcc/config/i386/xmmintrin.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2002 Free Software Foundation, Inc. +/* Copyright (C) 2002, 2003 Free Software Foundation, Inc. This file is part of GNU CC. @@ -25,7 +25,7 @@ Public License. */ /* Implemented from the specification included in the Intel C++ Compiler - User Guide and Reference, version 5.0. */ + User Guide and Reference, version 8.0. */ #ifndef _XMMINTRIN_H_INCLUDED #define _XMMINTRIN_H_INCLUDED @@ -475,6 +475,12 @@ _mm_cvtss_si32 (__m128 __A) return __builtin_ia32_cvtss2si ((__v4sf) __A); } +static __inline int +_mm_cvt_ss2si (__m128 __A) +{ + return _mm_cvtss_si32 (__A); +} + #ifdef __x86_64__ /* Convert the lower SPFP value to a 32-bit integer according to the current rounding mode. */ @@ -493,6 +499,12 @@ _mm_cvtps_pi32 (__m128 __A) return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A); } +static __inline __m64 +_mm_cvt_ps2pi (__m128 __A) +{ + return _mm_cvtps_pi32 (__A); +} + /* Truncate the lower SPFP value to a 32-bit integer. */ static __inline int _mm_cvttss_si32 (__m128 __A) @@ -500,6 +512,12 @@ _mm_cvttss_si32 (__m128 __A) return __builtin_ia32_cvttss2si ((__v4sf) __A); } +static __inline int +_mm_cvtt_ss2si (__m128 __A) +{ + return _mm_cvttss_si32 (__A); +} + #ifdef __x86_64__ /* Truncate the lower SPFP value to a 32-bit integer. */ static __inline long long @@ -517,6 +535,12 @@ _mm_cvttps_pi32 (__m128 __A) return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A); } +static __inline __m64 +_mm_cvtt_ps2pi (__m128 __A) +{ + return _mm_cvttps_pi32 (__A); +} + /* Convert B to a SPFP value and insert it as element zero in A. */ static __inline __m128 _mm_cvtsi32_ss (__m128 __A, int __B) @@ -524,6 +548,12 @@ _mm_cvtsi32_ss (__m128 __A, int __B) return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B); } +static __inline __m128 +_mm_cvt_si2ss (__m128 __A, int __B) +{ + return _mm_cvtsi32_ss (__A, __B); +} + #ifdef __x86_64__ /* Convert B to a SPFP value and insert it as element zero in A. */ static __inline __m128 @@ -541,6 +571,12 @@ _mm_cvtpi32_ps (__m128 __A, __m64 __B) return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B); } +static __inline __m128 +_mm_cvt_pi2ps (__m128 __A, __m64 __B) +{ + return _mm_cvtpi32_ps (__A, __B); +} + /* Convert the four signed 16-bit values in A to SPFP form. */ static __inline __m128 _mm_cvtpi16_ps (__m64 __A) @@ -942,9 +978,16 @@ _mm_extract_pi16 (__m64 __A, int __N) { return __builtin_ia32_pextrw ((__v4hi)__A, __N); } + +static __inline int +_m_pextrw (__m64 __A, int __N) +{ + return _mm_extract_pi16 (__A, __N); +} #else #define _mm_extract_pi16(A, N) \ __builtin_ia32_pextrw ((__v4hi)(A), (N)) +#define _m_pextrw(A, N) _mm_extract_pi16((A), (N)) #endif /* Inserts word D into one of four words of A. The selector N must be @@ -955,9 +998,16 @@ _mm_insert_pi16 (__m64 __A, int __D, int __N) { return (__m64)__builtin_ia32_pinsrw ((__v4hi)__A, __D, __N); } + +static __inline __m64 +_m_pinsrw (__m64 __A, int __D, int __N) +{ + return _mm_insert_pi16 (__A, __D, __N); +} #else #define _mm_insert_pi16(A, D, N) \ ((__m64) __builtin_ia32_pinsrw ((__v4hi)(A), (D), (N))) +#define _m_pinsrw(A, D, N) _mm_insert_pi16((A), (D), (N)) #endif /* Compute the element-wise maximum of signed 16-bit values. */ @@ -967,6 +1017,12 @@ _mm_max_pi16 (__m64 __A, __m64 __B) return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B); } +static __inline __m64 +_m_pmaxsw (__m64 __A, __m64 __B) +{ + return _mm_max_pi16 (__A, __B); +} + /* Compute the element-wise maximum of unsigned 8-bit values. */ static __inline __m64 _mm_max_pu8 (__m64 __A, __m64 __B) @@ -974,6 +1030,12 @@ _mm_max_pu8 (__m64 __A, __m64 __B) return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B); } +static __inline __m64 +_m_pmaxub (__m64 __A, __m64 __B) +{ + return _mm_max_pu8 (__A, __B); +} + /* Compute the element-wise minimum of signed 16-bit values. */ static __inline __m64 _mm_min_pi16 (__m64 __A, __m64 __B) @@ -981,6 +1043,12 @@ _mm_min_pi16 (__m64 __A, __m64 __B) return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B); } +static __inline __m64 +_m_pminsw (__m64 __A, __m64 __B) +{ + return _mm_min_pi16 (__A, __B); +} + /* Compute the element-wise minimum of unsigned 8-bit values. */ static __inline __m64 _mm_min_pu8 (__m64 __A, __m64 __B) @@ -988,6 +1056,12 @@ _mm_min_pu8 (__m64 __A, __m64 __B) return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B); } +static __inline __m64 +_m_pminub (__m64 __A, __m64 __B) +{ + return _mm_min_pu8 (__A, __B); +} + /* Create an 8-bit mask of the signs of 8-bit values. */ static __inline int _mm_movemask_pi8 (__m64 __A) @@ -995,6 +1069,12 @@ _mm_movemask_pi8 (__m64 __A) return __builtin_ia32_pmovmskb ((__v8qi)__A); } +static __inline int +_m_pmovmskb (__m64 __A) +{ + return _mm_movemask_pi8 (__A); +} + /* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values in B and produce the high 16 bits of the 32-bit results. */ static __inline __m64 @@ -1003,6 +1083,12 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B) return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B); } +static __inline __m64 +_m_pmulhuw (__m64 __A, __m64 __B) +{ + return _mm_mulhi_pu16 (__A, __B); +} + /* Return a combination of the four 16-bit values in A. The selector must be an immediate. */ #if 0 @@ -1011,9 +1097,16 @@ _mm_shuffle_pi16 (__m64 __A, int __N) { return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N); } + +static __inline __m64 +_m_pshufw (__m64 __A, int __N) +{ + return _mm_shuffle_pi16 (__A, __N); +} #else #define _mm_shuffle_pi16(A, N) \ ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N))) +#define _m_pshufw(A, N) _mm_shuffle_pi16 ((A), (N)) #endif /* Conditionally store byte elements of A into P. The high bit of each @@ -1025,6 +1118,12 @@ _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P) __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P); } +static __inline void +_m_maskmovq (__m64 __A, __m64 __N, char *__P) +{ + _mm_maskmove_si64 (__A, __N, __P); +} + /* Compute the rounded averages of the unsigned 8-bit values in A and B. */ static __inline __m64 _mm_avg_pu8 (__m64 __A, __m64 __B) @@ -1032,6 +1131,12 @@ _mm_avg_pu8 (__m64 __A, __m64 __B) return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B); } +static __inline __m64 +_m_pavgb (__m64 __A, __m64 __B) +{ + return _mm_avg_pu8 (__A, __B); +} + /* Compute the rounded averages of the unsigned 16-bit values in A and B. */ static __inline __m64 _mm_avg_pu16 (__m64 __A, __m64 __B) @@ -1039,6 +1144,12 @@ _mm_avg_pu16 (__m64 __A, __m64 __B) return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B); } +static __inline __m64 +_m_pavgw (__m64 __A, __m64 __B) +{ + return _mm_avg_pu16 (__A, __B); +} + /* Compute the sum of the absolute differences of the unsigned 8-bit values in A and B. Return the value in the lower 16-bit word; the upper words are cleared. */ @@ -1048,6 +1159,12 @@ _mm_sad_pu8 (__m64 __A, __m64 __B) return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B); } +static __inline __m64 +_m_psadbw (__m64 __A, __m64 __B) +{ + return _mm_sad_pu8 (__A, __B); +} + /* Loads one cache line from address P to a location "closer" to the processor. The selector I specifies the type of prefetch operation. */ #if 0 @@ -1106,1469 +1223,8 @@ do { \ (row3) = __builtin_ia32_shufps (__t2, __t3, 0xDD); \ } while (0) -#ifdef __SSE2__ -/* SSE2 */ -typedef int __v2df __attribute__ ((mode (V2DF))); -typedef int __v2di __attribute__ ((mode (V2DI))); -typedef int __v4si __attribute__ ((mode (V4SI))); -typedef int __v8hi __attribute__ ((mode (V8HI))); -typedef int __v16qi __attribute__ ((mode (V16QI))); - -/* Create a selector for use with the SHUFPD instruction. */ -#define _MM_SHUFFLE2(fp1,fp0) \ - (((fp1) << 1) | (fp0)) - -#define __m128i __v2di -#define __m128d __v2df - -/* Create a vector with element 0 as *P and the rest zero. */ -static __inline __m128d -_mm_load_sd (double const *__P) -{ - return (__m128d) __builtin_ia32_loadsd (__P); -} - -/* Create a vector with all two elements equal to *P. */ -static __inline __m128d -_mm_load1_pd (double const *__P) -{ - __v2df __tmp = __builtin_ia32_loadsd (__P); - return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0)); -} - -static __inline __m128d -_mm_load_pd1 (double const *__P) -{ - return _mm_load1_pd (__P); -} - -/* Load two DPFP values from P. The addresd must be 16-byte aligned. */ -static __inline __m128d -_mm_load_pd (double const *__P) -{ - return (__m128d) __builtin_ia32_loadapd (__P); -} - -/* Load two DPFP values from P. The addresd need not be 16-byte aligned. */ -static __inline __m128d -_mm_loadu_pd (double const *__P) -{ - return (__m128d) __builtin_ia32_loadupd (__P); -} - -/* Load two DPFP values in reverse order. The addresd must be aligned. */ -static __inline __m128d -_mm_loadr_pd (double const *__P) -{ - __v2df __tmp = __builtin_ia32_loadapd (__P); - return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); -} - -/* Create a vector with element 0 as F and the rest zero. */ -static __inline __m128d -_mm_set_sd (double __F) -{ - return (__m128d) __builtin_ia32_loadsd (&__F); -} - -/* Create a vector with all two elements equal to F. */ -static __inline __m128d -_mm_set1_pd (double __F) -{ - __v2df __tmp = __builtin_ia32_loadsd (&__F); - return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0)); -} - -static __inline __m128d -_mm_set_pd1 (double __F) -{ - return _mm_set1_pd (__F); -} - -/* Create the vector [Z Y]. */ -static __inline __m128d -_mm_set_pd (double __Z, double __Y) -{ - union { - double __a[2]; - __m128d __v; - } __u; - - __u.__a[0] = __Y; - __u.__a[1] = __Z; - - return __u.__v; -} - -/* Create the vector [Y Z]. */ -static __inline __m128d -_mm_setr_pd (double __Z, double __Y) -{ - return _mm_set_pd (__Y, __Z); -} - -/* Create a vector of zeros. */ -static __inline __m128d -_mm_setzero_pd (void) -{ - return (__m128d) __builtin_ia32_setzeropd (); -} - -/* Stores the lower DPFP value. */ -static __inline void -_mm_store_sd (double *__P, __m128d __A) -{ - __builtin_ia32_storesd (__P, (__v2df)__A); -} - -/* Store the lower DPFP value acrosd two words. */ -static __inline void -_mm_store1_pd (double *__P, __m128d __A) -{ - __v2df __va = (__v2df)__A; - __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,0)); - __builtin_ia32_storeapd (__P, __tmp); -} - -static __inline void -_mm_store_pd1 (double *__P, __m128d __A) -{ - _mm_store1_pd (__P, __A); -} - -/* Store two DPFP values. The addresd must be 16-byte aligned. */ -static __inline void -_mm_store_pd (double *__P, __m128d __A) -{ - __builtin_ia32_storeapd (__P, (__v2df)__A); -} - -/* Store two DPFP values. The addresd need not be 16-byte aligned. */ -static __inline void -_mm_storeu_pd (double *__P, __m128d __A) -{ - __builtin_ia32_storeupd (__P, (__v2df)__A); -} - -/* Store two DPFP values in reverse order. The addresd must be aligned. */ -static __inline void -_mm_storer_pd (double *__P, __m128d __A) -{ - __v2df __va = (__v2df)__A; - __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,1)); - __builtin_ia32_storeapd (__P, __tmp); -} - -/* Sets the low DPFP value of A from the low value of B. */ -static __inline __m128d -_mm_move_sd (__m128d __A, __m128d __B) -{ - return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); -} - - -static __inline __m128d -_mm_add_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_add_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_sub_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_sub_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_mul_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_mul_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_div_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_div_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_sqrt_pd (__m128d __A) -{ - return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); -} - -/* Return pair {sqrt (A[0), B[1]}. */ -static __inline __m128d -_mm_sqrt_sd (__m128d __A, __m128d __B) -{ - __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); - return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); -} - -static __inline __m128d -_mm_min_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_min_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_max_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_max_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_and_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_andnot_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_or_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_xor_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpeq_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmplt_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmple_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpgt_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpge_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpneq_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpnlt_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpnle_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpngt_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpnge_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpord_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpunord_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpeq_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmplt_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmple_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpgt_sd (__m128d __A, __m128d __B) -{ - return (__m128d) __builtin_ia32_movsd ((__v2df) __A, - (__v2df) - __builtin_ia32_cmpltsd ((__v2df) __B, - (__v2df) - __A)); -} - -static __inline __m128d -_mm_cmpge_sd (__m128d __A, __m128d __B) -{ - return (__m128d) __builtin_ia32_movsd ((__v2df) __A, - (__v2df) - __builtin_ia32_cmplesd ((__v2df) __B, - (__v2df) - __A)); -} - -static __inline __m128d -_mm_cmpneq_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpnlt_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpnle_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpngt_sd (__m128d __A, __m128d __B) -{ - return (__m128d) __builtin_ia32_movsd ((__v2df) __A, - (__v2df) - __builtin_ia32_cmpnltsd ((__v2df) __B, - (__v2df) - __A)); -} - -static __inline __m128d -_mm_cmpnge_sd (__m128d __A, __m128d __B) -{ - return (__m128d) __builtin_ia32_movsd ((__v2df) __A, - (__v2df) - __builtin_ia32_cmpnlesd ((__v2df) __B, - (__v2df) - __A)); -} - -static __inline __m128d -_mm_cmpord_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_cmpunord_sd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_comieq_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_comilt_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_comile_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_comigt_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_comige_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_comineq_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_ucomieq_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_ucomilt_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_ucomile_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_ucomigt_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_ucomige_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); -} - -static __inline int -_mm_ucomineq_sd (__m128d __A, __m128d __B) -{ - return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); -} - -/* Create a vector with element 0 as *P and the rest zero. */ - -static __inline __m128i -_mm_load_si128 (__m128i const *__P) -{ - return (__m128i) __builtin_ia32_loaddqa ((char const *)__P); -} - -static __inline __m128i -_mm_loadu_si128 (__m128i const *__P) -{ - return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); -} - -static __inline __m128i -_mm_loadl_epi64 (__m128i const *__P) -{ - return (__m128i) __builtin_ia32_movq2dq (*(unsigned long long *)__P); -} - -static __inline void -_mm_store_si128 (__m128i *__P, __m128i __B) -{ - __builtin_ia32_storedqa ((char *)__P, (__v16qi)__B); -} - -static __inline void -_mm_storeu_si128 (__m128i *__P, __m128i __B) -{ - __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); -} - -static __inline void -_mm_storel_epi64 (__m128i *__P, __m128i __B) -{ - *(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B); -} - -static __inline __m64 -_mm_movepi64_pi64 (__m128i __B) -{ - return (__m64) __builtin_ia32_movdq2q ((__v2di)__B); -} - -static __inline __m128i -_mm_move_epi64 (__m128i __A) -{ - return (__m128i) __builtin_ia32_movq ((__v2di)__A); -} - -/* Create a vector of zeros. */ -static __inline __m128i -_mm_setzero_si128 (void) -{ - return (__m128i) __builtin_ia32_setzero128 (); -} - -static __inline __m128i -_mm_set_epi64 (__m64 __A, __m64 __B) -{ - __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); - __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B); - return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp2, __tmp); -} - -/* Create the vector [Z Y X W]. */ -static __inline __m128i -_mm_set_epi32 (int __Z, int __Y, int __X, int __W) -{ - union { - int __a[4]; - __m128i __v; - } __u; - - __u.__a[0] = __W; - __u.__a[1] = __X; - __u.__a[2] = __Y; - __u.__a[3] = __Z; - - return __u.__v; -} - -#ifdef __x86_64__ -/* Create the vector [Z Y]. */ -static __inline __m128i -_mm_set_epi64x (long long __Z, long long __Y) -{ - union { - long __a[2]; - __m128i __v; - } __u; - - __u.__a[0] = __Y; - __u.__a[1] = __Z; - - return __u.__v; -} -#endif - -/* Create the vector [S T U V Z Y X W]. */ -static __inline __m128i -_mm_set_epi16 (short __Z, short __Y, short __X, short __W, - short __V, short __U, short __T, short __S) -{ - union { - short __a[8]; - __m128i __v; - } __u; - - __u.__a[0] = __S; - __u.__a[1] = __T; - __u.__a[2] = __U; - __u.__a[3] = __V; - __u.__a[4] = __W; - __u.__a[5] = __X; - __u.__a[6] = __Y; - __u.__a[7] = __Z; - - return __u.__v; -} - -/* Create the vector [S T U V Z Y X W]. */ -static __inline __m128i -_mm_set_epi8 (char __Z, char __Y, char __X, char __W, - char __V, char __U, char __T, char __S, - char __Z1, char __Y1, char __X1, char __W1, - char __V1, char __U1, char __T1, char __S1) -{ - union { - char __a[16]; - __m128i __v; - } __u; - - __u.__a[0] = __S1; - __u.__a[1] = __T1; - __u.__a[2] = __U1; - __u.__a[3] = __V1; - __u.__a[4] = __W1; - __u.__a[5] = __X1; - __u.__a[6] = __Y1; - __u.__a[7] = __Z1; - __u.__a[8] = __S; - __u.__a[9] = __T; - __u.__a[10] = __U; - __u.__a[11] = __V; - __u.__a[12] = __W; - __u.__a[13] = __X; - __u.__a[14] = __Y; - __u.__a[15] = __Z; - - return __u.__v; -} - -static __inline __m128i -_mm_set1_epi64 (__m64 __A) -{ - __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); - return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp); -} - -static __inline __m128i -_mm_set1_epi32 (int __A) -{ - __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__A); - return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); -} - -#ifdef __x86_64__ -static __inline __m128i -_mm_set1_epi64x (long long __A) -{ - __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); - return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0)); -} -#endif - -static __inline __m128i -_mm_set1_epi16 (short __A) -{ - int __Acopy = (unsigned short)__A; - __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy); - __tmp = (__v4si)__builtin_ia32_punpcklwd128 ((__v8hi)__tmp, (__v8hi)__tmp); - return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); -} - -static __inline __m128i -_mm_set1_epi8 (char __A) -{ - int __Acopy = (unsigned char)__A; - __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy); - __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp); - __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp); - return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); -} - -static __inline __m128i -_mm_setr_epi64 (__m64 __A, __m64 __B) -{ - __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); - __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B); - return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp2); -} - -/* Create the vector [Z Y X W]. */ -static __inline __m128i -_mm_setr_epi32 (int __W, int __X, int __Y, int __Z) -{ - union { - int __a[4]; - __m128i __v; - } __u; - - __u.__a[0] = __W; - __u.__a[1] = __X; - __u.__a[2] = __Y; - __u.__a[3] = __Z; - - return __u.__v; -} -/* Create the vector [S T U V Z Y X W]. */ -static __inline __m128i -_mm_setr_epi16 (short __S, short __T, short __U, short __V, - short __W, short __X, short __Y, short __Z) -{ - union { - short __a[8]; - __m128i __v; - } __u; - - __u.__a[0] = __S; - __u.__a[1] = __T; - __u.__a[2] = __U; - __u.__a[3] = __V; - __u.__a[4] = __W; - __u.__a[5] = __X; - __u.__a[6] = __Y; - __u.__a[7] = __Z; - - return __u.__v; -} - -/* Create the vector [S T U V Z Y X W]. */ -static __inline __m128i -_mm_setr_epi8 (char __S1, char __T1, char __U1, char __V1, - char __W1, char __X1, char __Y1, char __Z1, - char __S, char __T, char __U, char __V, - char __W, char __X, char __Y, char __Z) -{ - union { - char __a[16]; - __m128i __v; - } __u; - - __u.__a[0] = __S1; - __u.__a[1] = __T1; - __u.__a[2] = __U1; - __u.__a[3] = __V1; - __u.__a[4] = __W1; - __u.__a[5] = __X1; - __u.__a[6] = __Y1; - __u.__a[7] = __Z1; - __u.__a[8] = __S; - __u.__a[9] = __T; - __u.__a[10] = __U; - __u.__a[11] = __V; - __u.__a[12] = __W; - __u.__a[13] = __X; - __u.__a[14] = __Y; - __u.__a[15] = __Z; - - return __u.__v; -} - -static __inline __m128d -_mm_cvtepi32_pd (__m128i __A) -{ - return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); -} - -static __inline __m128 -_mm_cvtepi32_ps (__m128i __A) -{ - return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); -} - -static __inline __m128i -_mm_cvtpd_epi32 (__m128d __A) -{ - return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); -} - -static __inline __m64 -_mm_cvtpd_pi32 (__m128d __A) -{ - return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); -} - -static __inline __m128 -_mm_cvtpd_ps (__m128d __A) -{ - return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); -} - -static __inline __m128i -_mm_cvttpd_epi32 (__m128d __A) -{ - return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); -} - -static __inline __m64 -_mm_cvttpd_pi32 (__m128d __A) -{ - return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A); -} - -static __inline __m128d -_mm_cvtpi32_pd (__m64 __A) -{ - return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); -} - -static __inline __m128i -_mm_cvtps_epi32 (__m128 __A) -{ - return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A); -} - -static __inline __m128i -_mm_cvttps_epi32 (__m128 __A) -{ - return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A); -} - -static __inline __m128d -_mm_cvtps_pd (__m128 __A) -{ - return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A); -} - -static __inline int -_mm_cvtsd_si32 (__m128d __A) -{ - return __builtin_ia32_cvtsd2si ((__v2df) __A); -} - -#ifdef __x86_64__ -static __inline long long -_mm_cvtsd_si64x (__m128d __A) -{ - return __builtin_ia32_cvtsd2si64 ((__v2df) __A); -} -#endif - -static __inline int -_mm_cvttsd_si32 (__m128d __A) -{ - return __builtin_ia32_cvttsd2si ((__v2df) __A); -} - -#ifdef __x86_64__ -static __inline long long -_mm_cvttsd_si64x (__m128d __A) -{ - return __builtin_ia32_cvttsd2si64 ((__v2df) __A); -} -#endif - -static __inline __m128 -_mm_cvtsd_ss (__m128 __A, __m128d __B) -{ - return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); -} - -static __inline __m128d -_mm_cvtsi32_sd (__m128d __A, int __B) -{ - return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B); -} - -#ifdef __x86_64__ -static __inline __m128d -_mm_cvtsi64x_sd (__m128d __A, long long __B) -{ - return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); -} -#endif - -static __inline __m128d -_mm_cvtss_sd (__m128d __A, __m128 __B) -{ - return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); -} - -#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C))) - -static __inline __m128d -_mm_unpackhi_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_unpacklo_pd (__m128d __A, __m128d __B) -{ - return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_loadh_pd (__m128d __A, double const *__B) -{ - return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B); -} - -static __inline void -_mm_storeh_pd (double *__A, __m128d __B) -{ - __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B); -} - -static __inline __m128d -_mm_loadl_pd (__m128d __A, double const *__B) -{ - return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B); -} - -static __inline void -_mm_storel_pd (double *__A, __m128d __B) -{ - __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B); -} - -static __inline int -_mm_movemask_pd (__m128d __A) -{ - return __builtin_ia32_movmskpd ((__v2df)__A); -} - -static __inline __m128i -_mm_packs_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_packs_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B); -} - -static __inline __m128i -_mm_packus_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_unpackhi_epi8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_unpackhi_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_unpackhi_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B); -} - -static __inline __m128i -_mm_unpackhi_epi64 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_unpacklo_epi8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_unpacklo_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_unpacklo_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B); -} - -static __inline __m128i -_mm_unpacklo_epi64 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_add_epi8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_add_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_add_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B); -} - -static __inline __m128i -_mm_add_epi64 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_adds_epi8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_adds_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_adds_epu8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_adds_epu16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_sub_epi8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_sub_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_sub_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B); -} - -static __inline __m128i -_mm_sub_epi64 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_subs_epi8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_subs_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_subs_epu8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_subs_epu16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_madd_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_mulhi_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_mullo_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m64 -_mm_mul_su32 (__m64 __A, __m64 __B) -{ - return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B); -} - -static __inline __m128i -_mm_mul_epu32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B); -} - -static __inline __m128i -_mm_sll_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psllw128 ((__v8hi)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_sll_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pslld128 ((__v4si)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_sll_epi64 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psllq128 ((__v2di)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_sra_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_sra_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_srl_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_srl_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_srl_epi64 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_slli_epi16 (__m128i __A, int __B) -{ - return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); -} - -static __inline __m128i -_mm_slli_epi32 (__m128i __A, int __B) -{ - return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); -} - -static __inline __m128i -_mm_slli_epi64 (__m128i __A, int __B) -{ - return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); -} - -static __inline __m128i -_mm_srai_epi16 (__m128i __A, int __B) -{ - return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); -} - -static __inline __m128i -_mm_srai_epi32 (__m128i __A, int __B) -{ - return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); -} - -#if 0 -static __m128i __attribute__((__always_inline__)) -_mm_srli_si128 (__m128i __A, const int __B) -{ - return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) -} - -static __m128i __attribute__((__always_inline__)) -_mm_srli_si128 (__m128i __A, const int __B) -{ - return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) -} -#endif -#define _mm_srli_si128(__A, __B) ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) -#define _mm_slli_si128(__A, __B) ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) - -static __inline __m128i -_mm_srli_epi16 (__m128i __A, int __B) -{ - return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); -} - -static __inline __m128i -_mm_srli_epi32 (__m128i __A, int __B) -{ - return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); -} - -static __inline __m128i -_mm_srli_epi64 (__m128i __A, int __B) -{ - return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); -} - -static __inline __m128i -_mm_and_si128 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_andnot_si128 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_or_si128 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_xor_si128 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B); -} - -static __inline __m128i -_mm_cmpeq_epi8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_cmpeq_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_cmpeq_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B); -} - -static __inline __m128i -_mm_cmplt_epi8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A); -} - -static __inline __m128i -_mm_cmplt_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A); -} - -static __inline __m128i -_mm_cmplt_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A); -} - -static __inline __m128i -_mm_cmpgt_epi8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_cmpgt_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_cmpgt_epi32 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B); -} - -#define _mm_extract_epi16(__A, __B) __builtin_ia32_pextrw128 ((__v8hi)__A, __B) - -#define _mm_insert_epi16(__A, __B, __C) ((__m128i)__builtin_ia32_pinsrw128 ((__v8hi)__A, __B, __C)) - -static __inline __m128i -_mm_max_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_max_epu8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_min_epi16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_min_epu8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline int -_mm_movemask_epi8 (__m128i __A) -{ - return __builtin_ia32_pmovmskb128 ((__v16qi)__A); -} - -static __inline __m128i -_mm_mulhi_epu16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); -} - -#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B)) -#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B)) -#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B)) - -static __inline void -_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) -{ - __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C); -} - -static __inline __m128i -_mm_avg_epu8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline __m128i -_mm_avg_epu16 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B); -} - -static __inline __m128i -_mm_sad_epu8 (__m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B); -} - -static __inline void -_mm_stream_si32 (int *__A, int __B) -{ - __builtin_ia32_movnti (__A, __B); -} - -static __inline void -_mm_stream_si128 (__m128i *__A, __m128i __B) -{ - __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B); -} - -static __inline void -_mm_stream_pd (double *__A, __m128d __B) -{ - __builtin_ia32_movntpd (__A, (__v2df)__B); -} - -static __inline __m128i -_mm_movpi64_epi64 (__m64 __A) -{ - return (__m128i)__builtin_ia32_movq2dq ((unsigned long long)__A); -} - -static __inline void -_mm_clflush (void const *__A) -{ - return __builtin_ia32_clflush (__A); -} - -static __inline void -_mm_lfence (void) -{ - __builtin_ia32_lfence (); -} - -static __inline void -_mm_mfence (void) -{ - __builtin_ia32_mfence (); -} - -static __inline __m128i -_mm_cvtsi32_si128 (int __A) -{ - return (__m128i) __builtin_ia32_loadd (&__A); -} - -#ifdef __x86_64__ -static __inline __m128i -_mm_cvtsi64x_si128 (long long __A) -{ - return (__m128i) __builtin_ia32_movq2dq (__A); -} -#endif - -static __inline int -_mm_cvtsi128_si32 (__m128i __A) -{ - int __tmp; - __builtin_ia32_stored (&__tmp, (__v4si)__A); - return __tmp; -} - -#ifdef __x86_64__ -static __inline long long -_mm_cvtsi128_si64x (__m128i __A) -{ - return __builtin_ia32_movdq2q ((__v2di)__A); -} -#endif - -#endif /* __SSE2__ */ +/* For backward source compatibility. */ +#include <emmintrin.h> #endif /* __SSE__ */ #endif /* _XMMINTRIN_H_INCLUDED */ diff --git a/contrib/gcc/config/ia64/hpux.h b/contrib/gcc/config/ia64/hpux.h index 56c601b..90303ea 100644 --- a/contrib/gcc/config/ia64/hpux.h +++ b/contrib/gcc/config/ia64/hpux.h @@ -49,6 +49,13 @@ do { \ } \ } while (0) +#undef CPP_SPEC +#define CPP_SPEC \ + "%{mt|pthread:-D_REENTRANT -D_THREAD_SAFE -D_POSIX_C_SOURCE=199506L}" +/* aCC defines also -DRWSTD_MULTI_THREAD, -DRW_MULTI_THREAD. These + affect only aCC's C++ library (Rogue Wave-derived) which we do not + use, and they violate the user's name space. */ + #undef ASM_EXTRA_SPEC #define ASM_EXTRA_SPEC "%{milp32:-milp32} %{mlp64:-mlp64}" @@ -68,6 +75,7 @@ do { \ #undef LIB_SPEC #define LIB_SPEC \ "%{!shared: \ + %{mt|pthread:-lpthread} \ %{p:%{!mlp64:-L/usr/lib/hpux32/libp} \ %{mlp64:-L/usr/lib/hpux64/libp} -lprof} \ %{pg:%{!mlp64:-L/usr/lib/hpux32/libp} \ @@ -134,6 +142,10 @@ do { \ #undef TARGET_HPUX_LD #define TARGET_HPUX_LD 1 +/* The HPUX dynamic linker objects to weak symbols with no + definitions, so do not use them in gthr-posix.h. */ +#define GTHREAD_USE_WEAK 0 + /* Put out the needed function declarations at the end. */ #define ASM_FILE_END(STREAM) ia64_hpux_asm_file_end(STREAM) @@ -144,6 +156,10 @@ do { \ #undef DTORS_SECTION_ASM_OP #define DTORS_SECTION_ASM_OP "\t.section\t.fini_array,\t\"aw\",\"fini_array\"" +/* The init_array/fini_array technique does not permit the use of + initialization priorities. */ +#define SUPPORTS_INIT_PRIORITY 0 + #undef READONLY_DATA_SECTION_ASM_OP #define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata,\t\"a\",\t\"progbits\"" diff --git a/contrib/gcc/config/ia64/ia64-protos.h b/contrib/gcc/config/ia64/ia64-protos.h index 72c2279..f25bb02 100644 --- a/contrib/gcc/config/ia64/ia64-protos.h +++ b/contrib/gcc/config/ia64/ia64-protos.h @@ -135,6 +135,9 @@ extern void ia64_override_options PARAMS((void)); extern int ia64_dbx_register_number PARAMS((int)); extern bool ia64_function_ok_for_sibcall PARAMS ((tree)); +extern rtx ia64_return_addr_rtx PARAMS ((HOST_WIDE_INT, rtx)); +extern void ia64_split_return_addr_rtx PARAMS ((rtx)); + #ifdef SDATA_SECTION_ASM_OP extern void sdata_section PARAMS ((void)); #endif diff --git a/contrib/gcc/config/ia64/ia64.c b/contrib/gcc/config/ia64/ia64.c index 0f34d8f..12f3204 100644 --- a/contrib/gcc/config/ia64/ia64.c +++ b/contrib/gcc/config/ia64/ia64.c @@ -175,8 +175,9 @@ static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode, tree, rtx)); static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode, tree, rtx)); -static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int, - tree, rtx)); +static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, + enum machine_mode, + int, tree, rtx)); static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode, tree, rtx)); static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx)); @@ -1154,6 +1155,7 @@ ia64_expand_move (op0, op1) if ((tls_kind = tls_symbolic_operand (op1, Pmode))) { rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns; + rtx orig_op0 = op0; switch (tls_kind) { @@ -1177,8 +1179,10 @@ ia64_expand_move (op0, op1) insns = get_insns (); end_sequence (); + if (GET_MODE (op0) != Pmode) + op0 = tga_ret; emit_libcall_block (insns, op0, tga_ret, op1); - return NULL_RTX; + break; case TLS_MODEL_LOCAL_DYNAMIC: /* ??? This isn't the completely proper way to do local-dynamic @@ -1206,20 +1210,15 @@ ia64_expand_move (op0, op1) tmp = gen_reg_rtx (Pmode); emit_libcall_block (insns, tmp, tga_ret, tga_eqv); - if (register_operand (op0, Pmode)) - tga_ret = op0; - else - tga_ret = gen_reg_rtx (Pmode); + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); if (TARGET_TLS64) { - emit_insn (gen_load_dtprel (tga_ret, op1)); - emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret)); + emit_insn (gen_load_dtprel (op0, op1)); + emit_insn (gen_adddi3 (op0, tmp, op0)); } else - emit_insn (gen_add_dtprel (tga_ret, tmp, op1)); - if (tga_ret == op0) - return NULL_RTX; - op1 = tga_ret; + emit_insn (gen_add_dtprel (op0, tmp, op1)); break; case TLS_MODEL_INITIAL_EXEC: @@ -1229,35 +1228,32 @@ ia64_expand_move (op0, op1) RTX_UNCHANGING_P (tmp) = 1; tmp = force_reg (Pmode, tmp); - if (register_operand (op0, Pmode)) - op1 = op0; - else - op1 = gen_reg_rtx (Pmode); - emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ())); - if (op1 == op0) - return NULL_RTX; + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); + emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ())); break; case TLS_MODEL_LOCAL_EXEC: - if (register_operand (op0, Pmode)) - tmp = op0; - else - tmp = gen_reg_rtx (Pmode); + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); if (TARGET_TLS64) { - emit_insn (gen_load_tprel (tmp, op1)); - emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp)); + emit_insn (gen_load_tprel (op0, op1)); + emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0)); } else - emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1)); - if (tmp == op0) - return NULL_RTX; - op1 = tmp; + emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1)); break; default: abort (); } + + if (orig_op0 == op0) + return NULL_RTX; + if (GET_MODE (orig_op0) == Pmode) + return op0; + return gen_lowpart (GET_MODE (orig_op0), op0); } else if (!TARGET_NO_PIC && (symbolic_operand (op1, Pmode) || @@ -2015,10 +2011,6 @@ ia64_initial_elimination_offset (from, to) abort (); break; - case RETURN_ADDRESS_POINTER_REGNUM: - offset = 0; - break; - default: abort (); } @@ -2369,17 +2361,6 @@ ia64_expand_prologue () reg_names[current_frame_info.reg_fp] = tmp; } - /* Fix up the return address placeholder. */ - /* ??? We can fail if __builtin_return_address is used, and we didn't - allocate a register in which to save b0. I can't think of a way to - eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and - then be sure that I got the right one. Further, reload doesn't seem - to care if an eliminable register isn't used, and "eliminates" it - anyway. */ - if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM] - && current_frame_info.reg_save_b0 != 0) - XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0; - /* We don't need an alloc instruction if we've used no outputs or locals. */ if (current_frame_info.n_local_regs == 0 && current_frame_info.n_output_regs == 0 @@ -2936,6 +2917,72 @@ ia64_direct_return () return 0; } +/* Return the magic cookie that we use to hold the return address + during early compilation. */ + +rtx +ia64_return_addr_rtx (count, frame) + HOST_WIDE_INT count; + rtx frame ATTRIBUTE_UNUSED; +{ + if (count != 0) + return NULL; + return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR); +} + +/* Split this value after reload, now that we know where the return + address is saved. */ + +void +ia64_split_return_addr_rtx (dest) + rtx dest; +{ + rtx src; + + if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) + { + if (current_frame_info.reg_save_b0 != 0) + src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); + else + { + HOST_WIDE_INT off; + unsigned int regno; + + /* Compute offset from CFA for BR0. */ + /* ??? Must be kept in sync with ia64_expand_prologue. */ + off = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size); + for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + off -= 8; + + /* Convert CFA offset to a register based offset. */ + if (frame_pointer_needed) + src = hard_frame_pointer_rtx; + else + { + src = stack_pointer_rtx; + off += current_frame_info.total_size; + } + + /* Load address into scratch register. */ + if (CONST_OK_FOR_I (off)) + emit_insn (gen_adddi3 (dest, src, GEN_INT (off))); + else + { + emit_move_insn (dest, GEN_INT (off)); + emit_insn (gen_adddi3 (dest, src, dest)); + } + + src = gen_rtx_MEM (Pmode, dest); + } + } + else + src = gen_rtx_REG (DImode, BR_REG (0)); + + emit_move_insn (dest, src); +} + int ia64_hard_regno_rename_ok (from, to) int from; @@ -3085,9 +3132,6 @@ ia64_output_function_epilogue (file, size) { int i; - /* Reset from the function's potential modifications. */ - XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM; - if (current_frame_info.reg_fp) { const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; @@ -7060,7 +7104,8 @@ ia64_emit_nops () { while (bundle_pos < 3) { - emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); + if (b->t[bundle_pos] != TYPE_L) + emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); bundle_pos++; } continue; @@ -7666,11 +7711,16 @@ ia64_init_builtins () psi_type_node, integer_type_node, integer_type_node, NULL_TREE); - /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */ + /* __sync_val_compare_and_swap_di */ tree di_ftype_pdi_di_di = build_function_type_list (long_integer_type_node, pdi_type_node, long_integer_type_node, long_integer_type_node, NULL_TREE); + /* __sync_bool_compare_and_swap_di */ + tree si_ftype_pdi_di_di + = build_function_type_list (integer_type_node, + pdi_type_node, long_integer_type_node, + long_integer_type_node, NULL_TREE); /* __sync_synchronize */ tree void_ftype_void = build_function_type (void_type_node, void_list_node); @@ -7703,7 +7753,7 @@ ia64_init_builtins () IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI); def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI); - def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di, + def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI); def_builtin ("__sync_synchronize", void_ftype_void, @@ -7943,7 +7993,8 @@ ia64_expand_op_and_fetch (binoptab, mode, arglist, target) */ static rtx -ia64_expand_compare_and_swap (mode, boolp, arglist, target) +ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target) + enum machine_mode rmode; enum machine_mode mode; int boolp; tree arglist; @@ -7991,7 +8042,7 @@ ia64_expand_compare_and_swap (mode, boolp, arglist, target) if (boolp) { if (! target) - target = gen_reg_rtx (mode); + target = gen_reg_rtx (rmode); return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1); } else @@ -8066,11 +8117,16 @@ ia64_expand_builtin (exp, target, subtarget, mode, ignore) tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); unsigned int fcode = DECL_FUNCTION_CODE (fndecl); tree arglist = TREE_OPERAND (exp, 1); + enum machine_mode rmode = VOIDmode; switch (fcode) { case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI: case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI: + mode = SImode; + rmode = SImode; + break; + case IA64_BUILTIN_LOCK_TEST_AND_SET_SI: case IA64_BUILTIN_LOCK_RELEASE_SI: case IA64_BUILTIN_FETCH_AND_ADD_SI: @@ -8089,7 +8145,15 @@ ia64_expand_builtin (exp, target, subtarget, mode, ignore) break; case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI: + mode = DImode; + rmode = SImode; + break; + case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI: + mode = DImode; + rmode = DImode; + break; + case IA64_BUILTIN_LOCK_TEST_AND_SET_DI: case IA64_BUILTIN_LOCK_RELEASE_DI: case IA64_BUILTIN_FETCH_AND_ADD_DI: @@ -8115,11 +8179,13 @@ ia64_expand_builtin (exp, target, subtarget, mode, ignore) { case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI: case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI: - return ia64_expand_compare_and_swap (mode, 1, arglist, target); + return ia64_expand_compare_and_swap (rmode, mode, 1, arglist, + target); case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI: case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI: - return ia64_expand_compare_and_swap (mode, 0, arglist, target); + return ia64_expand_compare_and_swap (rmode, mode, 0, arglist, + target); case IA64_BUILTIN_SYNCHRONIZE: emit_insn (gen_mf ()); @@ -8368,6 +8434,9 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function) emit_note (NULL, NOTE_INSN_PROLOGUE_END); this = gen_rtx_REG (Pmode, IN_REG (0)); + if (TARGET_ILP32) + emit_insn (gen_ptr_extend (this, + gen_rtx_REG (ptr_mode, IN_REG (0)))); /* Apply the constant offset, if required. */ if (delta) @@ -8389,7 +8458,14 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function) rtx vcall_offset_rtx = GEN_INT (vcall_offset); rtx tmp = gen_rtx_REG (Pmode, 2); - emit_move_insn (tmp, gen_rtx_MEM (Pmode, this)); + if (TARGET_ILP32) + { + rtx t = gen_rtx_REG (ptr_mode, 2); + emit_move_insn (t, gen_rtx_MEM (ptr_mode, this)); + emit_insn (gen_ptr_extend (tmp, t)); + } + else + emit_move_insn (tmp, gen_rtx_MEM (Pmode, this)); if (!CONST_OK_FOR_J (vcall_offset)) { @@ -8399,7 +8475,11 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function) } emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx)); - emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); + if (TARGET_ILP32) + emit_move_insn (gen_rtx_REG (ptr_mode, 2), + gen_rtx_MEM (ptr_mode, tmp)); + else + emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); emit_insn (gen_adddi3 (this, this, tmp)); } diff --git a/contrib/gcc/config/ia64/ia64.h b/contrib/gcc/config/ia64/ia64.h index 724405a..60d0143 100644 --- a/contrib/gcc/config/ia64/ia64.h +++ b/contrib/gcc/config/ia64/ia64.h @@ -436,7 +436,7 @@ while (0) 64 predicate registers, 8 branch registers, one frame pointer, and several "application" registers. */ -#define FIRST_PSEUDO_REGISTER 335 +#define FIRST_PSEUDO_REGISTER 334 /* Ranges for the various kinds of registers. */ #define ADDL_REGNO_P(REGNO) ((unsigned HOST_WIDE_INT) (REGNO) <= 3) @@ -445,9 +445,7 @@ while (0) #define PR_REGNO_P(REGNO) ((REGNO) >= 256 && (REGNO) <= 319) #define BR_REGNO_P(REGNO) ((REGNO) >= 320 && (REGNO) <= 327) #define GENERAL_REGNO_P(REGNO) \ - (GR_REGNO_P (REGNO) \ - || (REGNO) == FRAME_POINTER_REGNUM \ - || (REGNO) == RETURN_ADDRESS_POINTER_REGNUM) + (GR_REGNO_P (REGNO) || (REGNO) == FRAME_POINTER_REGNUM) #define GR_REG(REGNO) ((REGNO) + 0) #define FR_REG(REGNO) ((REGNO) + 128) @@ -457,11 +455,11 @@ while (0) #define IN_REG(REGNO) ((REGNO) + 112) #define LOC_REG(REGNO) ((REGNO) + 32) -#define AR_CCV_REGNUM 330 -#define AR_UNAT_REGNUM 331 -#define AR_PFS_REGNUM 332 -#define AR_LC_REGNUM 333 -#define AR_EC_REGNUM 334 +#define AR_CCV_REGNUM 329 +#define AR_UNAT_REGNUM 330 +#define AR_PFS_REGNUM 331 +#define AR_LC_REGNUM 332 +#define AR_EC_REGNUM 333 #define IN_REGNO_P(REGNO) ((REGNO) >= IN_REG (0) && (REGNO) <= IN_REG (7)) #define LOC_REGNO_P(REGNO) ((REGNO) >= LOC_REG (0) && (REGNO) <= LOC_REG (79)) @@ -524,8 +522,8 @@ while (0) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ /* Branch registers. */ \ 0, 0, 0, 0, 0, 0, 0, 0, \ - /*FP RA CCV UNAT PFS LC EC */ \ - 1, 1, 1, 1, 1, 0, 1 \ + /*FP CCV UNAT PFS LC EC */ \ + 1, 1, 1, 1, 0, 1 \ } /* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered @@ -559,8 +557,8 @@ while (0) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ /* Branch registers. */ \ 1, 0, 0, 0, 0, 0, 1, 1, \ - /*FP RA CCV UNAT PFS LC EC */ \ - 1, 1, 1, 1, 1, 0, 1 \ + /*FP CCV UNAT PFS LC EC */ \ + 1, 1, 1, 1, 0, 1 \ } /* Like `CALL_USED_REGISTERS' but used to overcome a historical @@ -597,8 +595,8 @@ while (0) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ /* Branch registers. */ \ 1, 0, 0, 0, 0, 0, 1, 1, \ - /*FP RA CCV UNAT PFS LC EC */ \ - 0, 0, 1, 0, 1, 0, 0 \ + /*FP CCV UNAT PFS LC EC */ \ + 0, 1, 0, 1, 0, 0 \ } @@ -744,7 +742,7 @@ while (0) /* Special branch registers. */ \ R_BR (0), \ /* Other fixed registers. */ \ - FRAME_POINTER_REGNUM, RETURN_ADDRESS_POINTER_REGNUM, \ + FRAME_POINTER_REGNUM, \ AR_CCV_REGNUM, AR_UNAT_REGNUM, AR_PFS_REGNUM, AR_LC_REGNUM, \ AR_EC_REGNUM \ } @@ -873,11 +871,11 @@ enum reg_class /* AR_M_REGS. */ \ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ - 0x00000000, 0x00000000, 0x0C00 }, \ + 0x00000000, 0x00000000, 0x0600 }, \ /* AR_I_REGS. */ \ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ - 0x00000000, 0x00000000, 0x7000 }, \ + 0x00000000, 0x00000000, 0x3800 }, \ /* ADDL_REGS. */ \ { 0x0000000F, 0x00000000, 0x00000000, 0x00000000, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ @@ -885,7 +883,7 @@ enum reg_class /* GR_REGS. */ \ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ - 0x00000000, 0x00000000, 0x0300 }, \ + 0x00000000, 0x00000000, 0x0100 }, \ /* FR_REGS. */ \ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ @@ -893,15 +891,15 @@ enum reg_class /* GR_AND_BR_REGS. */ \ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ - 0x00000000, 0x00000000, 0x03FF }, \ + 0x00000000, 0x00000000, 0x01FF }, \ /* GR_AND_FR_REGS. */ \ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ - 0x00000000, 0x00000000, 0x0300 }, \ + 0x00000000, 0x00000000, 0x0100 }, \ /* ALL_REGS. */ \ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ - 0xFFFFFFFF, 0xFFFFFFFF, 0x7FFF }, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0x3FFF }, \ } /* A C expression whose value is a register class containing hard register @@ -1119,7 +1117,7 @@ enum reg_class DYNAMIC_CHAIN_ADDRESS and SETUP_FRAME_ADDRESS (for the reg stack flush). */ #define RETURN_ADDR_RTX(COUNT, FRAME) \ - ((COUNT) == 0 ? return_address_pointer_rtx : const0_rtx) + ia64_return_addr_rtx (COUNT, FRAME) /* A C expression whose value is RTL representing the location of the incoming return address at the beginning of any function, before the prologue. This @@ -1180,13 +1178,6 @@ enum reg_class REGNO_POINTER_ALIGN (ARG_POINTER_REGNUM) = 64; \ } while (0) -/* The register number for the return address register. For IA-64, this - is not actually a pointer as the name suggests, but that's a name that - gen_rtx_REG already takes care to keep unique. We modify - return_address_pointer_rtx in ia64_expand_prologue to reference the - final output regnum. */ -#define RETURN_ADDRESS_POINTER_REGNUM 329 - /* Register numbers used for passing a function's static chain pointer. */ /* ??? The ABI sez the static chain should be passed as a normal parameter. */ #define STATIC_CHAIN_REGNUM 15 @@ -1210,7 +1201,6 @@ enum reg_class {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ - {RETURN_ADDRESS_POINTER_REGNUM, BR_REG (0)}, \ } /* A C expression that returns nonzero if the compiler is allowed to try to @@ -1934,8 +1924,8 @@ do { \ "p60", "p61", "p62", "p63", \ /* Branch registers. */ \ "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", \ - /* Frame pointer. Return address. */ \ - "sfp", "retaddr", "ar.ccv", "ar.unat", "ar.pfs", "ar.lc", "ar.ec", \ + /* Frame pointer. Application registers. */ \ + "sfp", "ar.ccv", "ar.unat", "ar.pfs", "ar.lc", "ar.ec", \ } /* If defined, a C initializer for an array of structures containing a name and diff --git a/contrib/gcc/config/ia64/ia64.md b/contrib/gcc/config/ia64/ia64.md index 4baa5d3..3b88d9f 100644 --- a/contrib/gcc/config/ia64/ia64.md +++ b/contrib/gcc/config/ia64/ia64.md @@ -73,6 +73,7 @@ (UNSPEC_BUNDLE_SELECTOR 23) (UNSPEC_ADDP4 24) (UNSPEC_PROLOGUE_USE 25) + (UNSPEC_RET_ADDR 26) ]) (define_constants @@ -416,6 +417,25 @@ DONE; }) +;; This is used as a placeholder for the return address during early +;; compilation. We won't know where we've placed this until during +;; reload, at which point it can wind up in b0, a general register, +;; or memory. The only safe destination under these conditions is a +;; general register. + +(define_insn_and_split "*movdi_ret_addr" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_RET_ADDR))] + "" + "#" + "reload_completed" + [(const_int 0)] +{ + ia64_split_return_addr_rtx (operands[0]); + DONE; +} + [(set_attr "itanium_class" "ialu")]) + (define_insn "*movdi_internal" [(set (match_operand:DI 0 "destination_operand" "=r,r,r,r, m, r,*f,*f,*f, Q, r,*b, r,*e, r,*d, r,*c") diff --git a/contrib/gcc/config/ia64/ia64intrin.h b/contrib/gcc/config/ia64/ia64intrin.h index c7bbd33..262dc20 100644 --- a/contrib/gcc/config/ia64/ia64intrin.h +++ b/contrib/gcc/config/ia64/ia64intrin.h @@ -19,13 +19,11 @@ extern long __sync_val_compare_and_swap_di (long *, long, long); __sync_val_compare_and_swap_di((long *)(PTR),(long)(OLD),(long)(NEW))) extern int __sync_bool_compare_and_swap_si (int *, int, int); -extern long __sync_bool_compare_and_swap_di (long *, long, long); +extern int __sync_bool_compare_and_swap_di (long *, long, long); #define __sync_bool_compare_and_swap(PTR, OLD, NEW) \ ((sizeof (*(PTR)) == sizeof(int)) \ - ? (__typeof__(*(PTR))) \ - __sync_bool_compare_and_swap_si((int *)(PTR),(int)(OLD),(int)(NEW)) \ - : (__typeof__(*(PTR))) \ - __sync_bool_compare_and_swap_di((long *)(PTR),(long)(OLD),(long)(NEW))) + ? __sync_bool_compare_and_swap_si((int *)(PTR),(int)(OLD),(int)(NEW)) \ + : __sync_bool_compare_and_swap_di((long *)(PTR),(long)(OLD),(long)(NEW))) extern void __sync_lock_release_si (int *); extern void __sync_lock_release_di (long *); diff --git a/contrib/gcc/config/ia64/libgcc-ia64.ver b/contrib/gcc/config/ia64/libgcc-ia64.ver index 2ffb693..cd76990 100644 --- a/contrib/gcc/config/ia64/libgcc-ia64.ver +++ b/contrib/gcc/config/ia64/libgcc-ia64.ver @@ -7,3 +7,6 @@ GCC_3.0 { __ia64_trampoline __ia64_backtrace } +GCC_3.3.2 { + _Unwind_GetBSP +} diff --git a/contrib/gcc/config/ia64/unwind-ia64.c b/contrib/gcc/config/ia64/unwind-ia64.c index 12e46ae..88d236b 100644 --- a/contrib/gcc/config/ia64/unwind-ia64.c +++ b/contrib/gcc/config/ia64/unwind-ia64.c @@ -1665,6 +1665,14 @@ _Unwind_GetCFA (struct _Unwind_Context *context) return (_Unwind_Ptr) context->psp; } +/* Get the value of the Backing Store Pointer as saved in CONTEXT. */ + +_Unwind_Word +_Unwind_GetBSP (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->bsp; +} + static _Unwind_Reason_Code uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs) @@ -1786,6 +1794,11 @@ uw_update_reg_address (struct _Unwind_Context *context, addr = ia64_rse_skip_regs ((unsigned long *) context->bsp, rval - 32); else if (rval >= 2) addr = context->ireg[rval - 2].loc; + else if (rval == 0) + { + static const unsigned long dummy; + addr = (void *) &dummy; + } else abort (); break; @@ -1837,6 +1850,11 @@ uw_update_reg_address (struct _Unwind_Context *context, context->ireg[regno - UNW_REG_R2].nat = context->ireg[rval - 2].nat; } + else if (rval == 0) + { + context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_NONE; + context->ireg[regno - UNW_REG_R2].nat.off = 0; + } else abort (); break; diff --git a/contrib/gcc/config/linux.h b/contrib/gcc/config/linux.h index 2ddb9c7..297fa92 100644 --- a/contrib/gcc/config/linux.h +++ b/contrib/gcc/config/linux.h @@ -108,6 +108,9 @@ Boston, MA 02111-1307, USA. */ #define LINK_EH_SPEC "%{!static:--eh-frame-hdr} " #endif +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" + /* Define this so we can compile MS code for use with WINE. */ #define HANDLE_PRAGMA_PACK_PUSH_POP diff --git a/contrib/gcc/config/rs6000/linux.h b/contrib/gcc/config/rs6000/linux.h index 593e961..15db88a 100644 --- a/contrib/gcc/config/rs6000/linux.h +++ b/contrib/gcc/config/rs6000/linux.h @@ -48,6 +48,9 @@ Boston, MA 02111-1307, USA. */ #undef LINK_SHLIB_SPEC #define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}" +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" + #undef LIB_DEFAULT_SPEC #define LIB_DEFAULT_SPEC "%(lib_linux)" diff --git a/contrib/gcc/config/rs6000/linux64.h b/contrib/gcc/config/rs6000/linux64.h index 55065c6..a538d02 100644 --- a/contrib/gcc/config/rs6000/linux64.h +++ b/contrib/gcc/config/rs6000/linux64.h @@ -137,6 +137,9 @@ Boston, MA 02111-1307, USA. */ #undef LINK_SHLIB_SPEC #define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}" +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" + #undef LIB_DEFAULT_SPEC #define LIB_DEFAULT_SPEC "%(lib_linux)" diff --git a/contrib/gcc/config/rs6000/rs6000.c b/contrib/gcc/config/rs6000/rs6000.c index 3c17f21..13f4ed3 100644 --- a/contrib/gcc/config/rs6000/rs6000.c +++ b/contrib/gcc/config/rs6000/rs6000.c @@ -3310,7 +3310,7 @@ function_arg_pass_by_reference (cum, mode, type, named) return 1; } - return type && int_size_in_bytes (type) <= 0; + return type && int_size_in_bytes (type) < 0; } /* Perform any needed actions needed for a function that is receiving a @@ -3551,7 +3551,7 @@ rs6000_va_arg (valist, type) if (DEFAULT_ABI != ABI_V4) { /* Variable sized types are passed by reference. */ - if (int_size_in_bytes (type) <= 0) + if (int_size_in_bytes (type) < 0) { u = build_pointer_type (type); diff --git a/contrib/gcc/config/rs6000/rs6000.md b/contrib/gcc/config/rs6000/rs6000.md index 4c95031..4d5ef9d 100644 --- a/contrib/gcc/config/rs6000/rs6000.md +++ b/contrib/gcc/config/rs6000/rs6000.md @@ -14308,7 +14308,7 @@ return \"bdz $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) (define_insn "*ctrsi_internal2" [(set (pc) @@ -14332,7 +14332,7 @@ return \"{bdn|bdnz} $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) (define_insn "*ctrdi_internal1" [(set (pc) @@ -14356,7 +14356,7 @@ return \"bdz $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) (define_insn "*ctrdi_internal2" [(set (pc) @@ -14380,7 +14380,7 @@ return \"{bdn|bdnz} $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) ;; Similar, but we can use GE since we have a REG_NONNEG. @@ -14406,7 +14406,7 @@ return \"bdz $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) (define_insn "*ctrsi_internal4" [(set (pc) @@ -14430,7 +14430,7 @@ return \"{bdn|bdnz} $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) (define_insn "*ctrdi_internal3" [(set (pc) @@ -14454,7 +14454,7 @@ return \"bdz $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) (define_insn "*ctrdi_internal4" [(set (pc) @@ -14478,7 +14478,7 @@ return \"{bdn|bdnz} $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) ;; Similar but use EQ @@ -14504,7 +14504,7 @@ return \"{bdn|bdnz} $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) (define_insn "*ctrsi_internal6" [(set (pc) @@ -14528,7 +14528,7 @@ return \"bdz $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) (define_insn "*ctrdi_internal5" [(set (pc) @@ -14552,7 +14552,7 @@ return \"{bdn|bdnz} $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) (define_insn "*ctrdi_internal6" [(set (pc) @@ -14576,7 +14576,7 @@ return \"bdz $+8\;b %l0\"; }" [(set_attr "type" "branch") - (set_attr "length" "4,12,16")]) + (set_attr "length" "*,12,16")]) ;; Now the splitters if we could not allocate the CTR register diff --git a/contrib/gcc/config/rs6000/sysv4.h b/contrib/gcc/config/rs6000/sysv4.h index 3da996f..26450e7 100644 --- a/contrib/gcc/config/rs6000/sysv4.h +++ b/contrib/gcc/config/rs6000/sysv4.h @@ -383,6 +383,21 @@ do { \ #undef STACK_BOUNDARY #define STACK_BOUNDARY (TARGET_ALTIVEC_ABI ? 128 : 64) +/* Define this macro if you wish to preserve a certain alignment for + the stack pointer, greater than what the hardware enforces. The + definition is a C expression for the desired alignment (measured + in bits). This macro must evaluate to a value equal to or larger + than STACK_BOUNDARY. + For the SYSV ABI and variants the alignment of the stack pointer + is usually controlled manually in rs6000.c. However, to maintain + alignment across alloca () in all circumstances, + PREFERRED_STACK_BOUNDARY needs to be set as well. + This has the additional advantage of allowing a bigger maximum + alignment of user objects on the stack. */ + +#undef PREFERRED_STACK_BOUNDARY +#define PREFERRED_STACK_BOUNDARY 128 + /* Real stack boundary as mandated by the appropriate ABI. */ #define ABI_STACK_BOUNDARY ((TARGET_EABI && !TARGET_ALTIVEC_ABI) ? 64 : 128) diff --git a/contrib/gcc/config/sparc/linux.h b/contrib/gcc/config/sparc/linux.h index ea16b7eed..e4bd7bb 100644 --- a/contrib/gcc/config/sparc/linux.h +++ b/contrib/gcc/config/sparc/linux.h @@ -256,6 +256,10 @@ do { \ #undef CTORS_SECTION_ASM_OP #undef DTORS_SECTION_ASM_OP +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" + /* Do code reading to identify a signal frame, and set the frame state data appropriately. See unwind-dw2.c for the structs. */ diff --git a/contrib/gcc/config/sparc/linux64.h b/contrib/gcc/config/sparc/linux64.h index 8fd3a1f..27be1d1 100644 --- a/contrib/gcc/config/sparc/linux64.h +++ b/contrib/gcc/config/sparc/linux64.h @@ -319,6 +319,10 @@ do { \ #undef CTORS_SECTION_ASM_OP #undef DTORS_SECTION_ASM_OP +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" + /* Do code reading to identify a signal frame, and set the frame state data appropriately. See unwind-dw2.c for the structs. */ diff --git a/contrib/gcc/config/sparc/sol2-c1.asm b/contrib/gcc/config/sparc/sol2-c1.asm index 894a8c3..a1cc68d 100644 --- a/contrib/gcc/config/sparc/sol2-c1.asm +++ b/contrib/gcc/config/sparc/sol2-c1.asm @@ -92,6 +92,10 @@ _start: ! access those data anyway. Instead, go straight to main: mov %l0, %o0 ! argc mov %l1, %o1 ! argv +#ifdef GCRT1 + setn(___Argv, %o4, %o3) + stn %o1, [%o3] ! *___Argv +#endif ! Skip argc words past argv, to env: sll %l0, CPTRSHIFT, %o2 add %o2, CPTRSIZE, %o2 diff --git a/contrib/gcc/config/sparc/sparc.c b/contrib/gcc/config/sparc/sparc.c index 5b03f05..79022b4 100644 --- a/contrib/gcc/config/sparc/sparc.c +++ b/contrib/gcc/config/sparc/sparc.c @@ -8028,6 +8028,10 @@ sparc_v8plus_shift (operands, insn, opcode) if (which_alternative != 2) operands[3] = operands[0]; + /* We can only shift by constants <= 63. */ + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); + if (GET_CODE (operands[1]) == CONST_INT) { output_asm_insn ("mov\t%1, %3", operands); diff --git a/contrib/gcc/config/sparc/sparc.md b/contrib/gcc/config/sparc/sparc.md index ebe9d2b..b53013e 100644 --- a/contrib/gcc/config/sparc/sparc.md +++ b/contrib/gcc/config/sparc/sparc.md @@ -148,8 +148,12 @@ (eq_attr "branch_type" "fcc") (if_then_else (match_operand 0 "fcc0_reg_operand" "") (if_then_else (eq_attr "empty_delay_slot" "true") - (const_int 2) - (const_int 1)) + (if_then_else (eq (symbol_ref "TARGET_V9") (const_int 0)) + (const_int 3) + (const_int 2)) + (if_then_else (eq (symbol_ref "TARGET_V9") (const_int 0)) + (const_int 2) + (const_int 1))) (if_then_else (lt (pc) (match_dup 2)) (if_then_else (lt (minus (match_dup 2) (pc)) (const_int 260000)) (if_then_else (eq_attr "empty_delay_slot" "true") @@ -6881,6 +6885,8 @@ { if (operands[2] == const1_rtx) return "add\t%1, %1, %0"; + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f); return "sll\t%1, %2, %0"; } [(set (attr "type") @@ -6910,6 +6916,8 @@ { if (operands[2] == const1_rtx) return "add\t%1, %1, %0"; + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); return "sllx\t%1, %2, %0"; } [(set (attr "type") @@ -6969,7 +6977,11 @@ (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") (match_operand:SI 2 "arith_operand" "rI")))] "" - "sra\t%1, %2, %0" + { + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f); + return "sra\t%1, %2, %0"; + } [(set_attr "type" "shift")]) (define_insn "*ashrsi3_extend" @@ -7016,12 +7028,17 @@ } }) -(define_insn "" +(define_insn "*ashrdi3_sp64" [(set (match_operand:DI 0 "register_operand" "=r") (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") (match_operand:SI 2 "arith_operand" "rI")))] "TARGET_ARCH64" - "srax\t%1, %2, %0" + + { + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); + return "srax\t%1, %2, %0"; + } [(set_attr "type" "shift")]) ;; XXX @@ -7040,7 +7057,11 @@ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") (match_operand:SI 2 "arith_operand" "rI")))] "" - "srl\t%1, %2, %0" + { + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f); + return "srl\t%1, %2, %0"; + } [(set_attr "type" "shift")]) ;; This handles the case where @@ -7097,12 +7118,16 @@ } }) -(define_insn "" +(define_insn "*lshrdi3_sp64" [(set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") (match_operand:SI 2 "arith_operand" "rI")))] "TARGET_ARCH64" - "srlx\t%1, %2, %0" + { + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); + return "srlx\t%1, %2, %0"; + } [(set_attr "type" "shift")]) ;; XXX diff --git a/contrib/gcc/config/t-darwin b/contrib/gcc/config/t-darwin index 7fe5e93..c823fa5 100644 --- a/contrib/gcc/config/t-darwin +++ b/contrib/gcc/config/t-darwin @@ -12,7 +12,7 @@ gt-darwin.h : s-gtype ; @true # Explain how to build crt2.o $(T)crt2$(objext): $(srcdir)/config/darwin-crt2.c $(GCC_PASSES) \ - $(TCONFIG_H) tsystem.h + $(TCONFIG_H) stmp-int-hdrs tsystem.h $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) \ -c $(srcdir)/config/darwin-crt2.c -o $(T)crt2$(objext) diff --git a/contrib/gcc/config/t-libunwind b/contrib/gcc/config/t-libunwind new file mode 100644 index 0000000..be50bc4 --- /dev/null +++ b/contrib/gcc/config/t-libunwind @@ -0,0 +1 @@ +LIB2ADDEH = $(srcdir)/unwind-libunwind.c $(srcdir)/unwind-sjlj.c |