diff options
Diffstat (limited to 'contrib/gcc/config/i386')
-rw-r--r-- | contrib/gcc/config/i386/i386.c | 43 | ||||
-rw-r--r-- | contrib/gcc/config/i386/i386.h | 24 | ||||
-rw-r--r-- | contrib/gcc/config/i386/i386.md | 4 | ||||
-rw-r--r-- | contrib/gcc/config/i386/linux64.h | 6 | ||||
-rw-r--r-- | contrib/gcc/config/i386/mmintrin.h | 46 | ||||
-rw-r--r-- | contrib/gcc/config/i386/xmmintrin.h | 30 |
6 files changed, 103 insertions, 50 deletions
diff --git a/contrib/gcc/config/i386/i386.c b/contrib/gcc/config/i386/i386.c index 5e48715..c476406 100644 --- a/contrib/gcc/config/i386/i386.c +++ b/contrib/gcc/config/i386/i386.c @@ -906,6 +906,27 @@ override_options () int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta); + /* Set the default values for switches whose default depends on TARGET_64BIT + in case they weren't overwritten by command line options. */ + if (TARGET_64BIT) + { + if (flag_omit_frame_pointer == 2) + flag_omit_frame_pointer = 1; + if (flag_asynchronous_unwind_tables == 2) + flag_asynchronous_unwind_tables = 1; + if (flag_pcc_struct_return == 2) + flag_pcc_struct_return = 0; + } + else + { + if (flag_omit_frame_pointer == 2) + flag_omit_frame_pointer = 0; + if (flag_asynchronous_unwind_tables == 2) + flag_asynchronous_unwind_tables = 0; + if (flag_pcc_struct_return == 2) + flag_pcc_struct_return = 1; + } + #ifdef SUBTARGET_OVERRIDE_OPTIONS SUBTARGET_OVERRIDE_OPTIONS; #endif @@ -1213,13 +1234,14 @@ optimization_options (level, size) if (level > 1) flag_schedule_insns = 0; #endif - if (TARGET_64BIT && optimize >= 1) - flag_omit_frame_pointer = 1; - if (TARGET_64BIT) - { - flag_pcc_struct_return = 0; - flag_asynchronous_unwind_tables = 1; - } + /* The default values of these switches depend on the TARGET_64BIT + that is not known at this moment. Mark these values with 2 and + let the user override these. In case there is no command line option + specifying them, we will set the defaults in override_options. */ + if (optimize >= 1) + flag_omit_frame_pointer = 2; + flag_pcc_struct_return = 2; + flag_asynchronous_unwind_tables = 2; } /* Table of valid machine attributes. */ @@ -6862,8 +6884,7 @@ ix86_expand_vector_move (mode, operands) /* Make operand1 a register if it isn't already. 
*/ if ((reload_in_progress | reload_completed) == 0 && !register_operand (operands[0], mode) - && !register_operand (operands[1], mode) - && operands[1] != CONST0_RTX (mode)) + && !register_operand (operands[1], mode)) { rtx temp = force_reg (GET_MODE (operands[1]), operands[1]); emit_move_insn (operands[0], temp); @@ -10961,14 +10982,10 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 }, { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 }, { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, diff --git a/contrib/gcc/config/i386/i386.h b/contrib/gcc/config/i386/i386.h index 58d2c433..db6970e 100644 --- a/contrib/gcc/config/i386/i386.h +++ b/contrib/gcc/config/i386/i386.h @@ -1588,6 +1588,10 @@ enum reg_class #define PUSH_ARGS (TARGET_PUSH_ARGS && !ACCUMULATE_OUTGOING_ARGS) +/* We want the stack and args grow in opposite 
directions, even if + PUSH_ARGS is 0. */ +#define PUSH_ARGS_REVERSED 1 + /* Offset of first parameter from the argument pointer register value. */ #define FIRST_PARM_OFFSET(FNDECL) 0 @@ -2060,13 +2064,9 @@ enum ix86_builtins IX86_BUILTIN_CMPEQSS, IX86_BUILTIN_CMPLTSS, IX86_BUILTIN_CMPLESS, - IX86_BUILTIN_CMPGTSS, - IX86_BUILTIN_CMPGESS, IX86_BUILTIN_CMPNEQSS, IX86_BUILTIN_CMPNLTSS, IX86_BUILTIN_CMPNLESS, - IX86_BUILTIN_CMPNGTSS, - IX86_BUILTIN_CMPNGESS, IX86_BUILTIN_CMPORDSS, IX86_BUILTIN_CMPUNORDSS, IX86_BUILTIN_CMPNESS, @@ -2878,13 +2878,25 @@ extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER]; It need not be very fast code. */ #define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \ - asm_fprintf ((FILE), "\tpush{l}\t%%e%s\n", reg_names[(REGNO)]) +do { \ + if (TARGET_64BIT) \ + asm_fprintf ((FILE), "\tpush{q}\t%%r%s\n", \ + reg_names[(REGNO)] + (REX_INT_REGNO_P (REGNO) != 0)); \ + else \ + asm_fprintf ((FILE), "\tpush{l}\t%%e%s\n", reg_names[(REGNO)]); \ +} while (0) /* This is how to output an insn to pop a register from the stack. It need not be very fast code. */ #define ASM_OUTPUT_REG_POP(FILE, REGNO) \ - asm_fprintf ((FILE), "\tpop{l}\t%%e%s\n", reg_names[(REGNO)]) +do { \ + if (TARGET_64BIT) \ + asm_fprintf ((FILE), "\tpop{q}\t%%r%s\n", \ + reg_names[(REGNO)] + (REX_INT_REGNO_P (REGNO) != 0)); \ + else \ + asm_fprintf ((FILE), "\tpop{l}\t%%e%s\n", reg_names[(REGNO)]); \ +} while (0) /* This is how to output an element of a case-vector that is absolute. 
*/ diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md index 4275675..36a0497 100644 --- a/contrib/gcc/config/i386/i386.md +++ b/contrib/gcc/config/i386/i386.md @@ -5311,7 +5311,7 @@ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") (match_operand:DI 2 "general_operand" "roiF,riF"))) (clobber (reg:CC 17))] - "!TARGET_64BIT" + "!TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" "#") (define_split @@ -6940,7 +6940,7 @@ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") (match_operand:DI 2 "general_operand" "roiF,riF"))) (clobber (reg:CC 17))] - "!TARGET_64BIT" + "!TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" "#") (define_split diff --git a/contrib/gcc/config/i386/linux64.h b/contrib/gcc/config/i386/linux64.h index 8a0bfbe..34c6d3c 100644 --- a/contrib/gcc/config/i386/linux64.h +++ b/contrib/gcc/config/i386/linux64.h @@ -30,6 +30,12 @@ Boston, MA 02111-1307, USA. */ #undef CPP_SPEC #define CPP_SPEC "%(cpp_cpu) %{fPIC:-D__PIC__ -D__pic__} %{fpic:-D__PIC__ -D__pic__} %{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT} %{!m32:-D__LONG_MAX__=9223372036854775807L}" +/* The svr4 ABI for the i386 says that records and unions are returned + in memory. In the 64bit compilation we will turn this flag off in + override_options, as we never do pcc_struct_return scheme on this target. */ +#undef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 1 + /* Provide a LINK_SPEC. Here we provide support for the special GCC options -static and -shared, which allow us to link things in one of these three modes by applying the appropriate combinations of diff --git a/contrib/gcc/config/i386/mmintrin.h b/contrib/gcc/config/i386/mmintrin.h index 88e384f..bbfdd30 100644 --- a/contrib/gcc/config/i386/mmintrin.h +++ b/contrib/gcc/config/i386/mmintrin.h @@ -31,7 +31,7 @@ #define _MMINTRIN_H_INCLUDED /* The data type intended for user use. 
*/ -typedef unsigned long long __m64 __attribute__ ((__aligned__ (8))); +typedef int __m64 __attribute__ ((__mode__ (__V2SI__))); /* Internal data types for implementing the intrinsics. */ typedef int __v2si __attribute__ ((__mode__ (__V2SI__))); @@ -49,14 +49,16 @@ _mm_empty (void) static __inline __m64 _mm_cvtsi32_si64 (int __i) { - return (unsigned int) __i; + long long __tmp = (unsigned int)__i; + return (__m64) __tmp; } /* Convert the lower 32 bits of the __m64 object into an integer. */ static __inline int _mm_cvtsi64_si32 (__m64 __i) { - return __i; + long long __tmp = (long long)__i; + return __tmp; } /* Pack the four 16-bit values from M1 into the lower four 8-bit values of @@ -269,7 +271,7 @@ _mm_mullo_pi16 (__m64 __m1, __m64 __m2) static __inline __m64 _mm_sll_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count); } static __inline __m64 @@ -282,7 +284,7 @@ _mm_slli_pi16 (__m64 __m, int __count) static __inline __m64 _mm_sll_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); + return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count); } static __inline __m64 @@ -293,22 +295,22 @@ _mm_slli_pi32 (__m64 __m, int __count) /* Shift the 64-bit value in M left by COUNT. */ static __inline __m64 -_mm_sll_pi64 (__m64 __m, __m64 __count) +_mm_sll_si64 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psllq (__m, __count); + return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); } static __inline __m64 -_mm_slli_pi64 (__m64 __m, int __count) +_mm_slli_si64 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psllq (__m, __count); + return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); } /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. 
*/ static __inline __m64 _mm_sra_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count); } static __inline __m64 @@ -321,7 +323,7 @@ _mm_srai_pi16 (__m64 __m, int __count) static __inline __m64 _mm_sra_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); + return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count); } static __inline __m64 @@ -334,7 +336,7 @@ _mm_srai_pi32 (__m64 __m, int __count) static __inline __m64 _mm_srl_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count); } static __inline __m64 @@ -347,7 +349,7 @@ _mm_srli_pi16 (__m64 __m, int __count) static __inline __m64 _mm_srl_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); + return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count); } static __inline __m64 @@ -358,22 +360,22 @@ _mm_srli_pi32 (__m64 __m, int __count) /* Shift the 64-bit value in M left by COUNT; shift in zeros. */ static __inline __m64 -_mm_srl_pi64 (__m64 __m, __m64 __count) +_mm_srl_si64 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrlq (__m, __count); + return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); } static __inline __m64 -_mm_srli_pi64 (__m64 __m, int __count) +_mm_srli_si64 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrlq (__m, __count); + return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); } /* Bit-wise AND the 64-bit values in M1 and M2. 
*/ static __inline __m64 _mm_and_si64 (__m64 __m1, __m64 __m2) { - return __builtin_ia32_pand (__m1, __m2); + return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2); } /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the @@ -381,21 +383,21 @@ _mm_and_si64 (__m64 __m1, __m64 __m2) static __inline __m64 _mm_andnot_si64 (__m64 __m1, __m64 __m2) { - return __builtin_ia32_pandn (__m1, __m2); + return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2); } /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ static __inline __m64 _mm_or_si64 (__m64 __m1, __m64 __m2) { - return __builtin_ia32_por (__m1, __m2); + return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2); } /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ static __inline __m64 _mm_xor_si64 (__m64 __m1, __m64 __m2) { - return __builtin_ia32_pxor (__m1, __m2); + return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2); } /* Compare eight 8-bit values. The result of the comparison is 0xFF if the @@ -444,7 +446,7 @@ _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) static __inline __m64 _mm_setzero_si64 (void) { - return __builtin_ia32_mmx_zero (); + return (__m64)__builtin_ia32_mmx_zero (); } /* Creates a vector of two 32-bit values; I0 is least significant. 
*/ diff --git a/contrib/gcc/config/i386/xmmintrin.h b/contrib/gcc/config/i386/xmmintrin.h index 9f9f2f9..409bf17 100644 --- a/contrib/gcc/config/i386/xmmintrin.h +++ b/contrib/gcc/config/i386/xmmintrin.h @@ -245,13 +245,21 @@ _mm_cmple_ss (__m128 __A, __m128 __B) static __inline __m128 _mm_cmpgt_ss (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_cmpgtss ((__v4sf)__A, (__v4sf)__B); + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpltss ((__v4sf) __B, + (__v4sf) + __A)); } static __inline __m128 _mm_cmpge_ss (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_cmpgess ((__v4sf)__A, (__v4sf)__B); + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpless ((__v4sf) __B, + (__v4sf) + __A)); } static __inline __m128 @@ -275,13 +283,21 @@ _mm_cmpnle_ss (__m128 __A, __m128 __B) static __inline __m128 _mm_cmpngt_ss (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_cmpngtss ((__v4sf)__A, (__v4sf)__B); + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpnltss ((__v4sf) __B, + (__v4sf) + __A)); } static __inline __m128 _mm_cmpnge_ss (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_cmpngess ((__v4sf)__A, (__v4sf)__B); + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpnless ((__v4sf) __B, + (__v4sf) + __A)); } static __inline __m128 @@ -1017,7 +1033,7 @@ _mm_prefetch (void *__P, enum _mm_hint __I) static __inline void _mm_stream_pi (__m64 *__P, __m64 __A) { - __builtin_ia32_movntq (__P, __A); + __builtin_ia32_movntq (__P, (long long)__A); } /* Likewise. The address must be 16-byte aligned. 
*/ @@ -1049,8 +1065,8 @@ _mm_pause (void) do { \ __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \ __v4sf __t0 = __builtin_ia32_shufps (__r0, __r1, 0x44); \ - __v4sf __t1 = __builtin_ia32_shufps (__r0, __r1, 0xEE); \ - __v4sf __t2 = __builtin_ia32_shufps (__r2, __r3, 0x44); \ + __v4sf __t2 = __builtin_ia32_shufps (__r0, __r1, 0xEE); \ + __v4sf __t1 = __builtin_ia32_shufps (__r2, __r3, 0x44); \ __v4sf __t3 = __builtin_ia32_shufps (__r2, __r3, 0xEE); \ (row0) = __builtin_ia32_shufps (__t0, __t1, 0x88); \ (row1) = __builtin_ia32_shufps (__t0, __t1, 0xDD); \ |