Diffstat (limited to 'contrib/gcc/config/i386')
66 files changed, 11406 insertions, 4569 deletions
diff --git a/contrib/gcc/config/i386/athlon.md b/contrib/gcc/config/i386/athlon.md new file mode 100644 index 0000000..548f2ad --- /dev/null +++ b/contrib/gcc/config/i386/athlon.md @@ -0,0 +1,206 @@ +;; AMD Athlon Scheduling +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; +;; This file is part of GNU CC. +;; +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. */ +(define_attr "athlon_decode" "direct,vector" + (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov") + (const_string "vector") + (and (eq_attr "type" "push") + (match_operand 1 "memory_operand" "")) + (const_string "vector") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load,store") + (eq_attr "mode" "XF"))) + (const_string "vector")] + (const_string "direct"))) + +;; The Athlon does contain three pipelined FP units, three integer units and +;; three address generation units. +;; +;; The predecode logic is determining boundaries of instructions in the 64 +;; byte cache line. So the cache line straddling problem of K6 might be issue +;; here as well, but it is not noted in the documentation. +;; +;; Three DirectPath instructions decoders and only one VectorPath decoder +;; is available. They can decode three DirectPath instructions or one VectorPath +;; instruction per cycle. +;; Decoded macro instructions are then passed to 72 entry instruction control +;; unit, that passes +;; it to the specialized integer (18 entry) and fp (36 entry) schedulers. +;; +;; The load/store queue unit is not attached to the schedulers but +;; communicates with all the execution units separately instead. 
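As a rough illustration of the athlon_decode classification above, the hypothetical C fragment below (not part of the patch; the function name "mix" is invented) notes which decode class the typical generated instructions would fall into, using the i386 "type" attribute values tested in the cond expression.

/* Illustration only: decode classes for the instructions this function
   typically expands to, per the athlon_decode attribute defined above.  */
unsigned int
mix (unsigned int a, unsigned int b)
{
  unsigned int q = a / b;   /* divl - type "idiv", VectorPath: occupies the single vector decoder      */
  unsigned int s = a + b;   /* addl - type "alu",  DirectPath: up to three such insns decode per cycle */
  return q ^ s;             /* xorl - type "alu",  DirectPath as well                                  */
}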
+ +(define_function_unit "athlon_vectordec" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "vector")) + 1 1) + +(define_function_unit "athlon_directdec" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "direct")) + 1 1) + +(define_function_unit "athlon_vectordec" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "direct")) + 1 1 [(eq_attr "athlon_decode" "vector")]) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ishift1,rotate,rotate1,ibr,call,callv,icmov,cld,pop,setcc,push,pop")) + 1 1) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "str")) + 15 15) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "imul")) + 5 0) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "idiv")) + 42 0) + +(define_function_unit "athlon_muldiv" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "imul")) + 5 0) + +(define_function_unit "athlon_muldiv" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "idiv")) + 42 42) + +(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any" + (cond [(eq_attr "type" "fop,fcmp,fistp") + (const_string "add") + (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov") + (const_string "mul") + (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both")) + (const_string "store") + (and (eq_attr "type" "fmov") (eq_attr "memory" "load")) + (const_string "any") + (and (eq_attr "type" "fmov") + (ior (match_operand:SI 1 "register_operand" "") + (match_operand 1 "immediate_operand" ""))) + (const_string "store") + (eq_attr "type" "fmov") + (const_string "muladd")] + (const_string "none"))) + +;; We use latencies 1 for definitions. This is OK to model colisions +;; in execution units. The real latencies are modeled in the "fp" pipeline. + +;; fsin, fcos: 96-192 +;; fsincos: 107-211 +;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode. +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fpspc")) + 100 1) + +;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode. +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fdiv")) + 24 1) + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fop,fmul,fistp")) + 4 1) + +;; XFmode loads are slow. +;; XFmode store is slow too (8 cycles), but we don't need to model it, because +;; there are no dependent instructions. + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load") + (eq_attr "mode" "XF")))) + 10 1) + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fmov,fsgn")) + 2 1) + +;; fcmp and ftst instructions +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (eq_attr "athlon_decode" "direct"))) + 3 1) + +;; fcmpi instructions. 
+(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (eq_attr "athlon_decode" "vector"))) + 3 1) + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fcmov")) + 7 1) + +(define_function_unit "athlon_fp_mul" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "mul")) + 1 1) + +(define_function_unit "athlon_fp_add" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "add")) + 1 1) + +(define_function_unit "athlon_fp_muladd" 2 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "muladd,mul,add")) + 1 1) + +(define_function_unit "athlon_fp_store" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "store")) + 1 1) + +;; We don't need to model the Address Generation Unit, since we don't model +;; the re-order buffer yet and thus we never schedule more than three operations +;; at time. Later we may want to experiment with MD_SCHED macros modeling the +;; decoders independently on the functional units. + +;(define_function_unit "athlon_agu" 3 0 +; (and (eq_attr "cpu" "athlon") +; (and (eq_attr "memory" "!none") +; (eq_attr "athlon_fpunits" "none"))) +; 1 1) + +;; Model load unit to avoid too long sequences of loads. We don't need to +;; model store queue, since it is hardly going to be bottleneck. + +(define_function_unit "athlon_load" 2 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "memory" "load,both")) + 1 1) + diff --git a/contrib/gcc/config/i386/att.h b/contrib/gcc/config/i386/att.h index f16a5ea..70ae164 100644 --- a/contrib/gcc/config/i386/att.h +++ b/contrib/gcc/config/i386/att.h @@ -1,5 +1,6 @@ /* Definitions for AT&T assembler syntax for the Intel 80386. - Copyright (C) 1988, 1996, 2000 Free Software Foundation, Inc. + Copyright (C) 1988, 1996, 2000, 2001, 2002 + Free Software Foundation, Inc. This file is part of GNU CC. @@ -18,10 +19,6 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* Include common aspects of all 386 Unix assemblers. */ -#include "i386/unix.h" - -#define TARGET_VERSION fprintf (stderr, " (80386, ATT syntax)"); /* Define the syntax of instructions and addresses. */ diff --git a/contrib/gcc/config/i386/beos-elf.h b/contrib/gcc/config/i386/beos-elf.h index be51b4a..b84519f 100644 --- a/contrib/gcc/config/i386/beos-elf.h +++ b/contrib/gcc/config/i386/beos-elf.h @@ -1,5 +1,5 @@ /* Definitions for Intel x86 running BeOS - Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -19,7 +19,6 @@ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (i386 BeOS/ELF)"); /* Change debugging to Dwarf2. */ @@ -41,21 +40,8 @@ Boston, MA 02111-1307, USA. */ /* Output assembler code to FILE to increment profiler label # LABELNO for profiling a function entry. 
*/ -#undef FUNCTION_PROFILER -#define FUNCTION_PROFILER(FILE, LABELNO) \ -{ \ - if (flag_pic) \ - { \ - fprintf (FILE, "\tleal %sP%d@GOTOFF(%%ebx),%%edx\n", \ - LPREFIX, (LABELNO)); \ - fprintf (FILE, "\tcall *mcount@GOT(%%ebx)\n"); \ - } \ - else \ - { \ - fprintf (FILE, "\tmovl $%sP%d,%%edx\n", LPREFIX, (LABELNO)); \ - fprintf (FILE, "\tcall mcount\n"); \ - } \ -} +#undef MCOUNT_NAME +#define MCOUNT_NAME "mcount" #undef SIZE_TYPE #define SIZE_TYPE "long unsigned int" @@ -66,22 +52,28 @@ Boston, MA 02111-1307, USA. */ #undef WCHAR_TYPE #define WCHAR_TYPE "short unsigned int" -#undef WCHAR_UNSIGNED -#define WCHAR_UNSIGNED 1 - #undef WCHAR_TYPE_SIZE #define WCHAR_TYPE_SIZE 16 - -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D__ELF__ -D__BEOS__ -D__INTEL__ -D_X86_=1 \ --D__stdcall=__attribute__((__stdcall__)) \ --D__cdecl=__attribute__((__cdecl__)) \ --D__declspec(x)=__attribute__((x)) \ --Asystem=beos" - -#undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) %{!no-fPIC:%{!no-fpic:-D__PIC__ -D__pic__}}" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__ELF__"); \ + builtin_define ("__BEOS__"); \ + builtin_define ("__INTEL__"); \ + builtin_define ("_X86_"); \ + builtin_define ("__stdcall=__attribute__((__stdcall__))"); \ + builtin_define ("__cdecl=__attribute__((__cdecl__))"); \ + builtin_define ("__declspec(x)=__attribute__((x))"); \ + builtin_assert ("system=beos"); \ + if (flag_pic) \ + { \ + builtin_define ("__PIC__"); \ + builtin_define ("__pic__"); \ + } \ + } \ + while (0) + /* BeOS uses lots of multichars, so don't warn about them unless the user explicitly asks for the warnings with -Wmultichar. Note that CC1_SPEC is used for both cc1 and cc1plus. */ diff --git a/contrib/gcc/config/i386/biarch64.h b/contrib/gcc/config/i386/biarch64.h index e2a5d91..2d34698 100644 --- a/contrib/gcc/config/i386/biarch64.h +++ b/contrib/gcc/config/i386/biarch64.h @@ -21,5 +21,5 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#define TARGET_64BIT_DEFAULT -#define TARGET_BI_ARCH +#define TARGET_64BIT_DEFAULT MASK_64BIT +#define TARGET_BI_ARCH 1 diff --git a/contrib/gcc/config/i386/bsd.h b/contrib/gcc/config/i386/bsd.h index c58db1b..69ad168 100644 --- a/contrib/gcc/config/i386/bsd.h +++ b/contrib/gcc/config/i386/bsd.h @@ -1,7 +1,7 @@ /* Definitions for BSD assembler syntax for Intel 386 (actually AT&T syntax for insns and operands, adapted to BSD conventions for symbol names and debugging.) - Copyright (C) 1988, 1996, 2000 Free Software Foundation, Inc. + Copyright (C) 1988, 1996, 2000, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -20,23 +20,15 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* Include common aspects of all 386 Unix assemblers. */ -#include "i386/unix.h" - /* Use the Sequent Symmetry assembler syntax. */ - -#define TARGET_VERSION fprintf (stderr, " (80386, BSD syntax)"); /* Define the syntax of pseudo-ops, labels and comments. */ /* Prefix for internally generated assembler labels. If we aren't using underscores, we are using prefix `.'s to identify labels that should be ignored, as in `i386/gas.h' --karl@cs.umb.edu */ -#ifdef NO_UNDERSCORES -#define LPREFIX ".L" -#else + #define LPREFIX "L" -#endif /* not NO_UNDERSCORES */ /* Assembler pseudos to introduce constants of various size. */ @@ -93,32 +85,18 @@ Boston, MA 02111-1307, USA. 
*/ PREFIX is the class of label and NUM is the number within the class. This is suitable for output with `assemble_name'. */ -#ifdef NO_UNDERSCORES -#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \ - sprintf ((BUF), "*.%s%ld", (PREFIX), (long)(NUMBER)) -#else #define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \ sprintf ((BUF), "*%s%ld", (PREFIX), (long)(NUMBER)) -#endif /* This is how to output an internal numbered label where PREFIX is the class of label and NUM is the number within the class. */ -#ifdef NO_UNDERSCORES -#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ - fprintf (FILE, ".%s%d:\n", PREFIX, NUM) -#else #define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ fprintf (FILE, "%s%d:\n", PREFIX, NUM) -#endif /* The prefix to add to user-visible assembler symbols. */ -#ifdef NO_UNDERSCORES -#define USER_LABEL_PREFIX "" -#else #define USER_LABEL_PREFIX "_" -#endif /* not NO_UNDERSCORES */ /* Sequent has some changes in the format of DBX symbols. */ #define DBX_NO_XREFS 1 diff --git a/contrib/gcc/config/i386/crtdll.h b/contrib/gcc/config/i386/crtdll.h index f3eae84..dab60c1 100644 --- a/contrib/gcc/config/i386/crtdll.h +++ b/contrib/gcc/config/i386/crtdll.h @@ -20,10 +20,9 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D_WIN32 -DWIN32 \ - -D__MINGW32__=0.2 -DWINNT -D_X86_=1 \ - -Asystem=winnt" +#undef EXTRA_OS_CPP_BUILTINS +#define EXTRA_OS_CPP_BUILTINS() \ + do { builtin_define ("__MINGW32__=0.2"); } while (0) #undef LIBGCC_SPEC #define LIBGCC_SPEC \ diff --git a/contrib/gcc/config/i386/cygwin.h b/contrib/gcc/config/i386/cygwin.h index a0b8834..03e372e 100644 --- a/contrib/gcc/config/i386/cygwin.h +++ b/contrib/gcc/config/i386/cygwin.h @@ -1,6 +1,6 @@ /* Operating system specific defines to be used when targeting GCC for hosting on Windows32, using a Unix style C library and tools. - Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 + Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. This file is part of GNU CC. @@ -20,50 +20,57 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#define YES_UNDERSCORES - -#define DBX_DEBUGGING_INFO -#define SDB_DEBUGGING_INFO +#define DBX_DEBUGGING_INFO 1 +#define SDB_DEBUGGING_INFO 1 #define PREFERRED_DEBUGGING_TYPE DBX_DEBUG +#define TARGET_VERSION fprintf (stderr, " (x86 Cygwin)"); #define TARGET_EXECUTABLE_SUFFIX ".exe" #include <stdio.h> +#include "i386/i386.h" +#include "i386/unix.h" +#include "i386/bsd.h" #include "i386/gas.h" #include "dbxcoff.h" -/* Augment TARGET_SWITCHES with the cygwin/no-cygwin options. */ -#define MASK_WIN32 0x40000000 /* Use -lming32 interface */ -#define MASK_CYGWIN 0x20000000 /* Use -lcygwin interface */ -#define MASK_WINDOWS 0x10000000 /* Use windows interface */ -#define MASK_DLL 0x08000000 /* Use dll interface */ -#define MASK_NOP_FUN_DLLIMPORT 0x20000 /* Ignore dllimport for functions */ - -#define TARGET_WIN32 (target_flags & MASK_WIN32) -#define TARGET_CYGWIN (target_flags & MASK_CYGWIN) -#define TARGET_WINDOWS (target_flags & MASK_WINDOWS) -#define TARGET_DLL (target_flags & MASK_DLL) +/* Masks for subtarget switches used by other files. */ +#define MASK_NOP_FUN_DLLIMPORT 0x08000000 /* Ignore dllimport for functions */ + +/* Used in winnt.c. 
*/ #define TARGET_NOP_FUN_DLLIMPORT (target_flags & MASK_NOP_FUN_DLLIMPORT) #undef SUBTARGET_SWITCHES #define SUBTARGET_SWITCHES \ -{ "cygwin", MASK_CYGWIN, \ - N_("Use the Cygwin interface") }, \ -{ "no-cygwin", MASK_WIN32, \ - N_("Use the Mingw32 interface") }, \ -{ "windows", MASK_WINDOWS, N_("Create GUI application") }, \ -{ "no-win32", -MASK_WIN32, N_("Don't set Windows defines") },\ -{ "win32", 0, N_("Set Windows defines") }, \ -{ "console", -MASK_WINDOWS, \ - N_("Create console application") }, \ -{ "dll", MASK_DLL, N_("Generate code for a DLL") }, \ -{ "nop-fun-dllimport", MASK_NOP_FUN_DLLIMPORT, \ - N_("Ignore dllimport for functions") }, \ -{ "no-nop-fun-dllimport", -MASK_NOP_FUN_DLLIMPORT, "" }, \ +{ "cygwin", 0, N_("Use the Cygwin interface") }, \ +{ "no-cygwin", 0, N_("Use the Mingw32 interface") }, \ +{ "windows", 0, N_("Create GUI application") }, \ +{ "no-win32", 0, N_("Don't set Windows defines") }, \ +{ "win32", 0, N_("Set Windows defines") }, \ +{ "console", 0, N_("Create console application") },\ +{ "dll", 0, N_("Generate code for a DLL") }, \ +{ "nop-fun-dllimport", MASK_NOP_FUN_DLLIMPORT, \ + N_("Ignore dllimport for functions") }, \ +{ "no-nop-fun-dllimport", -MASK_NOP_FUN_DLLIMPORT, "" }, \ { "threads", 0, N_("Use Mingw-specific thread support") }, -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D_X86_=1 -Asystem=winnt" +#define MAYBE_UWIN_CPP_BUILTINS() /* Nothing. */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("_X86_=1"); \ + builtin_assert ("system=winnt"); \ + builtin_define ("__stdcall=__attribute__((__stdcall__))"); \ + builtin_define ("__cdecl=__attribute__((__cdecl__))"); \ + builtin_define ("__declspec(x)=__attribute__((x))"); \ + if (!flag_iso) \ + { \ + builtin_define ("_stdcall=__attribute__((__stdcall__))"); \ + builtin_define ("_cdecl=__attribute__((__cdecl__))"); \ + } \ + MAYBE_UWIN_CPP_BUILTINS (); \ + } \ + while (0) #ifdef CROSS_COMPILE #define CYGWIN_INCLUDES "%{!nostdinc:-idirafter " CYGWIN_CROSS_DIR "/include}" @@ -104,13 +111,7 @@ Boston, MA 02111-1307, USA. */ existing args. */ #undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) %{posix:-D_POSIX_SOURCE} \ - -D__stdcall=__attribute__((__stdcall__)) \ - -D__cdecl=__attribute__((__cdecl__)) \ - %{!ansi:-D_stdcall=__attribute__((__stdcall__)) \ - -D_cdecl=__attribute__((__cdecl__))} \ - -D__declspec(x)=__attribute__((x)) \ - -D__i386__ -D__i386 \ +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} \ %{mno-win32:%{mno-cygwin: %emno-cygwin and mno-win32 are not compatible}} \ %{mno-cygwin:-D__MSVCRT__ -D__MINGW32__ %{mthreads:-D_MT} "\ MINGW_INCLUDES "} \ @@ -133,7 +134,8 @@ Boston, MA 02111-1307, USA. */ by calling the init function from the prologue. */ #undef LIBGCC_SPEC -#define LIBGCC_SPEC "%{mno-cygwin: %{mthreads:-lmingwthrd} -lmingw32} -lgcc %{mno-cygwin:-lmoldname -lmsvcrt}" +#define LIBGCC_SPEC "%{mno-cygwin: %{mthreads:-lmingwthrd} -lmingw32} \ + -lgcc %{mno-cygwin:-lmoldname -lmingwex -lmsvcrt}" /* This macro defines names of additional specifications to put in the specs that can be used in various specifications like CC1_SPEC. Its definition @@ -179,7 +181,6 @@ Boston, MA 02111-1307, USA. */ #define SIZE_TYPE "unsigned int" #define PTRDIFF_TYPE "int" -#define WCHAR_UNSIGNED 1 #define WCHAR_TYPE_SIZE 16 #define WCHAR_TYPE "short unsigned int" @@ -189,19 +190,6 @@ Boston, MA 02111-1307, USA. */ union tree_node; #define TREE union tree_node * - -/* Used to implement dllexport overriding dllimport semantics. 
It's also used - to handle vtables - the first pass won't do anything because - DECL_CONTEXT (DECL) will be 0 so i386_pe_dll{ex,im}port_p will return 0. - It's also used to handle dllimport override semantics. */ -#if 0 -#define REDO_SECTION_INFO_P(DECL) \ - ((DECL_ATTRIBUTES (DECL) != NULL_TREE) \ - || (TREE_CODE (DECL) == VAR_DECL && DECL_VIRTUAL_P (DECL))) -#else -#define REDO_SECTION_INFO_P(DECL) 1 -#endif - #undef EXTRA_SECTIONS #define EXTRA_SECTIONS in_drectve @@ -277,47 +265,16 @@ do { \ section and we need to set DECL_SECTION_NAME so we do that here. Note that we can be called twice on the same decl. */ -extern void i386_pe_encode_section_info PARAMS ((TREE)); - -#ifdef ENCODE_SECTION_INFO -#undef ENCODE_SECTION_INFO -#endif -#define ENCODE_SECTION_INFO(DECL) i386_pe_encode_section_info (DECL) - -/* Utility used only in this file. */ -#define I386_PE_STRIP_ENCODING(SYM_NAME) \ - ((SYM_NAME) + ((SYM_NAME)[0] == '@' \ - ? ((SYM_NAME)[3] == '*' ? 4 : 3) : 0) \ - + ((SYM_NAME)[0] == '*' ? 1 : 0)) - -/* This macro gets just the user-specified name - out of the string in a SYMBOL_REF. Discard - trailing @[NUM] encoded by ENCODE_SECTION_INFO. */ -#undef STRIP_NAME_ENCODING -#define STRIP_NAME_ENCODING(VAR,SYMBOL_NAME) \ -do { \ - const char *_p; \ - const char *_name = I386_PE_STRIP_ENCODING (SYMBOL_NAME); \ - for (_p = _name; *_p && *_p != '@'; ++_p) \ - ; \ - if (*_p == '@') \ - { \ - int _len = _p - _name; \ - char *_new_name = (char *) alloca (_len + 1); \ - strncpy (_new_name, _name, _len); \ - _new_name[_len] = '\0'; \ - (VAR) = _new_name; \ - } \ - else \ - (VAR) = _name; \ -} while (0) - +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO i386_pe_encode_section_info +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING i386_pe_strip_name_encoding_full /* Output a reference to a label. */ #undef ASM_OUTPUT_LABELREF #define ASM_OUTPUT_LABELREF(STREAM, NAME) \ fprintf (STREAM, "%s%s", USER_LABEL_PREFIX, \ - I386_PE_STRIP_ENCODING (NAME)) \ + i386_pe_strip_name_encoding (NAME)) \ /* Output a common block. */ #undef ASM_OUTPUT_COMMON @@ -350,11 +307,13 @@ do { \ #define CHECK_STACK_LIMIT 4000 /* By default, target has a 80387, uses IEEE compatible arithmetic, - and returns float values in the 387 and needs stack probes */ -#undef TARGET_SUBTARGET_DEFAULT + returns float values in the 387 and needs stack probes. + We also align doubles to 64-bits for MSVC default compatibility. */ +#undef TARGET_SUBTARGET_DEFAULT #define TARGET_SUBTARGET_DEFAULT \ - (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE) + (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE \ + | MASK_ALIGN_DOUBLE) /* This is how to output an assembler line that says to advance the location counter @@ -371,7 +330,7 @@ do { \ #define MULTIPLE_SYMBOL_SPACES extern void i386_pe_unique_section PARAMS ((TREE, int)); -#define UNIQUE_SECTION(DECL,RELOC) i386_pe_unique_section (DECL, RELOC) +#define TARGET_ASM_UNIQUE_SECTION i386_pe_unique_section #define SUPPORTS_ONE_ONLY 1 @@ -423,9 +382,9 @@ extern void i386_pe_unique_section PARAMS ((TREE, int)); #undef ASM_COMMENT_START #define ASM_COMMENT_START " #" -/* DWARF2 Unwinding doesn't work with exception handling yet. To make it - work, we need to build a libgcc_s.dll, and dcrt0.o should be changed to - call __register_frame_info/__deregister_frame_info. */ +/* DWARF2 Unwinding doesn't work with exception handling yet. 
To make + it work, we need to build a libgcc_s.dll, and dcrt0.o should be + changed to call __register_frame_info/__deregister_frame_info. */ #define DWARF2_UNWIND_INFO 0 /* Don't assume anything about the header files. */ @@ -441,6 +400,15 @@ extern void i386_pe_unique_section PARAMS ((TREE, int)); const0_rtx)); \ } +/* Java Native Interface (JNI) methods on Win32 are invoked using the + stdcall calling convention. */ +#undef MODIFY_JNI_METHOD_CALL +#define MODIFY_JNI_METHOD_CALL(MDECL) \ + build_type_attribute_variant ((MDECL), \ + build_tree_list (get_identifier ("stdcall"), \ + NULL)) + + /* External function declarations. */ extern void i386_pe_record_external_function PARAMS ((const char *)); @@ -462,7 +430,7 @@ extern int i386_pe_dllimport_name_p PARAMS ((const char *)); #undef BIGGEST_FIELD_ALIGNMENT #define BIGGEST_FIELD_ALIGNMENT 64 -/* A bitfield declared as `int' forces `int' alignment for the struct. */ +/* A bit-field declared as `int' forces `int' alignment for the struct. */ #undef PCC_BITFIELD_TYPE_MATTERS #define PCC_BITFIELD_TYPE_MATTERS 1 #define GROUP_BITFIELDS_BY_ALIGN TYPE_NATIVE(rec) diff --git a/contrib/gcc/config/i386/darwin.h b/contrib/gcc/config/i386/darwin.h new file mode 100644 index 0000000..55c29fd --- /dev/null +++ b/contrib/gcc/config/i386/darwin.h @@ -0,0 +1,120 @@ +/* Target definitions for x86 running Darwin. + Copyright (C) 2001, 2002 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Enable Mach-O bits in generic x86 code. */ +#undef TARGET_MACHO +#define TARGET_MACHO 1 + +#define TARGET_VERSION fprintf (stderr, " (i386 Darwin)"); + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__i386__"); \ + builtin_define ("__LITTLE_ENDIAN__"); \ + builtin_define ("__MACH__"); \ + builtin_define ("__APPLE__"); \ + } \ + while (0) + +/* We want -fPIC by default, unless we're using -static to compile for + the kernel or some such. */ + +#undef CC1_SPEC +#define CC1_SPEC "%{!static:-fPIC}" + +/* The Darwin assembler mostly follows AT&T syntax. */ +#undef ASSEMBLER_DIALECT +#define ASSEMBLER_DIALECT ASM_ATT + +/* Define macro used to output shift-double opcodes when the shift + count is in %cl. Some assemblers require %cl as an argument; + some don't. This macro controls what to do: by default, don't + print %cl. */ + +#define SHIFT_DOUBLE_OMITS_COUNT 0 + +/* Define the syntax of pseudo-ops, labels and comments. */ + +/* String containing the assembler's comment-starter. */ + +#define ASM_COMMENT_START "#" + +/* By default, target has a 80387, uses IEEE compatible arithmetic, + and returns float values in the 387. */ + +#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS) + +/* TARGET_DEEP_BRANCH_PREDICTION is incompatible with Mach-O PIC. 
*/ + +#undef TARGET_DEEP_BRANCH_PREDICTION +#define TARGET_DEEP_BRANCH_PREDICTION 0 + +/* Define the syntax of pseudo-ops, labels and comments. */ + +#define LPREFIX "L" + +/* Assembler pseudos to introduce constants of various size. */ + +#define ASM_BYTE_OP "\t.byte\t" +#define ASM_SHORT "\t.word\t" +#define ASM_LONG "\t.long\t" +/* Darwin as doesn't do ".quad". */ + +#undef ASM_OUTPUT_ALIGN +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + do { if ((LOG) != 0) \ + { \ + if (in_text_section ()) \ + fprintf (FILE, "\t%s %d,0x90\n", ALIGN_ASM_OP, (LOG)); \ + else \ + fprintf (FILE, "\t%s %d\n", ALIGN_ASM_OP, (LOG)); \ + } \ + } while (0) + +/* This says how to output an assembler line + to define a global common symbol. */ + +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ +( fputs (".comm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ",%u\n", (ROUNDED))) + +/* This says how to output an assembler line + to define a local common symbol. */ + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \ +( fputs (".lcomm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ",%u\n", (ROUNDED))) + +/* Darwin profiling -- call mcount. */ +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(FILE, LABELNO) \ + do { \ + if (MACHOPIC_INDIRECT) \ + { \ + const char *name = machopic_stub_name ("*mcount"); \ + fprintf (FILE, "\tcall %s\n", name+1); /* skip '&' */ \ + machopic_validate_stub_or_non_lazy_ptr (name, /*stub:*/1); \ + } \ + else fprintf (FILE, "\tcall mcount\n"); \ + } while (0) diff --git a/contrib/gcc/config/i386/djgpp.h b/contrib/gcc/config/i386/djgpp.h index 600a11e..6780780 100644 --- a/contrib/gcc/config/i386/djgpp.h +++ b/contrib/gcc/config/i386/djgpp.h @@ -1,5 +1,6 @@ /* Configuration for an i386 running MS-DOS with DJGPP. - Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 + Free Software Foundation, Inc. This file is part of GNU CC. @@ -18,21 +19,19 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include "dbxcoff.h" - /* Support generation of DWARF2 debugging info. */ -#define DWARF2_DEBUGGING_INFO +#define DWARF2_DEBUGGING_INFO 1 /* Don't assume anything about the header files. */ #define NO_IMPLICIT_EXTERN_C -#define HANDLE_SYSV_PRAGMA +#define HANDLE_SYSV_PRAGMA 1 /* Enable parsing of #pragma pack(push,<n>) and #pragma pack(pop). */ #define HANDLE_PRAGMA_PACK_PUSH_POP 1 -#define YES_UNDERSCORES - +#include "i386/unix.h" +#include "i386/bsd.h" #include "i386/gas.h" /* If defined, a C expression whose value is a string containing the @@ -83,13 +82,18 @@ Boston, MA 02111-1307, USA. */ (((NAME)[0] == '/') || ((NAME)[0] == '\\') || \ (((NAME)[0] >= 'A') && ((NAME)[0] <= 'z') && ((NAME)[1] == ':'))) -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D__MSDOS__ -D__GO32__ -Asystem=msdos" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("MSDOS"); \ + builtin_define_std ("GO32"); \ + builtin_assert ("system=msdos"); \ + } \ + while (0) /* Include <sys/version.h> so __DJGPP__ and __DJGPP_MINOR__ are defined. */ #undef CPP_SPEC -#define CPP_SPEC "-remap %(cpp_cpu) %{posix:-D_POSIX_SOURCE} \ - %{!ansi:%{!std=c*:%{!std=i*:-DMSDOS}}} %{!ansi:%{!std=c*:%{!std=i*:-DGO32}}} \ +#define CPP_SPEC "-remap %{posix:-D_POSIX_SOURCE} \ -imacros %s../include/sys/version.h" /* We need to override link_command_spec in gcc.c so support -Tdjgpp.djl. 
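For context on the CPP_PREDEFINES to TARGET_OS_CPP_BUILTINS() conversion in the DJGPP hunk above (a change repeated for the other targets in this patch), here is a hypothetical user-side fragment. It only assumes the usual behaviour of builtin_define_std(), which always provides the double-underscore spelling and adds the plain spelling outside strict-ISO modes; DIR_SEPARATOR is an invented example macro, not something this patch defines.

/* Hypothetical consumer of the target macros registered above.  */
#if defined (__MSDOS__) && defined (__GO32__)
# define DIR_SEPARATOR '\\'   /* DJGPP hosts use DOS-style paths */
#else
# define DIR_SEPARATOR '/'
#endif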
@@ -132,6 +136,8 @@ Boston, MA 02111-1307, USA. */ #undef ASM_FILE_START #define ASM_FILE_START(FILE) \ do { \ + if (ix86_asm_dialect == ASM_INTEL) \ + fputs ("\t.intel_syntax\n", FILE); \ output_file_directive (FILE, main_input_filename); \ } while (0) @@ -160,9 +166,6 @@ Boston, MA 02111-1307, USA. */ /* Definitions for types and sizes. Wide characters are 16-bits long so Win32 compiler add-ons will be wide character compatible. */ -#undef WCHAR_UNSIGNED -#define WCHAR_UNSIGNED 1 - #undef WCHAR_TYPE_SIZE #define WCHAR_TYPE_SIZE 16 @@ -186,6 +189,8 @@ Boston, MA 02111-1307, USA. */ #undef MASK_BNU210 #define MASK_BNU210 (0x40000000) +#define TARGET_VERSION fprintf (stderr, " (80386, MS-DOS DJGPP)"); + #undef SUBTARGET_SWITCHES #define SUBTARGET_SWITCHES \ { "no-bnu210", -MASK_BNU210, "Ignored (obsolete)" }, \ @@ -206,37 +211,3 @@ while (0) /* Support for C++ templates. */ #undef MAKE_DECL_ONE_ONLY #define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1) - -#undef UNIQUE_SECTION -#define UNIQUE_SECTION(DECL,RELOC) \ -do { \ - int len; \ - const char *name, *prefix; \ - char *string; \ - \ - name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (DECL)); \ - /* Strip off any encoding in fnname. */ \ - STRIP_NAME_ENCODING (name, name); \ - \ - if (! DECL_ONE_ONLY (DECL)) \ - { \ - if (TREE_CODE (DECL) == FUNCTION_DECL) \ - prefix = ".text."; \ - else if (DECL_READONLY_SECTION (DECL, RELOC)) \ - prefix = ".rodata."; \ - else \ - prefix = ".data."; \ - } \ - else if (TREE_CODE (DECL) == FUNCTION_DECL) \ - prefix = ".gnu.linkonce.t."; \ - else if (DECL_READONLY_SECTION (DECL, RELOC)) \ - prefix = ".gnu.linkonce.r."; \ - else \ - prefix = ".gnu.linkonce.d."; \ - \ - len = strlen (name) + strlen (prefix); \ - string = alloca (len + 1); \ - sprintf (string, "%s%s", prefix, name); \ - \ - DECL_SECTION_NAME (DECL) = build_string (len, string); \ -} while (0) diff --git a/contrib/gcc/config/i386/freebsd-aout.h b/contrib/gcc/config/i386/freebsd-aout.h index 0a3c6d7..85e2703 100644 --- a/contrib/gcc/config/i386/freebsd-aout.h +++ b/contrib/gcc/config/i386/freebsd-aout.h @@ -1,8 +1,9 @@ /* Definitions of target machine for GNU compiler for Intel 80386 running FreeBSD. - Copyright (C) 1988, 1992, 1994, 1996, 1997, 1999, 2000, 2002 Free Software - Foundation, Inc. + Copyright (C) 1988, 1992, 1994, 1996, 1997, 1999, 2000, 2002, 2003 + Free Software Foundation, Inc. Contributed by Poul-Henning Kamp <phk@login.dkuug.dk> + Continued development by David O'Brien <obrien@NUXI.org> This file is part of GNU CC. @@ -21,14 +22,9 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* This is tested by i386gas.h. */ -#define YES_UNDERSCORES - /* Don't assume anything about the header files. */ #define NO_IMPLICIT_EXTERN_C -#include "i386/gstabs.h" - /* This goes away when the math-emulator is fixed */ #undef TARGET_SUBTARGET_DEFAULT #define TARGET_SUBTARGET_DEFAULT \ @@ -41,9 +37,16 @@ Boston, MA 02111-1307, USA. */ defaults.h works. */ #undef ASM_PREFERRED_EH_DATA_FORMAT -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-Dunix -D__FreeBSD__\ - -Asystem=unix -Asystem=bsd -Asystem=FreeBSD" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("unix"); \ + builtin_define ("__FreeBSD__"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=bsd"); \ + builtin_assert ("system=FreeBSD"); \ + } \ + while (0) /* Like the default, except no -lg. 
*/ #define LIB_SPEC "%{!shared:%{!pg:-lc}%{pg:-lc_p}}" @@ -57,8 +60,6 @@ Boston, MA 02111-1307, USA. */ #undef WCHAR_TYPE #define WCHAR_TYPE "int" -#define WCHAR_UNSIGNED 0 - #undef WCHAR_TYPE_SIZE #define WCHAR_TYPE_SIZE BITS_PER_WORD @@ -93,22 +94,13 @@ Boston, MA 02111-1307, USA. */ /* Profiling routines, partially copied from i386/osfrose.h. */ -/* Redefine this to use %eax instead of %edx. */ -#undef FUNCTION_PROFILER -#define FUNCTION_PROFILER(FILE, LABELNO) \ -{ \ - if (flag_pic) \ - { \ - fprintf (FILE, "\tleal %sP%d@GOTOFF(%%ebx),%%eax\n", \ - LPREFIX, (LABELNO)); \ - fprintf (FILE, "\tcall *mcount@GOT(%%ebx)\n"); \ - } \ - else \ - { \ - fprintf (FILE, "\tmovl $%sP%d,%%eax\n", LPREFIX, (LABELNO)); \ - fprintf (FILE, "\tcall mcount\n"); \ - } \ -} +/* Tell final.c that we don't need a label passed to mcount. */ +#define NO_PROFILE_COUNTERS 1 + +#undef MCOUNT_NAME +#define MCOUNT_NAME "mcount" +#undef PROFILE_COUNT_REGISTER +#define PROFILE_COUNT_REGISTER "eax" /* * Some imports from svr4.h in support of shared libraries. @@ -123,6 +115,7 @@ Boston, MA 02111-1307, USA. */ #define TYPE_ASM_OP "\t.type\t" #define SIZE_ASM_OP "\t.size\t" +#define SET_ASM_OP "\t.set\t" /* The following macro defines the format used to output the second operand of the .type assembler directive. Different svr4 assemblers @@ -132,6 +125,12 @@ Boston, MA 02111-1307, USA. */ #define TYPE_OPERAND_FMT "@%s" +#define HANDLE_SYSV_PRAGMA 1 + +#define ASM_WEAKEN_LABEL(FILE,NAME) \ + do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \ + fputc ('\n', FILE); } while (0) + /* Write the extra assembler code needed to declare a function's result. Most svr4 assemblers don't require any special declaration of the result value, but there are exceptions. */ @@ -149,36 +148,36 @@ Boston, MA 02111-1307, USA. */ Some svr4 assemblers need to also have something extra said about the function's return value. We allow for that here. */ -#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ - do { \ - fprintf (FILE, "%s", TYPE_ASM_OP); \ - assemble_name (FILE, NAME); \ - putc (',', FILE); \ - fprintf (FILE, TYPE_OPERAND_FMT, "function"); \ - putc ('\n', FILE); \ - ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ - ASM_OUTPUT_LABEL(FILE, NAME); \ - } while (0) +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + } \ + while (0) /* Write the extra assembler code needed to declare an object properly. 
*/ -#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ - do { \ - fprintf (FILE, "%s", TYPE_ASM_OP); \ - assemble_name (FILE, NAME); \ - putc (',', FILE); \ - fprintf (FILE, TYPE_OPERAND_FMT, "object"); \ - putc ('\n', FILE); \ - size_directive_output = 0; \ - if (!flag_inhibit_size_directive && DECL_SIZE (DECL)) \ - { \ - size_directive_output = 1; \ - fprintf (FILE, "%s", SIZE_ASM_OP); \ - assemble_name (FILE, NAME); \ - fprintf (FILE, ",%d\n", int_size_in_bytes (TREE_TYPE (DECL))); \ - } \ - ASM_OUTPUT_LABEL(FILE, NAME); \ - } while (0) +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + do \ + { \ + HOST_WIDE_INT size; \ + \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + \ + size_directive_output = 0; \ + if (!flag_inhibit_size_directive \ + && (DECL) && DECL_SIZE (DECL)) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \ + } \ + \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + } \ + while (0) /* Output the size directive for a decl in rest_of_decl_compilation in the case where we did not do so before the initializer. @@ -189,37 +188,24 @@ Boston, MA 02111-1307, USA. */ #define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \ do { \ const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + HOST_WIDE_INT size; \ if (!flag_inhibit_size_directive && DECL_SIZE (DECL) \ && ! AT_END && TOP_LEVEL \ && DECL_INITIAL (DECL) == error_mark_node \ && !size_directive_output) \ { \ - fprintf (FILE, "%s", SIZE_ASM_OP); \ - assemble_name (FILE, name); \ - fprintf (FILE, ",%d\n", int_size_in_bytes (TREE_TYPE (DECL)));\ - } \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \ + } \ } while (0) - /* This is how to declare the size of a function. */ #define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ do { \ if (!flag_inhibit_size_directive) \ - { \ - char label[256]; \ - static int labelno; \ - labelno++; \ - ASM_GENERATE_INTERNAL_LABEL (label, "Lfe", labelno); \ - ASM_OUTPUT_INTERNAL_LABEL (FILE, "Lfe", labelno); \ - fprintf (FILE, "%s", SIZE_ASM_OP); \ - assemble_name (FILE, (FNAME)); \ - fprintf (FILE, ","); \ - assemble_name (FILE, label); \ - fprintf (FILE, "-"); \ - assemble_name (FILE, (FNAME)); \ - putc ('\n', FILE); \ - } \ + ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \ } while (0) #define ASM_SPEC " %| %{fpic:-k} %{fPIC:-k}" diff --git a/contrib/gcc/config/i386/freebsd.h b/contrib/gcc/config/i386/freebsd.h index 274260b..603e3ac 100644 --- a/contrib/gcc/config/i386/freebsd.h +++ b/contrib/gcc/config/i386/freebsd.h @@ -23,23 +23,22 @@ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (i386 FreeBSD/ELF)"); /* Override the default comment-starter of "/". */ -#undef ASM_COMMENT_START +#undef ASM_COMMENT_START #define ASM_COMMENT_START "#" -#undef ASM_APP_ON +#undef ASM_APP_ON #define ASM_APP_ON "#APP\n" -#undef ASM_APP_OFF +#undef ASM_APP_OFF #define ASM_APP_OFF "#NO_APP\n" -#undef SET_ASM_OP +#undef SET_ASM_OP #define SET_ASM_OP "\t.set\t" -#undef DBX_REGISTER_NUMBER +#undef DBX_REGISTER_NUMBER #define DBX_REGISTER_NUMBER(n) \ (TARGET_64BIT ? dbx64_register_map[n] : svr4_dbx_register_map[n]) @@ -48,24 +47,18 @@ Boston, MA 02111-1307, USA. */ /* Tell final.c that we don't need a label passed to mcount. 
*/ -#undef FUNCTION_PROFILER -#define FUNCTION_PROFILER(FILE, LABELNO) \ -{ \ - if (flag_pic) \ - fprintf ((FILE), "\tcall *.mcount@GOT(%%ebx)\n"); \ - else \ - fprintf ((FILE), "\tcall .mcount\n"); \ -} +#undef MCOUNT_NAME +#define MCOUNT_NAME ".mcount" /* Make gcc agree with <machine/ansi.h>. */ -#undef SIZE_TYPE +#undef SIZE_TYPE #define SIZE_TYPE "unsigned int" -#undef PTRDIFF_TYPE +#undef PTRDIFF_TYPE #define PTRDIFF_TYPE "int" -#undef WCHAR_TYPE_SIZE +#undef WCHAR_TYPE_SIZE #define WCHAR_TYPE_SIZE BITS_PER_WORD /* Provide a STARTFILE_SPEC appropriate for FreeBSD. Here we add @@ -139,3 +132,13 @@ Boston, MA 02111-1307, USA. */ #undef DEFAULT_PCC_STRUCT_RETURN #define DEFAULT_PCC_STRUCT_RETURN 0 + +/* FreeBSD sets the rounding precision of the FPU to 53 bits. Let the + compiler get the contents of <float.h> and std::numeric_limits correct. */ +#define SUBTARGET_OVERRIDE_OPTIONS \ + do { \ + real_format_for_mode[XFmode - QFmode] \ + = &ieee_extended_intel_96_round_53_format; \ + real_format_for_mode[TFmode - QFmode] \ + = &ieee_extended_intel_96_round_53_format; \ + } while (0) diff --git a/contrib/gcc/config/i386/freebsd64.h b/contrib/gcc/config/i386/freebsd64.h index ebd6618..12ca062 100644 --- a/contrib/gcc/config/i386/freebsd64.h +++ b/contrib/gcc/config/i386/freebsd64.h @@ -20,7 +20,7 @@ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#undef TARGET_VERSION +#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (FreeBSD/x86-64 ELF)"); /* Provide a LINK_SPEC appropriate for the FreeBSD/x86-64 ELF target. @@ -29,8 +29,7 @@ Boston, MA 02111-1307, USA. */ #undef LINK_SPEC #define LINK_SPEC "\ - %{!m32:-m elf_x86_64} \ - %{m32:-m elf_i386} \ + %{m32:-m elf_i386_fbsd} \ %{Wl,*:%*} \ %{v:-V} \ %{assert*} %{R*} %{rpath*} %{defsym*} \ diff --git a/contrib/gcc/config/i386/gas.h b/contrib/gcc/config/i386/gas.h index 6c01b07..075d749 100644 --- a/contrib/gcc/config/i386/gas.h +++ b/contrib/gcc/config/i386/gas.h @@ -1,5 +1,5 @@ /* Definitions for Intel 386 using GAS. - Copyright (C) 1988, 1993, 1994, 1996 Free Software Foundation, Inc. + Copyright (C) 1988, 1993, 1994, 1996, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -21,13 +21,6 @@ Boston, MA 02111-1307, USA. */ /* Note that i386/seq-gas.h is a GAS configuration that does not use this file. */ -#include "i386/i386.h" - -#ifndef YES_UNDERSCORES -/* Define this now, because i386/bsd.h tests it. */ -#define NO_UNDERSCORES -#endif - /* Use the bsd assembler syntax. */ /* we need to do this because gas is really a bsd style assembler, * and so doesn't work well this these att-isms: @@ -44,24 +37,13 @@ Boston, MA 02111-1307, USA. */ * people who want both form will have to compile twice. */ -#include "i386/bsd.h" - /* these come from i386/bsd.h, but are specific to sequent */ #undef DBX_NO_XREFS #undef DBX_CONTIN_LENGTH /* Ask for COFF symbols. */ -#define SDB_DEBUGGING_INFO - -/* Specify predefined symbols in preprocessor. */ - -#define CPP_PREDEFINES "-Dunix" -#define CPP_SPEC "%(cpp_cpu) %{posix:-D_POSIX_SOURCE}" - -/* Allow #sccs in preprocessor. */ - -#define SCCS_DIRECTIVE +#define SDB_DEBUGGING_INFO 1 /* Output #ident as a .ident. */ @@ -139,28 +121,6 @@ Boston, MA 02111-1307, USA. */ /* Print opcodes the way that GAS expects them. */ #define GAS_MNEMONICS 1 -#ifdef NO_UNDERSCORES /* If user-symbols don't have underscores, - then it must take more than `L' to identify - a label that should be ignored. 
*/ - -/* This is how to store into the string BUF - the symbol_ref name of an internal numbered label where - PREFIX is the class of label and NUM is the number within the class. - This is suitable for output with `assemble_name'. */ - -#undef ASM_GENERATE_INTERNAL_LABEL -#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \ - sprintf ((BUF), ".%s%ld", (PREFIX), (long)(NUMBER)) - -/* This is how to output an internal numbered label where - PREFIX is the class of label and NUM is the number within the class. */ - -#undef ASM_OUTPUT_INTERNAL_LABEL -#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ - fprintf (FILE, ".%s%d:\n", PREFIX, NUM) - -#endif /* NO_UNDERSCORES */ - /* Output at beginning of assembler file. */ /* The .file command should always begin the output. */ #undef ASM_FILE_START diff --git a/contrib/gcc/config/i386/gnu.h b/contrib/gcc/config/i386/gnu.h index b14328f..acf2d3f 100644 --- a/contrib/gcc/config/i386/gnu.h +++ b/contrib/gcc/config/i386/gnu.h @@ -3,14 +3,29 @@ #undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (i386 GNU)"); -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D__ELF__ -DMACH -Asystem=mach \ - -Dunix -Asystem=unix -Asystem=posix -D__gnu_hurd__ -D__GNU__ -Asystem=gnu" +#undef TARGET_OS_CPP_BUILTINS /* config.gcc includes i386/linux.h. */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("MACH"); \ + builtin_define_std ("unix"); \ + builtin_define ("__ELF__"); \ + builtin_define ("__GNU__"); \ + builtin_define ("__gnu_hurd__"); \ + builtin_assert ("system=gnu"); \ + builtin_assert ("system=mach"); \ + builtin_assert ("system=posix"); \ + builtin_assert ("system=unix"); \ + if (flag_pic) \ + { \ + builtin_define ("__PIC__"); \ + builtin_define ("__pic__"); \ + } \ + } \ + while (0) #undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) \ - %{fPIC:-D__PIC__ -D__pic__} %{fpic:-D__PIC__ -D__pic__} \ - %{posix:-D_POSIX_SOURCE} %{bsd:-D_BSD_SOURCE}" +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{bsd:-D_BSD_SOURCE}" #undef CC1_SPEC #define CC1_SPEC "%(cc1_cpu)" diff --git a/contrib/gcc/config/i386/gstabs.h b/contrib/gcc/config/i386/gstabs.h index 5f0ae34..e9a6218 100644 --- a/contrib/gcc/config/i386/gstabs.h +++ b/contrib/gcc/config/i386/gstabs.h @@ -1,9 +1,7 @@ -#include "i386/gas.h" - /* We do not want to output SDB debugging information. */ #undef SDB_DEBUGGING_INFO /* We want to output DBX debugging information. */ -#define DBX_DEBUGGING_INFO +#define DBX_DEBUGGING_INFO 1 diff --git a/contrib/gcc/config/i386/gthr-win32.c b/contrib/gcc/config/i386/gthr-win32.c new file mode 100644 index 0000000..06dc204 --- /dev/null +++ b/contrib/gcc/config/i386/gthr-win32.c @@ -0,0 +1,174 @@ +/* Implementation of W32-specific threads compatibility routines for + libgcc2. */ + +/* Copyright (C) 1999, 2000, 2002 Free Software Foundation, Inc. + Contributed by Mumit Khan <khan@xraylith.wisc.edu>. + Modified and moved to separate file by Danny Smith + <dannysmith@users.sourceforge.net>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. */ + + +#ifndef __GTHREAD_HIDE_WIN32API +# define __GTHREAD_HIDE_WIN32API +#endif +#include <gthr-win32.h> +#include <windows.h> + +/* Windows32 threads specific definitions. The windows32 threading model + does not map well into pthread-inspired gcc's threading model, and so + there are caveats one needs to be aware of. + + 1. The destructor supplied to __gthread_key_create is ignored for + generic x86-win32 ports. This will certainly cause memory leaks + due to unreclaimed eh contexts (sizeof (eh_context) is at least + 24 bytes for x86 currently). + + This memory leak may be significant for long-running applications + that make heavy use of C++ EH. + + However, Mingw runtime (version 0.3 or newer) provides a mechanism + to emulate pthreads key dtors; the runtime provides a special DLL, + linked in if -mthreads option is specified, that runs the dtors in + the reverse order of registration when each thread exits. If + -mthreads option is not given, a stub is linked in instead of the + DLL, which results in memory leak. Other x86-win32 ports can use + the same technique of course to avoid the leak. + + 2. The error codes returned are non-POSIX like, and cast into ints. + This may cause incorrect error return due to truncation values on + hw where sizeof (DWORD) > sizeof (int). + + 3. We might consider using Critical Sections instead of Windows32 + mutexes for better performance, but emulating __gthread_mutex_trylock + interface becomes more complicated (Win9x does not support + TryEnterCriticalSectioni, while NT does). + + The basic framework should work well enough. In the long term, GCC + needs to use Structured Exception Handling on Windows32. */ + +int +__gthr_win32_once (__gthread_once_t *once, void (*func) (void)) +{ + if (once == NULL || func == NULL) + return EINVAL; + + if (! once->done) + { + if (InterlockedIncrement (&(once->started)) == 0) + { + (*func) (); + once->done = TRUE; + } + else + { + /* Another thread is currently executing the code, so wait for it + to finish; yield the CPU in the meantime. If performance + does become an issue, the solution is to use an Event that + we wait on here (and set above), but that implies a place to + create the event before this routine is called. */ + while (! once->done) + Sleep (0); + } + } + return 0; +} + +/* Windows32 thread local keys don't support destructors; this leads to + leaks, especially in threaded applications making extensive use of + C++ EH. Mingw uses a thread-support DLL to work-around this problem. */ + +int +__gthr_win32_key_create (__gthread_key_t *key, void (*dtor) (void *)) +{ + int status = 0; + DWORD tls_index = TlsAlloc (); + if (tls_index != 0xFFFFFFFF) + { + *key = tls_index; +#ifdef MINGW32_SUPPORTS_MT_EH + /* Mingw runtime will run the dtors in reverse order for each thread + when the thread exits. 
*/ + status = __mingwthr_key_dtor (*key, dtor); +#endif + } + else + status = (int) GetLastError (); + return status; +} + +int +__gthr_win32_key_delete (__gthread_key_t key) +{ + return (TlsFree (key) != 0) ? 0 : (int) GetLastError (); +} + +void * +__gthr_win32_getspecific (__gthread_key_t key) +{ + DWORD lasterror; + void *ptr; + lasterror = GetLastError(); + ptr = TlsGetValue(key); + SetLastError( lasterror ); + return ptr; +} + +int +__gthr_win32_setspecific (__gthread_key_t key, const void *ptr) +{ + return (TlsSetValue (key, (void*) ptr) != 0) ? 0 : (int) GetLastError (); +} + +void +__gthr_win32_mutex_init_function (__gthread_mutex_t *mutex) +{ + /* Create unnamed mutex with default security attr and no initial owner. */ + *mutex = CreateMutex (NULL, 0, NULL); +} + +int +__gthr_win32_mutex_lock (__gthread_mutex_t *mutex) +{ + if (WaitForSingleObject (*mutex, INFINITE) == WAIT_OBJECT_0) + return 0; + else + return 1; +} + +int +__gthr_win32_mutex_trylock (__gthread_mutex_t *mutex) +{ + if (WaitForSingleObject (*mutex, 0) == WAIT_OBJECT_0) + return 0; + else + return 1; +} + +int +__gthr_win32_mutex_unlock (__gthread_mutex_t *mutex) +{ + return (ReleaseMutex (*mutex) != 0) ? 0 : 1; +} diff --git a/contrib/gcc/config/i386/i386-aout.h b/contrib/gcc/config/i386/i386-aout.h index 7385bec..ca0cb25 100644 --- a/contrib/gcc/config/i386/i386-aout.h +++ b/contrib/gcc/config/i386/i386-aout.h @@ -1,7 +1,7 @@ /* Definitions for "naked" Intel 386 using a.out (or coff encap'd a.out) object format and stabs debugging info. - Copyright (C) 1994 Free Software Foundation, Inc. + Copyright (C) 1994, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -21,14 +21,6 @@ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* This is tested by gas.h. */ -#define YES_UNDERSCORES - -#include "i386/gstabs.h" - -/* Specify predefined symbols in preprocessor. */ - -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "" +#define TARGET_VERSION fprintf (stderr, " (80386, BSD a.out syntax)"); /* end of i386-aout.h */ diff --git a/contrib/gcc/config/i386/i386-coff.h b/contrib/gcc/config/i386/i386-coff.h index c1ae670..e8c5de9 100644 --- a/contrib/gcc/config/i386/i386-coff.h +++ b/contrib/gcc/config/i386/i386-coff.h @@ -1,7 +1,7 @@ /* Definitions for "naked" Intel 386 using coff object format files and coff debugging info. - Copyright (C) 1994, 2000 Free Software Foundation, Inc. + Copyright (C) 1994, 2000, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -21,18 +21,13 @@ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include "i386/gas.h" -#include "dbxcoff.h" +#define TARGET_VERSION fprintf (stderr, " (80386, COFF BSD syntax)"); -/* Specify predefined symbols in preprocessor. */ - -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "" +#define TARGET_OS_CPP_BUILTINS() /* Sweet FA. */ /* We want to be able to get DBX debugging information via -gstabs. */ -#undef DBX_DEBUGGING_INFO -#define DBX_DEBUGGING_INFO +#define DBX_DEBUGGING_INFO 1 #undef PREFERRED_DEBUGGING_TYPE #define PREFERRED_DEBUGGING_TYPE SDB_DEBUG @@ -40,4 +35,36 @@ Boston, MA 02111-1307, USA. */ /* Switch into a generic section. */ #define TARGET_ASM_NAMED_SECTION default_coff_asm_named_section +/* Prefix for internally generated assembler labels. 
If we aren't using + underscores, we are using prefix `.'s to identify labels that should + be ignored, as in `i386/gas.h' --karl@cs.umb.edu */ + +#undef LPREFIX +#define LPREFIX ".L" + +/* The prefix to add to user-visible assembler symbols. */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" + +/* If user-symbols don't have underscores, + then it must take more than `L' to identify + a label that should be ignored. */ + +/* This is how to store into the string BUF + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \ + sprintf ((BUF), ".%s%ld", (PREFIX), (long)(NUMBER)) + +/* This is how to output an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. */ + +#undef ASM_OUTPUT_INTERNAL_LABEL +#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ + fprintf (FILE, ".%s%d:\n", PREFIX, NUM) + /* end of i386-coff.h */ diff --git a/contrib/gcc/config/i386/i386-interix.h b/contrib/gcc/config/i386/i386-interix.h index 76357f3..d309087 100644 --- a/contrib/gcc/config/i386/i386-interix.h +++ b/contrib/gcc/config/i386/i386-interix.h @@ -1,5 +1,5 @@ /* Target definitions for GNU compiler for Intel 80386 running Interix - Parts Copyright (C) 1991, 1999, 2000 Free Software Foundation, Inc. + Parts Copyright (C) 1991, 1999, 2000, 2002 Free Software Foundation, Inc. Parts: by Douglas B. Rupp (drupp@cs.washington.edu). @@ -24,17 +24,13 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#define YES_UNDERSCORES - -/* YES_UNDERSCORES must precede gas.h */ -#include <i386/gas.h> /* The rest must follow. */ -#define DBX_DEBUGGING_INFO -#define SDB_DEBUGGING_INFO +#define DBX_DEBUGGING_INFO 1 +#define SDB_DEBUGGING_INFO 1 #define PREFERRED_DEBUGGING_TYPE DBX_DEBUG -#define HANDLE_SYSV_PRAGMA +#define HANDLE_SYSV_PRAGMA 1 #undef HANDLE_PRAGMA_WEAK /* until the link format can handle it */ /* By default, target has a 80387, uses IEEE compatible arithmetic, @@ -48,7 +44,6 @@ Boston, MA 02111-1307, USA. */ #undef TARGET_CPU_DEFAULT #define TARGET_CPU_DEFAULT 2 /* 486 */ -#define WCHAR_UNSIGNED 1 #define WCHAR_TYPE_SIZE 16 #define WCHAR_TYPE "short unsigned int" @@ -58,35 +53,39 @@ Boston, MA 02111-1307, USA. 
*/ #define ASM_LOAD_ADDR(loc, reg) " leal " #loc "," #reg "\n" /* cpp handles __STDC__ */ -#undef CPP_PREDEFINES -#define CPP_PREDEFINES " \ - -D__INTERIX \ - -D__OPENNT \ - -D_M_IX86=300 -D_X86_=1 \ - -D__stdcall=__attribute__((__stdcall__)) \ - -D__cdecl=__attribute__((__cdecl__)) \ - -D__declspec(x)=__attribute__((x)) \ - -Asystem=unix -Asystem=interix" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__INTERIX"); \ + builtin_define ("__OPENNT"); \ + builtin_define ("_M_IX86=300"); \ + builtin_define ("_X86_=1"); \ + builtin_define ("__stdcall=__attribute__((__stdcall__))"); \ + builtin_define ("__cdecl=__attribute__((__cdecl__))"); \ + builtin_define ("__declspec(x)=__attribute__((x))"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=interix"); \ + if (preprocessing_asm_p ()) \ + builtin_define_std ("LANGUAGE_ASSEMBLY"); \ + else \ + { \ + builtin_define_std ("LANGUAGE_C"); \ + if (c_language == clk_cplusplus) \ + builtin_define_std ("LANGUAGE_C_PLUS_PLUS"); \ + if (flag_objc) \ + builtin_define_std ("LANGUAGE_OBJECTIVE_C"); \ + } \ + } \ + while (0) #undef CPP_SPEC /* Write out the correct language type definition for the header files. Unless we have assembler language, write out the symbols for C. - cpp_cpu is an Intel specific variant. See i386.h mieee is an Alpha specific variant. Cross polination a bad idea. */ -#define CPP_SPEC "\ -%{!.S: -D__LANGUAGE_C__ -D__LANGUAGE_C %{!ansi:-DLANGUAGE_C}} \ -%{.S: -D__LANGUAGE_ASSEMBLY__ -D__LANGUAGE_ASSEMBLY %{!ansi:-DLANGUAGE_ASSEMBLY}} \ -%{.cc: -D__LANGUAGE_C_PLUS_PLUS__ -D__LANGUAGE_C_PLUS_PLUS -D__cplusplus} \ -%{.cxx: -D__LANGUAGE_C_PLUS_PLUS__ -D__LANGUAGE_C_PLUS_PLUS -D__cplusplus} \ -%{.C: -D__LANGUAGE_C_PLUS_PLUS__ -D__LANGUAGE_C_PLUS_PLUS -D__cplusplus} \ -%{.m: -D__LANGUAGE_OBJECTIVE_C__ -D__LANGUAGE_OBJECTIVE_C} \ --remap \ -%(cpp_cpu) \ -%{posix:-D_POSIX_SOURCE} \ +#define CPP_SPEC "-remap %{posix:-D_POSIX_SOURCE} \ -isystem %$INTERIX_ROOT/usr/include" -#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (i386 Interix)"); /* The global __fltused is necessary to cause the printf/scanf routines @@ -238,50 +237,15 @@ Boston, MA 02111-1307, USA. */ #undef LD_INIT_SWITCH #undef LD_FINI_SWITCH -#define EH_FRAME_IN_DATA_SECTION - -/* Note that there appears to be two different ways to support const - sections at the moment. You can either #define the symbol - READONLY_DATA_SECTION (giving it some code which switches to the - readonly data section) or else you can #define the symbols - EXTRA_SECTIONS, EXTRA_SECTION_FUNCTIONS, SELECT_SECTION, and - SELECT_RTX_SECTION. We do both here just to be on the safe side. */ - -#define USE_CONST_SECTION 1 - -#define CONST_SECTION_ASM_OP "\t.section\t.rdata,\"r\"" - -/* A default list of other sections which we might be "in" at any given - time. For targets that use additional sections (e.g. .tdesc) you - should override this definition in the target-specific file which - includes this file. */ - -#undef EXTRA_SECTIONS -#define EXTRA_SECTIONS in_const +/* The following are needed for us to be able to use winnt.c, but are not + otherwise meaningful to Interix. (The functions that use these are + never called because we don't do DLLs.) */ +#define TARGET_NOP_FUN_DLLIMPORT 1 +#define drectve_section() /* nothing */ -/* A default list of extra section function definitions. For targets - that use additional sections (e.g. .tdesc) you should override this - definition in the target-specific file which includes this file. 
*/ - -#undef EXTRA_SECTION_FUNCTIONS -#define EXTRA_SECTION_FUNCTIONS \ - CONST_SECTION_FUNCTION - -#undef READONLY_DATA_SECTION -#define READONLY_DATA_SECTION() const_section () +#define EH_FRAME_IN_DATA_SECTION -#define CONST_SECTION_FUNCTION \ -void \ -const_section () \ -{ \ - if (!USE_CONST_SECTION) \ - text_section(); \ - else if (in_section != in_const) \ - { \ - fprintf (asm_out_file, "%s\n", CONST_SECTION_ASM_OP); \ - in_section = in_const; \ - } \ -} +#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rdata,\"r\"" /* The MS compilers take alignment as a number of bytes, so we do as well */ #undef ASM_OUTPUT_ALIGN @@ -346,57 +310,13 @@ while (0) /* Define this macro if references to a symbol must be treated differently depending on something about the variable or - function named by the symbol (such as what section it is in). - - Apply stddef, handle (as yet unimplemented) pic. - - stddef renaming does NOT apply to Alpha. */ - -union tree_node; -const char *gen_stdcall_suffix PARAMS ((union tree_node *)); - -#undef ENCODE_SECTION_INFO -#define ENCODE_SECTION_INFO(DECL) \ -do \ - { \ - if (flag_pic) \ - { \ - rtx rtl = (TREE_CODE_CLASS (TREE_CODE (DECL)) != 'd' \ - ? TREE_CST_RTL (DECL) : DECL_RTL (DECL)); \ - SYMBOL_REF_FLAG (XEXP (rtl, 0)) \ - = (TREE_CODE_CLASS (TREE_CODE (DECL)) != 'd' \ - || ! TREE_PUBLIC (DECL)); \ - } \ - if (TREE_CODE (DECL) == FUNCTION_DECL) \ - if (lookup_attribute ("stdcall", \ - TYPE_ATTRIBUTES (TREE_TYPE (DECL)))) \ - XEXP (DECL_RTL (DECL), 0) = \ - gen_rtx (SYMBOL_REF, Pmode, gen_stdcall_suffix (DECL)); \ - } \ -while (0) + function named by the symbol (such as what section it is in). */ + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO i386_pe_encode_section_info +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING i386_pe_strip_name_encoding_full -/* This macro gets just the user-specified name - out of the string in a SYMBOL_REF. Discard - trailing @[NUM] encoded by ENCODE_SECTION_INFO. */ -#undef STRIP_NAME_ENCODING -#define STRIP_NAME_ENCODING(VAR,SYMBOL_NAME) \ -do { \ - const char *_p; \ - const char *_name = SYMBOL_NAME; \ - for (_p = _name; *_p && *_p != '@'; ++_p) \ - ; \ - if (*_p == '@') \ - { \ - int _len = _p - _name; \ - char *_new_name = (char *) alloca (_len + 1); \ - strncpy (_new_name, _name, _len); \ - _new_name[_len] = '\0'; \ - (VAR) = _new_name; \ - } \ - else \ - (VAR) = _name; \ -} while (0) - #if 0 /* Turn this back on when the linker is updated to handle grouped .data$ sections correctly. See corresponding note in i386/interix.c. @@ -408,8 +328,8 @@ do { \ symbols must be explicitly imported from shared libraries (DLLs). */ #define MULTIPLE_SYMBOL_SPACES -extern void i386_pe_unique_section (); -#define UNIQUE_SECTION(DECL,RELOC) i386_pe_unique_section (DECL, RELOC) +extern void i386_pe_unique_section PARAMS ((tree, int)); +#define TARGET_ASM_UNIQUE_SECTION i386_pe_unique_section #define SUPPORTS_ONE_ONLY 1 #endif /* 0 */ diff --git a/contrib/gcc/config/i386/i386-interix3.h b/contrib/gcc/config/i386/i386-interix3.h index 274972b..aafe57f 100644 --- a/contrib/gcc/config/i386/i386-interix3.h +++ b/contrib/gcc/config/i386/i386-interix3.h @@ -20,13 +20,5 @@ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ #undef CPP_SPEC -#define CPP_SPEC "\ -%{!.S: -D__LANGUAGE_C__ -D__LANGUAGE_C %{!ansi:-DLANGUAGE_C}} \ -%{.S: -D__LANGUAGE_ASSEMBLY__ -D__LANGUAGE_ASSEMBLY %{!ansi:-DLANGUAGE_ASSEMBLY}} \ -%{.cc: -D__LANGUAGE_C_PLUS_PLUS__ -D__LANGUAGE_C_PLUS_PLUS -D__cplusplus} \ -%{.cxx: -D__LANGUAGE_C_PLUS_PLUS__ -D__LANGUAGE_C_PLUS_PLUS -D__cplusplus} \ -%{.C: -D__LANGUAGE_C_PLUS_PLUS__ -D__LANGUAGE_C_PLUS_PLUS -D__cplusplus} \ -%{.m: -D__LANGUAGE_OBJECTIVE_C__ -D__LANGUAGE_OBJECTIVE_C} \ -%(cpp_cpu) \ -%{posix:-D_POSIX_SOURCE}" +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" diff --git a/contrib/gcc/config/i386/i386-modes.def b/contrib/gcc/config/i386/i386-modes.def new file mode 100644 index 0000000..5ef800f --- /dev/null +++ b/contrib/gcc/config/i386/i386-modes.def @@ -0,0 +1,46 @@ +/* Definitions of target machine for GNU compiler for IA-32. + Copyright (C) 2002 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Add any extra modes needed to represent the condition code. + + For the i386, we need separate modes when floating-point + equality comparisons are being done. + + Add CCNO to indicate comparisons against zero that requires + Overflow flag to be unset. Sign bit test is used instead and + thus can be used to form "a&b>0" type of tests. + + Add CCGC to indicate comparisons agains zero that allows + unspecified garbage in the Carry flag. This mode is used + by inc/dec instructions. + + Add CCGOC to indicate comparisons agains zero that allows + unspecified garbage in the Carry and Overflow flag. This + mode is used to simulate comparisons of (a-b) and (a+b) + against zero using sub/cmp/add operations. + + Add CCZ to indicate that only the Zero flag is valid. 
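The CCNO case described in the comment above covers exactly tests like the assumed example below: after an "and", the Overflow flag is known to be clear, so a signed "greater than zero" check can be answered from the Sign and Zero flags without a separate compare. (CCGC and CCGOC similarly tolerate stale Carry, and Carry plus Overflow, after inc/dec and add/sub when the result is only compared against zero.)

/* Compiles to roughly "andl ...; setg ..." on ia32, relying on OF being
   cleared by the and -- the CCNOmode situation. */
int both_positive_bits (int a, int b)
{
  return (a & b) > 0;
}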
*/ + +CC (CCGC) +CC (CCGOC) +CC (CCNO) +CC (CCZ) +CC (CCFP) +CC (CCFPU) diff --git a/contrib/gcc/config/i386/i386-protos.h b/contrib/gcc/config/i386/i386-protos.h index 03de4b1..b5ddb37 100644 --- a/contrib/gcc/config/i386/i386-protos.h +++ b/contrib/gcc/config/i386/i386-protos.h @@ -28,7 +28,6 @@ extern int ix86_frame_pointer_required PARAMS ((void)); extern void ix86_setup_frame_addresses PARAMS ((void)); extern void ix86_asm_file_end PARAMS ((FILE *)); -extern void load_pic_register PARAMS ((void)); extern HOST_WIDE_INT ix86_initial_elimination_offset PARAMS((int, int)); extern void ix86_expand_prologue PARAMS ((void)); extern void ix86_expand_epilogue PARAMS ((int)); @@ -43,6 +42,12 @@ extern int standard_80387_constant_p PARAMS ((rtx)); extern int standard_sse_constant_p PARAMS ((rtx)); extern int symbolic_reference_mentioned_p PARAMS ((rtx)); +extern int any_fp_register_operand PARAMS ((rtx, enum machine_mode)); +extern int register_and_not_any_fp_reg_operand PARAMS ((rtx, enum machine_mode)); + +extern int fp_register_operand PARAMS ((rtx, enum machine_mode)); +extern int register_and_not_fp_reg_operand PARAMS ((rtx, enum machine_mode)); + extern int x86_64_general_operand PARAMS ((rtx, enum machine_mode)); extern int x86_64_szext_general_operand PARAMS ((rtx, enum machine_mode)); extern int x86_64_nonmemory_operand PARAMS ((rtx, enum machine_mode)); @@ -51,6 +56,11 @@ extern int x86_64_immediate_operand PARAMS ((rtx, enum machine_mode)); extern int x86_64_zext_immediate_operand PARAMS ((rtx, enum machine_mode)); extern int const_int_1_operand PARAMS ((rtx, enum machine_mode)); extern int symbolic_operand PARAMS ((rtx, enum machine_mode)); +extern int tls_symbolic_operand PARAMS ((rtx, enum machine_mode)); +extern int global_dynamic_symbolic_operand PARAMS ((rtx, enum machine_mode)); +extern int local_dynamic_symbolic_operand PARAMS ((rtx, enum machine_mode)); +extern int initial_exec_symbolic_operand PARAMS ((rtx, enum machine_mode)); +extern int local_exec_symbolic_operand PARAMS ((rtx, enum machine_mode)); extern int pic_symbolic_operand PARAMS ((rtx, enum machine_mode)); extern int call_insn_operand PARAMS ((rtx, enum machine_mode)); extern int constant_call_address_operand PARAMS ((rtx, enum machine_mode)); @@ -84,6 +94,9 @@ extern int ix86_expand_movstr PARAMS ((rtx, rtx, rtx, rtx)); extern int ix86_expand_clrstr PARAMS ((rtx, rtx, rtx)); extern int ix86_expand_strlen PARAMS ((rtx, rtx, rtx, rtx)); +extern bool legitimate_constant_p PARAMS ((rtx)); +extern bool constant_address_p PARAMS ((rtx)); +extern bool legitimate_pic_operand_p PARAMS ((rtx)); extern int legitimate_pic_address_disp_p PARAMS ((rtx)); extern int legitimate_address_p PARAMS ((enum machine_mode, rtx, int)); extern rtx legitimize_pic_address PARAMS ((rtx, rtx)); @@ -92,15 +105,18 @@ extern rtx legitimize_address PARAMS ((rtx, rtx, enum machine_mode)); extern void print_reg PARAMS ((rtx, int, FILE*)); extern void print_operand PARAMS ((FILE*, rtx, int)); extern void print_operand_address PARAMS ((FILE*, rtx)); +extern bool output_addr_const_extra PARAMS ((FILE*, rtx)); extern void split_di PARAMS ((rtx[], int, rtx[], rtx[])); extern void split_ti PARAMS ((rtx[], int, rtx[], rtx[])); +extern const char *output_set_got PARAMS ((rtx)); extern const char *output_387_binary_op PARAMS ((rtx, rtx*)); extern const char *output_fix_trunc PARAMS ((rtx, rtx*)); extern const char *output_fp_compare PARAMS ((rtx, rtx*, int, int)); extern void i386_dwarf_output_addr_const PARAMS ((FILE*, rtx)); +extern void 
i386_output_dwarf_dtprel PARAMS ((FILE*, int, rtx)); extern rtx i386_simplify_dwarf_addr PARAMS ((rtx)); extern void ix86_expand_clear PARAMS ((rtx)); @@ -121,6 +137,7 @@ extern void ix86_expand_branch PARAMS ((enum rtx_code, rtx)); extern int ix86_expand_setcc PARAMS ((enum rtx_code, rtx)); extern int ix86_expand_int_movcc PARAMS ((rtx[])); extern int ix86_expand_fp_movcc PARAMS ((rtx[])); +extern void ix86_expand_call PARAMS ((rtx, rtx, rtx, rtx, rtx)); extern void x86_initialize_trampoline PARAMS ((rtx, rtx, rtx)); extern rtx ix86_zero_extend_to_Pmode PARAMS ((rtx)); extern void ix86_split_long_move PARAMS ((rtx[])); @@ -129,6 +146,7 @@ extern void ix86_split_ashrdi PARAMS ((rtx *, rtx)); extern void ix86_split_lshrdi PARAMS ((rtx *, rtx)); extern int ix86_address_cost PARAMS ((rtx)); extern rtx ix86_find_base_term PARAMS ((rtx)); +extern int ix86_check_movabs PARAMS ((rtx, int)); extern rtx assign_386_stack_local PARAMS ((enum machine_mode, int)); extern int ix86_attr_length_immediate_default PARAMS ((rtx, int)); @@ -143,7 +161,7 @@ extern bool ix86_function_value_regno_p PARAMS ((int)); extern bool ix86_function_arg_regno_p PARAMS ((int)); extern int ix86_function_arg_boundary PARAMS ((enum machine_mode, tree)); extern int ix86_return_in_memory PARAMS ((tree)); -extern void ix86_va_start PARAMS ((int, tree, rtx)); +extern void ix86_va_start PARAMS ((tree, rtx)); extern rtx ix86_va_arg PARAMS ((tree, tree)); extern void ix86_setup_incoming_varargs PARAMS ((CUMULATIVE_ARGS *, enum machine_mode, @@ -167,11 +185,15 @@ extern void ix86_set_move_mem_attrs PARAMS ((rtx, rtx, rtx, rtx, rtx)); extern void emit_i387_cw_initialization PARAMS ((rtx, rtx)); extern bool ix86_fp_jump_nontrivial_p PARAMS ((enum rtx_code)); extern void x86_order_regs_for_local_alloc PARAMS ((void)); +extern void x86_function_profiler PARAMS ((FILE *, int)); #ifdef TREE_CODE extern void init_cumulative_args PARAMS ((CUMULATIVE_ARGS *, tree, rtx)); extern rtx function_arg PARAMS ((CUMULATIVE_ARGS *, enum machine_mode, tree, int)); +extern int function_arg_pass_by_reference PARAMS ((CUMULATIVE_ARGS *, + enum machine_mode, + tree, int)); extern void function_arg_advance PARAMS ((CUMULATIVE_ARGS *, enum machine_mode, tree, int)); extern rtx ix86_function_value PARAMS ((tree)); @@ -194,6 +216,20 @@ extern tree ix86_handle_shared_attribute PARAMS ((tree *, tree, tree, int, bool extern unsigned int i386_pe_section_type_flags PARAMS ((tree, const char *, int)); extern void i386_pe_asm_named_section PARAMS ((const char *, unsigned int)); -extern void x86_output_mi_thunk PARAMS ((FILE *, int, tree)); extern int x86_field_alignment PARAMS ((tree, int)); #endif + +extern rtx ix86_tls_get_addr PARAMS ((void)); +extern void x86_machine_dependent_reorg PARAMS ((rtx)); + +/* In winnt.c */ +extern int i386_pe_dllexport_name_p PARAMS ((const char *)); +extern int i386_pe_dllimport_name_p PARAMS ((const char *)); +extern void i386_pe_unique_section PARAMS ((tree, int)); +extern void i386_pe_declare_function_type PARAMS ((FILE *, const char *, int)); +extern void i386_pe_record_external_function PARAMS ((const char *)); +extern void i386_pe_record_exported_symbol PARAMS ((const char *, int)); +extern void i386_pe_asm_file_end PARAMS ((FILE *)); +extern void i386_pe_encode_section_info PARAMS ((tree, int)); +extern const char *i386_pe_strip_name_encoding PARAMS ((const char *)); +extern const char *i386_pe_strip_name_encoding_full PARAMS ((const char *)); diff --git a/contrib/gcc/config/i386/i386.c b/contrib/gcc/config/i386/i386.c index 
054a925..8d033b9 100644 --- a/contrib/gcc/config/i386/i386.c +++ b/contrib/gcc/config/i386/i386.c @@ -1,6 +1,6 @@ /* Subroutines used for code generation on IA-32. Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, - 2002 Free Software Foundation, Inc. + 2002, 2003 Free Software Foundation, Inc. This file is part of GNU CC. @@ -42,13 +42,14 @@ Boston, MA 02111-1307, USA. */ #include "ggc.h" #include "target.h" #include "target-def.h" +#include "langhooks.h" #ifndef CHECK_STACK_LIMIT #define CHECK_STACK_LIMIT (-1) #endif /* Processor costs (relative to an add) */ -static const +static const struct processor_costs size_cost = { /* costs for tunning for size */ 2, /* cost of an add instruction */ 3, /* cost of a lea instruction */ @@ -83,9 +84,16 @@ struct processor_costs size_cost = { /* costs for tunning for size */ 3, /* MMX or SSE register to integer */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ + 2, /* cost of FADD and FSUB insns. */ + 2, /* cost of FMUL instruction. */ + 2, /* cost of FDIV instruction. */ + 2, /* cost of FABS instruction. */ + 2, /* cost of FCHS instruction. */ + 2, /* cost of FSQRT instruction. */ }; + /* Processor costs (relative to an add) */ -static const +static const struct processor_costs i386_cost = { /* 386 specific costs */ 1, /* cost of an add instruction */ 1, /* cost of a lea instruction */ @@ -120,9 +128,15 @@ struct processor_costs i386_cost = { /* 386 specific costs */ 3, /* MMX or SSE register to integer */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ + 23, /* cost of FADD and FSUB insns. */ + 27, /* cost of FMUL instruction. */ + 88, /* cost of FDIV instruction. */ + 22, /* cost of FABS instruction. */ + 24, /* cost of FCHS instruction. */ + 122, /* cost of FSQRT instruction. */ }; -static const +static const struct processor_costs i486_cost = { /* 486 specific costs */ 1, /* cost of an add instruction */ 1, /* cost of a lea instruction */ @@ -157,9 +171,15 @@ struct processor_costs i486_cost = { /* 486 specific costs */ 3, /* MMX or SSE register to integer */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ + 8, /* cost of FADD and FSUB insns. */ + 16, /* cost of FMUL instruction. */ + 73, /* cost of FDIV instruction. */ + 3, /* cost of FABS instruction. */ + 3, /* cost of FCHS instruction. */ + 83, /* cost of FSQRT instruction. */ }; -static const +static const struct processor_costs pentium_cost = { 1, /* cost of an add instruction */ 1, /* cost of a lea instruction */ @@ -194,9 +214,15 @@ struct processor_costs pentium_cost = { 3, /* MMX or SSE register to integer */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ + 3, /* cost of FADD and FSUB insns. */ + 3, /* cost of FMUL instruction. */ + 39, /* cost of FDIV instruction. */ + 1, /* cost of FABS instruction. */ + 1, /* cost of FCHS instruction. */ + 70, /* cost of FSQRT instruction. */ }; -static const +static const struct processor_costs pentiumpro_cost = { 1, /* cost of an add instruction */ 1, /* cost of a lea instruction */ @@ -231,9 +257,15 @@ struct processor_costs pentiumpro_cost = { 3, /* MMX or SSE register to integer */ 32, /* size of prefetch block */ 6, /* number of parallel prefetches */ + 3, /* cost of FADD and FSUB insns. */ + 5, /* cost of FMUL instruction. */ + 56, /* cost of FDIV instruction. */ + 2, /* cost of FABS instruction. */ + 2, /* cost of FCHS instruction. */ + 56, /* cost of FSQRT instruction. 
*/ }; -static const +static const struct processor_costs k6_cost = { 1, /* cost of an add instruction */ 2, /* cost of a lea instruction */ @@ -268,9 +300,15 @@ struct processor_costs k6_cost = { 6, /* MMX or SSE register to integer */ 32, /* size of prefetch block */ 1, /* number of parallel prefetches */ + 2, /* cost of FADD and FSUB insns. */ + 2, /* cost of FMUL instruction. */ + 56, /* cost of FDIV instruction. */ + 2, /* cost of FABS instruction. */ + 2, /* cost of FCHS instruction. */ + 56, /* cost of FSQRT instruction. */ }; -static const +static const struct processor_costs athlon_cost = { 1, /* cost of an add instruction */ 2, /* cost of a lea instruction */ @@ -305,9 +343,15 @@ struct processor_costs athlon_cost = { 5, /* MMX or SSE register to integer */ 64, /* size of prefetch block */ 6, /* number of parallel prefetches */ + 4, /* cost of FADD and FSUB insns. */ + 4, /* cost of FMUL instruction. */ + 24, /* cost of FDIV instruction. */ + 2, /* cost of FABS instruction. */ + 2, /* cost of FCHS instruction. */ + 35, /* cost of FSQRT instruction. */ }; -static const +static const struct processor_costs pentium4_cost = { 1, /* cost of an add instruction */ 1, /* cost of a lea instruction */ @@ -342,6 +386,12 @@ struct processor_costs pentium4_cost = { 10, /* MMX or SSE register to integer */ 64, /* size of prefetch block */ 6, /* number of parallel prefetches */ + 5, /* cost of FADD and FSUB insns. */ + 7, /* cost of FMUL instruction. */ + 43, /* cost of FDIV instruction. */ + 2, /* cost of FABS instruction. */ + 2, /* cost of FCHS instruction. */ + 43, /* cost of FSQRT instruction. */ }; const struct processor_costs *ix86_cost = &pentium_cost; @@ -375,7 +425,8 @@ const int x86_use_cltd = ~(m_PENT | m_K6); const int x86_read_modify_write = ~m_PENT; const int x86_read_modify = ~(m_PENT | m_PPRO); const int x86_split_long_moves = m_PPRO; -const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486; +const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON; +const int x86_fast_prefix = ~(m_PENT | m_486 | m_386); const int x86_single_stringop = m_386 | m_PENT4; const int x86_qimode_math = ~(0); const int x86_promote_qi_regs = 0; @@ -385,28 +436,29 @@ const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4; const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4; const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4; const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4; -const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4); +const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO); const int x86_partial_reg_dependency = m_ATHLON | m_PENT4; const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4; const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO; const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO; const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO; const int x86_decompose_lea = m_PENT4; -const int x86_arch_always_fancy_math_387 = m_PENT|m_PPRO|m_ATHLON|m_PENT4; +const int x86_shift1 = ~m_486; +const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4; /* In case the avreage insn count for single function invocation is lower than this constant, emit fast (but longer) prologue and epilogue code. */ #define FAST_PROLOGUE_INSN_COUNT 30 + /* Set by prologue expander and used by epilogue expander to determine the style used. 
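All of the per-processor cost tables above are expressed in units of one integer add. As a rough, assumed illustration of how the new floating-point entries are meant to be read (this is not GCC's real rtx-cost machinery), two of the tables can be compared like this:

struct fp_costs { int fadd, fmul, fdiv, fabs, fchs, fsqrt; };

static const struct fp_costs i386_fp   = { 23, 27, 88, 22, 24, 122 };  /* i386_cost   */
static const struct fp_costs athlon_fp = {  4,  4, 24,  2,  2,  35 };  /* athlon_cost */

/* Estimated cost, in "adds", of evaluating a*b + c/d. */
static int muladd_div_cost (const struct fp_costs *c)
{
  return c->fmul + c->fdiv + c->fadd;   /* 138 on the 386, 32 on the Athlon */
}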
*/ static int use_fast_prologue_epilogue; -#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx)) - -static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */ -static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */ -static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */ +/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ +static const char *const qi_reg_name[] = QI_REGISTER_NAMES; +static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; +static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* Array of the smallest class containing reg number REGNO, indexed by REGNO. Used by REGNO_REG_CLASS in i386.h. */ @@ -447,11 +499,16 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] = -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ }; -static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/, - 1 /*RDX*/, 2 /*RCX*/, - FIRST_REX_INT_REG /*R8 */, - FIRST_REX_INT_REG + 1 /*R9 */}; -static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDI*/, 5, 4}; +static int const x86_64_int_parameter_registers[6] = +{ + 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/, + FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */ +}; + +static int const x86_64_int_return_registers[4] = +{ + 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/ +}; /* The "default" register map used in 64bit mode. */ int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = @@ -536,14 +593,19 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = rtx ix86_compare_op0 = NULL_RTX; rtx ix86_compare_op1 = NULL_RTX; +/* The encoding characters for the four TLS models present in ELF. */ + +static char const tls_model_chars[] = " GLil"; + #define MAX_386_STACK_LOCALS 3 /* Size of the register save area. */ #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) /* Define the structure for the machine field in struct function. */ -struct machine_function +struct machine_function GTY(()) { rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS]; + const char *some_ld_name; int save_varrargs_registers; int accesses_prev_frame; }; @@ -596,15 +658,17 @@ enum cmodel ix86_cmodel; /* Asm dialect. */ const char *ix86_asm_string; enum asm_dialect ix86_asm_dialect = ASM_ATT; +/* TLS dialext. */ +const char *ix86_tls_dialect_string; +enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; -/* which cpu are we scheduling for */ -enum processor_type ix86_cpu; - -/* which unit we are generating floating point math for */ +/* Which unit we are generating floating point math for. */ enum fpmath_unit ix86_fpmath; -/* which instruction set architecture to use. */ -int ix86_arch; +/* Which cpu are we scheduling for. */ +enum processor_type ix86_cpu; +/* Which instruction set architecture to use. */ +enum processor_type ix86_arch; /* Strings to hold which cpu and instruction set architecture to use. 
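The reformatted x86_64_int_parameter_registers table above encodes the SysV AMD64 integer argument order. For a function taking six integer arguments, that ordering works out as in this assumed example:

long f (long a, long b, long c, long d, long e, long g);
/* a -> %rdi, b -> %rsi, c -> %rdx, d -> %rcx, e -> %r8, g -> %r9;
   a seventh integer argument would already be passed on the stack, and an
   integer return value comes back in %rax, the first entry of
   x86_64_int_return_registers. */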
*/ const char *ix86_cpu_string; /* for -mcpu=<xxx> */ @@ -646,35 +710,33 @@ static char internal_label_prefix[16]; static int internal_label_prefix_len; static int local_symbolic_operand PARAMS ((rtx, enum machine_mode)); +static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model)); static void output_pic_addr_const PARAMS ((FILE *, rtx, int)); static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode, int, int, FILE *)); +static const char *get_some_local_dynamic_name PARAMS ((void)); +static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *)); +static rtx maybe_get_pool_constant PARAMS ((rtx)); static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx)); static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code, rtx *, rtx *)); +static rtx get_thread_pointer PARAMS ((void)); +static void get_pc_thunk_name PARAMS ((char [32], unsigned int)); static rtx gen_push PARAMS ((rtx)); static int memory_address_length PARAMS ((rtx addr)); static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type)); static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type)); -static int ix86_safe_length PARAMS ((rtx)); -static enum attr_memory ix86_safe_memory PARAMS ((rtx)); -static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx)); static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx)); static void ix86_dump_ppro_packet PARAMS ((FILE *)); static void ix86_reorder_insn PARAMS ((rtx *, rtx *)); -static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair, - rtx)); -static void ix86_init_machine_status PARAMS ((struct function *)); -static void ix86_mark_machine_status PARAMS ((struct function *)); -static void ix86_free_machine_status PARAMS ((struct function *)); +static struct machine_function * ix86_init_machine_status PARAMS ((void)); static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode)); -static int ix86_safe_length_prefix PARAMS ((rtx)); static int ix86_nsaved_regs PARAMS ((void)); static void ix86_emit_save_regs PARAMS ((void)); static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT)); static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int)); +static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT)); static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx)); -static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *)); static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *)); static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void)); static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT)); @@ -685,7 +747,14 @@ static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int)); static void ix86_sched_init PARAMS ((FILE *, int, int)); static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int)); static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int)); +static int ia32_use_dfa_pipeline_interface PARAMS ((void)); +static int ia32_multipass_dfa_lookahead PARAMS ((void)); static void ix86_init_mmx_sse_builtins PARAMS ((void)); +static rtx x86_this_parameter PARAMS ((tree)); +static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT, + HOST_WIDE_INT, tree)); +static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT, + HOST_WIDE_INT, tree)); struct ix86_address { @@ -694,6 +763,11 @@ struct ix86_address }; static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *)); +static bool ix86_cannot_force_const_mem PARAMS ((rtx)); + +static void ix86_encode_section_info PARAMS ((tree, int)) 
ATTRIBUTE_UNUSED; +static const char *ix86_strip_name_encoding PARAMS ((const char *)) + ATTRIBUTE_UNUSED; struct builtin_description; static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *, @@ -703,8 +777,6 @@ static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *, static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx)); static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int)); static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx)); -static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code, - tree, rtx)); static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree)); static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode)); static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code)); @@ -718,14 +790,18 @@ static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code)); static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code)); static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code)); static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code)); -static int ix86_save_reg PARAMS ((int, int)); +static unsigned int ix86_select_alt_pic_regnum PARAMS ((void)); +static int ix86_save_reg PARAMS ((unsigned int, int)); static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *)); static int ix86_comp_type_attributes PARAMS ((tree, tree)); +static int ix86_fntype_regparm PARAMS ((tree)); const struct attribute_spec ix86_attribute_table[]; static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *)); static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *)); +static int ix86_value_regno PARAMS ((enum machine_mode)); +static bool contains_128bit_aligned_vector_p PARAMS ((tree)); -#ifdef DO_GLOBAL_CTORS_BODY +#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int)); #endif @@ -781,12 +857,8 @@ static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class, #undef TARGET_EXPAND_BUILTIN #define TARGET_EXPAND_BUILTIN ix86_expand_builtin -#if defined (OSF_OS) || defined (TARGET_OSF1ELF) - static void ix86_osf_output_function_prologue PARAMS ((FILE *, - HOST_WIDE_INT)); -# undef TARGET_ASM_FUNCTION_PROLOGUE -# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue -#endif +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue #undef TARGET_ASM_OPEN_PAREN #define TARGET_ASM_OPEN_PAREN "" @@ -819,9 +891,33 @@ static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class, #define TARGET_SCHED_INIT ix86_sched_init #undef TARGET_SCHED_REORDER #define TARGET_SCHED_REORDER ix86_sched_reorder +#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE +#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \ + ia32_use_dfa_pipeline_interface +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + ia32_multipass_dfa_lookahead + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS true +#endif +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk struct gcc_target targetm = TARGET_INITIALIZER; +/* The svr4 ABI for the 
i386 says that records and unions are returned + in memory. */ +#ifndef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 1 +#endif + /* Sometimes certain combinations of command options do not make sense on a particular target machine. You can define a macro `OVERRIDE_OPTIONS' to take account of this. This macro, if @@ -883,6 +979,9 @@ override_options () {"i586", PROCESSOR_PENTIUM, 0}, {"pentium", PROCESSOR_PENTIUM, 0}, {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, + {"winchip-c6", PROCESSOR_I486, PTA_MMX}, + {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, + {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, {"i686", PROCESSOR_PENTIUMPRO, 0}, {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, @@ -904,7 +1003,12 @@ override_options () | PTA_3DNOW_A | PTA_SSE}, }; - int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta); + int const pta_size = ARRAY_SIZE (processor_alias_table); + + /* By default our XFmode is the 80-bit extended format. If we have + use TFmode instead, it's also the 80-bit format, but with padding. */ + real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format; + real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format; /* Set the default values for switches whose default depends on TARGET_64BIT in case they weren't overwriten by command line options. */ @@ -986,19 +1090,19 @@ override_options () /* Default cpu tuning to the architecture. */ ix86_cpu = ix86_arch; if (processor_alias_table[i].flags & PTA_MMX - && !(target_flags & MASK_MMX_SET)) + && !(target_flags_explicit & MASK_MMX)) target_flags |= MASK_MMX; if (processor_alias_table[i].flags & PTA_3DNOW - && !(target_flags & MASK_3DNOW_SET)) + && !(target_flags_explicit & MASK_3DNOW)) target_flags |= MASK_3DNOW; if (processor_alias_table[i].flags & PTA_3DNOW_A - && !(target_flags & MASK_3DNOW_A_SET)) + && !(target_flags_explicit & MASK_3DNOW_A)) target_flags |= MASK_3DNOW_A; if (processor_alias_table[i].flags & PTA_SSE - && !(target_flags & MASK_SSE_SET)) + && !(target_flags_explicit & MASK_SSE)) target_flags |= MASK_SSE; if (processor_alias_table[i].flags & PTA_SSE2 - && !(target_flags & MASK_SSE2_SET)) + && !(target_flags_explicit & MASK_SSE2)) target_flags |= MASK_SSE2; if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) x86_prefetch_sse = true; @@ -1028,8 +1132,6 @@ override_options () /* Arrange to set up i386_stack_locals for all functions. */ init_machine_status = ix86_init_machine_status; - mark_machine_status = ix86_mark_machine_status; - free_machine_status = ix86_free_machine_status; /* Validate -mregparm= value. */ if (ix86_regparm_string) @@ -1130,6 +1232,17 @@ override_options () ix86_branch_cost = i; } + if (ix86_tls_dialect_string) + { + if (strcmp (ix86_tls_dialect_string, "gnu") == 0) + ix86_tls_dialect = TLS_DIALECT_GNU; + else if (strcmp (ix86_tls_dialect_string, "sun") == 0) + ix86_tls_dialect = TLS_DIALECT_SUN; + else + error ("bad value (%s) for -mtls-dialect= switch", + ix86_tls_dialect_string); + } + /* Keep nonleaf frame pointers. 
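The override_options hunks above replace the old MASK_*_SET bookkeeping with target_flags_explicit: an -march entry may switch MMX/3DNow!/SSE on by default, but only when the user has not set or cleared that flag explicitly. A compressed, assumed sketch of the pattern (toy names, not GCC's real option machinery):

#define MASK_MMX 0x1

static unsigned target_flags;            /* effective switches                  */
static unsigned target_flags_explicit;   /* bits the user named on the cmdline  */

static void apply_arch_implied_mmx (int arch_implies_mmx)
{
  if (arch_implies_mmx && !(target_flags_explicit & MASK_MMX))
    target_flags |= MASK_MMX;   /* default it on; an explicit -mno-mmx would
                                   have set the explicit bit and won instead */
}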
*/ if (TARGET_OMIT_LEAF_FRAME_POINTER) flag_omit_frame_pointer = 1; @@ -1187,7 +1300,7 @@ override_options () else ix86_fpmath = FPMATH_SSE | FPMATH_387; } - else + else error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); } @@ -1209,7 +1322,7 @@ override_options () target_flags |= MASK_3DNOW_A; } if ((x86_accumulate_outgoing_args & CPUMASK) - && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET) + && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) && !optimize_size) target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; @@ -1234,6 +1347,7 @@ optimization_options (level, size) if (level > 1) flag_schedule_insns = 0; #endif + /* The default values of these switches depend on the TARGET_64BIT that is not known at this moment. Mark these values with 2 and let user the to override these. In case there is no command line option @@ -1334,105 +1448,6 @@ ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs) return NULL_TREE; } -#if defined (OSF_OS) || defined (TARGET_OSF1ELF) - -/* Generate the assembly code for function entry. FILE is a stdio - stream to output the code to. SIZE is an int: how many units of - temporary storage to allocate. - - Refer to the array `regs_ever_live' to determine which registers to - save; `regs_ever_live[I]' is nonzero if register number I is ever - used in the function. This function is responsible for knowing - which registers should not be saved even if used. - - We override it here to allow for the new profiling code to go before - the prologue and the old mcount code to go after the prologue (and - after %ebx has been set up for ELF shared library support). */ - -static void -ix86_osf_output_function_prologue (file, size) - FILE *file; - HOST_WIDE_INT size; -{ - const char *prefix = ""; - const char *const lprefix = LPREFIX; - int labelno = current_function_profile_label_no; - -#ifdef OSF_OS - - if (TARGET_UNDERSCORES) - prefix = "_"; - - if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE) - { - if (!flag_pic && !HALF_PIC_P ()) - { - fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); - fprintf (file, "\tcall *%s_mcount_ptr\n", prefix); - } - - else if (HALF_PIC_P ()) - { - rtx symref; - - HALF_PIC_EXTERNAL ("_mcount_ptr"); - symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode, - "_mcount_ptr")); - - fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); - fprintf (file, "\tmovl %s%s,%%eax\n", prefix, - XSTR (symref, 0)); - fprintf (file, "\tcall *(%%eax)\n"); - } - - else - { - static int call_no = 0; - - fprintf (file, "\tcall %sPc%d\n", lprefix, call_no); - fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no); - fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n", - lprefix, call_no++); - fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n", - lprefix, labelno); - fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n", - prefix); - fprintf (file, "\tcall *(%%eax)\n"); - } - } - -#else /* !OSF_OS */ - - if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE) - { - if (!flag_pic) - { - fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); - fprintf (file, "\tcall *%s_mcount_ptr\n", prefix); - } - - else - { - static int call_no = 0; - - fprintf (file, "\tcall %sPc%d\n", lprefix, call_no); - fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no); - fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n", - lprefix, call_no++); - fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n", - lprefix, labelno); - fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n", - prefix); 
- fprintf (file, "\tcall *(%%eax)\n"); - } - } -#endif /* !OSF_OS */ - - function_prologue (file, size); -} - -#endif /* OSF_OS || TARGET_OSF1ELF */ - /* Return 0 if the attributes for two types are incompatible, 1 if they are compatible, and 2 if they are nearly compatible (which causes a warning to be generated). */ @@ -1455,6 +1470,21 @@ ix86_comp_type_attributes (type1, type2) return 1; } +/* Return the regparm value for a fuctio with the indicated TYPE. */ + +static int +ix86_fntype_regparm (type) + tree type; +{ + tree attr; + + attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); + if (attr) + return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); + else + return ix86_regparm; +} + /* Value is the number of bytes of arguments automatically popped when returning from a subroutine call. FUNDECL is the declaration node of the function (as a tree), @@ -1498,15 +1528,7 @@ ix86_return_pops_args (fundecl, funtype, size) if (aggregate_value_p (TREE_TYPE (funtype)) && !TARGET_64BIT) { - int nregs = ix86_regparm; - - if (funtype) - { - tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype)); - - if (attr) - nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); - } + int nregs = ix86_fntype_regparm (funtype); if (!nregs) return GET_MODE_SIZE (Pmode); @@ -1673,8 +1695,8 @@ classify_argument (mode, type, classes, bit_offset) (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - /* Variable sized structures are always passed on the stack. */ - if (mode == BLKmode && type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) + /* Variable sized entities are always passed/returned in memory. */ + if (bytes < 0) return 0; if (type && AGGREGATE_TYPE_P (type)) @@ -1722,7 +1744,7 @@ classify_argument (mode, type, classes, bit_offset) return 0; for (i = 0; i < num; i++) { - int pos = (offset + bit_offset) / 8 / 8; + int pos = (offset + (bit_offset % 64)) / 8 / 8; classes[i + pos] = merge_classes (subclasses[i], classes[i + pos]); } @@ -1759,7 +1781,7 @@ classify_argument (mode, type, classes, bit_offset) for (i = 0; i < num; i++) { int pos = - (int_bit_position (field) + bit_offset) / 8 / 8; + (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; classes[i + pos] = merge_classes (subclasses[i], classes[i + pos]); } @@ -1805,12 +1827,12 @@ classify_argument (mode, type, classes, bit_offset) num = classify_argument (TYPE_MODE (type), type, subclasses, - (offset + bit_offset) % 256); + (offset + (bit_offset % 64)) % 256); if (!num) return 0; for (i = 0; i < num; i++) { - int pos = (offset + bit_offset) / 8 / 8; + int pos = (offset + (bit_offset % 64)) / 8 / 8; classes[i + pos] = merge_classes (subclasses[i], classes[i + pos]); } @@ -1922,6 +1944,10 @@ classify_argument (mode, type, classes, bit_offset) return 1; case V4SFmode: case V4SImode: + case V16QImode: + case V8HImode: + case V2DFmode: + case V2DImode: classes[0] = X86_64_SSE_CLASS; classes[1] = X86_64_SSEUP_CLASS; return 2; @@ -1929,8 +1955,7 @@ classify_argument (mode, type, classes, bit_offset) case V2SImode: case V4HImode: case V8QImode: - classes[0] = X86_64_SSE_CLASS; - return 1; + return 0; case BLKmode: case VOIDmode: return 0; @@ -2099,13 +2124,15 @@ construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regn break; case X86_64_SSE_CLASS: if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS) - tmpmode = TImode, i++; + tmpmode = TImode; else tmpmode = DImode; exp [nexps++] = gen_rtx_EXPR_LIST 
(VOIDmode, gen_rtx_REG (tmpmode, SSE_REGNO (sse_regno)), GEN_INT (i*8)); + if (tmpmode == TImode) + i++; sse_regno++; break; default: @@ -2232,6 +2259,9 @@ function_arg (cum, mode, type, named) break; case BLKmode: + if (bytes < 0) + break; + /* FALLTHRU */ case DImode: case SImode: case HImode: @@ -2248,11 +2278,11 @@ function_arg (cum, mode, type, named) if (TARGET_DEBUG_ARG) { fprintf (stderr, - "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d", + "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ", words, cum->words, cum->nregs, GET_MODE_NAME (mode), named); if (ret) - fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]); + print_simple_rtl (stderr, ret); else fprintf (stderr, ", stack"); @@ -2262,6 +2292,90 @@ function_arg (cum, mode, type, named) return ret; } +/* Return true when TYPE should be 128bit aligned for 32bit argument passing + ABI */ +static bool +contains_128bit_aligned_vector_p (type) + tree type; +{ + enum machine_mode mode = TYPE_MODE (type); + if (SSE_REG_MODE_P (mode) + && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) + return true; + if (TYPE_ALIGN (type) < 128) + return false; + + if (AGGREGATE_TYPE_P (type)) + { + /* Walk the agregates recursivly. */ + if (TREE_CODE (type) == RECORD_TYPE + || TREE_CODE (type) == UNION_TYPE + || TREE_CODE (type) == QUAL_UNION_TYPE) + { + tree field; + + if (TYPE_BINFO (type) != NULL + && TYPE_BINFO_BASETYPES (type) != NULL) + { + tree bases = TYPE_BINFO_BASETYPES (type); + int n_bases = TREE_VEC_LENGTH (bases); + int i; + + for (i = 0; i < n_bases; ++i) + { + tree binfo = TREE_VEC_ELT (bases, i); + tree type = BINFO_TYPE (binfo); + + if (contains_128bit_aligned_vector_p (type)) + return true; + } + } + /* And now merge the fields of structure. */ + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL + && contains_128bit_aligned_vector_p (TREE_TYPE (field))) + return true; + } + } + /* Just for use if some languages passes arrays by value. */ + else if (TREE_CODE (type) == ARRAY_TYPE) + { + if (contains_128bit_aligned_vector_p (TREE_TYPE (type))) + return true; + } + else + abort (); + } + return false; +} + +/* A C expression that indicates when an argument must be passed by + reference. If nonzero for an argument, a copy of that argument is + made in memory and a pointer to the argument is passed instead of + the argument itself. The pointer is passed in whatever way is + appropriate for passing a pointer to that type. */ + +int +function_arg_pass_by_reference (cum, mode, type, named) + CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED; + enum machine_mode mode ATTRIBUTE_UNUSED; + tree type; + int named ATTRIBUTE_UNUSED; +{ + if (!TARGET_64BIT) + return 0; + + if (type && int_size_in_bytes (type) == -1) + { + if (TARGET_DEBUG_ARG) + fprintf (stderr, "function_arg_pass_by_reference\n"); + return 1; + } + + return 0; +} + /* Gives the alignment boundary, in bits, of an argument with the specified mode and type. */ @@ -2271,14 +2385,34 @@ ix86_function_arg_boundary (mode, type) tree type; { int align; - if (!TARGET_64BIT) - return PARM_BOUNDARY; if (type) align = TYPE_ALIGN (type); else align = GET_MODE_ALIGNMENT (mode); if (align < PARM_BOUNDARY) align = PARM_BOUNDARY; + if (!TARGET_64BIT) + { + /* i386 ABI defines all arguments to be 4 byte aligned. We have to + make an exception for SSE modes since these require 128bit + alignment. + + The handling here differs from field_alignment. 
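contains_128bit_aligned_vector_p and the ix86_function_arg_boundary change above implement one rule for the 32-bit ABI: arguments land on a 4-byte boundary unless they are, or contain, an SSE vector, which needs 16 bytes. An assumed example of the two cases, using the GCC vector-extension syntax:

typedef float v4sf __attribute__ ((vector_size (16)));

struct plain  { int i; double d; };   /* ordinary 4-byte argument boundary      */
struct vecarg { int i; v4sf v;   };   /* contains a 128-bit vector, so it gets a
                                         16-byte argument boundary              */

void f (struct plain p);
void g (struct vecarg q);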
ICC aligns MMX + arguments to 4 byte boundaries, while structure fields are aligned + to 8 byte boundaries. */ + if (!type) + { + if (!SSE_REG_MODE_P (mode)) + align = PARM_BOUNDARY; + } + else + { + if (!contains_128bit_aligned_vector_p (type)) + align = PARM_BOUNDARY; + } + if (align != PARM_BOUNDARY && !TARGET_SSE) + abort(); + } if (align > 128) align = 128; return align; @@ -2320,7 +2454,8 @@ ix86_function_value (valtype) return ret; } else - return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype))); + return gen_rtx_REG (TYPE_MODE (valtype), + ix86_value_regno (TYPE_MODE (valtype))); } /* Return false iff type is returned in memory. */ @@ -2370,7 +2505,20 @@ ix86_libcall_value (mode) } } else - return gen_rtx_REG (mode, VALUE_REGNO (mode)); + return gen_rtx_REG (mode, ix86_value_regno (mode)); +} + +/* Given a mode, return the register to use for a return value. */ + +static int +ix86_value_regno (mode) + enum machine_mode mode; +{ + if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387) + return FIRST_FLOAT_REG; + if (mode == TImode || VECTOR_MODE_P (mode)) + return FIRST_SSE_REG; + return 0; } /* Create the va_list data type. */ @@ -2384,12 +2532,12 @@ ix86_build_va_list () if (!TARGET_64BIT) return build_pointer_type (char_type_node); - record = make_lang_type (RECORD_TYPE); + record = (*lang_hooks.types.make_type) (RECORD_TYPE); type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); - f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), + f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), unsigned_type_node); - f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), + f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), unsigned_type_node); f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), ptr_type_node); @@ -2415,7 +2563,7 @@ ix86_build_va_list () } /* Perform any needed actions needed for a function that is receiving a - variable number of arguments. + variable number of arguments. CUM is as above. @@ -2454,6 +2602,8 @@ ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl) /* Indicate to allocate space on the stack for varargs save area. */ ix86_save_varrargs_registers = 1; + cfun->stack_alignment_needed = 128; + fntype = TREE_TYPE (current_function_decl); stdarg_p = (TYPE_ARG_TYPES (fntype) != 0 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype))) @@ -2529,8 +2679,7 @@ ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl) /* Implement va_start. */ void -ix86_va_start (stdarg_p, valist, nextarg) - int stdarg_p; +ix86_va_start (valist, nextarg) tree valist; rtx nextarg; { @@ -2541,7 +2690,7 @@ ix86_va_start (stdarg_p, valist, nextarg) /* Only 64bit target needs something special. */ if (!TARGET_64BIT) { - std_expand_builtin_va_start (stdarg_p, valist, nextarg); + std_expand_builtin_va_start (valist, nextarg); return; } @@ -2590,7 +2739,6 @@ ix86_va_start (stdarg_p, valist, nextarg) t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); - cfun->preferred_stack_boundary = 128; } /* Implement va_arg. 
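The record that ix86_build_va_list above assembles for the 64-bit target is the SysV AMD64 va_list; written as ordinary C it looks roughly like this (shown for orientation only, with the array typedef renamed so the sketch does not clash with <stdarg.h>):

typedef struct __va_list_tag
{
  unsigned int gp_offset;    /* offset into reg_save_area of the next unread
                                integer register                              */
  unsigned int fp_offset;    /* offset of the next unread SSE register        */
  void *overflow_arg_area;   /* next argument passed on the stack             */
  void *reg_save_area;       /* block the prologue spilled the argument
                                registers into (see X86_64_VARARGS_SIZE)      */
} __va_list_tag;

typedef __va_list_tag va_list_sketch[1];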
*/ @@ -2598,13 +2746,14 @@ rtx ix86_va_arg (valist, type) tree valist, type; { - static int intreg[6] = { 0, 1, 2, 3, 4, 5 }; + static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; tree f_gpr, f_fpr, f_ovf, f_sav; tree gpr, fpr, ovf, sav, t; int size, rsize; rtx lab_false, lab_over = NULL_RTX; rtx addr_rtx, r; rtx container; + int indirect_p = 0; /* Only 64bit target needs something special. */ if (!TARGET_64BIT) @@ -2624,6 +2773,13 @@ ix86_va_arg (valist, type) sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav); size = int_size_in_bytes (type); + if (size == -1) + { + /* Passed by reference. */ + indirect_p = 1; + type = build_pointer_type (type); + size = int_size_in_bytes (type); + } rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; container = construct_container (TYPE_MODE (type), type, 0, @@ -2728,10 +2884,12 @@ ix86_va_arg (valist, type) { int i; rtx mem; + rtx x; /* Never use the memory itself, as it has the alias set. */ - addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0); - mem = gen_rtx_MEM (BLKmode, addr_rtx); + x = XEXP (assign_temp (type, 0, 1, 0), 0); + mem = gen_rtx_MEM (BLKmode, x); + force_operand (x, addr_rtx); set_mem_alias_set (mem, get_varargs_alias_set ()); set_mem_align (mem, BITS_PER_UNIT); @@ -2814,9 +2972,53 @@ ix86_va_arg (valist, type) if (container) emit_label (lab_over); + if (indirect_p) + { + r = gen_rtx_MEM (Pmode, addr_rtx); + set_mem_alias_set (r, get_varargs_alias_set ()); + emit_move_insn (addr_rtx, r); + } + return addr_rtx; } +/* Return nonzero if OP is either a i387 or SSE fp register. */ +int +any_fp_register_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return ANY_FP_REG_P (op); +} + +/* Return nonzero if OP is an i387 fp register. */ +int +fp_register_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return FP_REG_P (op); +} + +/* Return nonzero if OP is a non-fp register_operand. */ +int +register_and_not_any_fp_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return register_operand (op, mode) && !ANY_FP_REG_P (op); +} + +/* Return nonzero of OP is a register operand other than an + i387 fp register. */ +int +register_and_not_fp_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return register_operand (op, mode) && !FP_REG_P (op); +} + /* Return nonzero if OP is general operand representable on x86_64. */ int @@ -2876,6 +3078,29 @@ x86_64_movabs_operand (op, mode) return 0; } +/* Return nonzero if OPNUM's MEM should be matched + in movabs* patterns. */ + +int +ix86_check_movabs (insn, opnum) + rtx insn; + int opnum; +{ + rtx set, mem; + + set = PATTERN (insn); + if (GET_CODE (set) == PARALLEL) + set = XVECEXP (set, 0, 0); + if (GET_CODE (set) != SET) + abort (); + mem = XEXP (set, opnum); + while (GET_CODE (mem) == SUBREG) + mem = SUBREG_REG (mem); + if (GET_CODE (mem) != MEM) + abort (); + return (volatile_ok || !MEM_VOLATILE_P (mem)); +} + /* Return nonzero if OP is nonmemory operand representable on x86_64. */ int @@ -2953,9 +3178,9 @@ symbolic_operand (op, mode) if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF || (GET_CODE (op) == UNSPEC - && (XINT (op, 1) == 6 - || XINT (op, 1) == 7 - || XINT (op, 1) == 15))) + && (XINT (op, 1) == UNSPEC_GOT + || XINT (op, 1) == UNSPEC_GOTOFF + || XINT (op, 1) == UNSPEC_GOTPCREL))) return 1; if (GET_CODE (op) != PLUS || GET_CODE (XEXP (op, 1)) != CONST_INT) @@ -2967,7 +3192,7 @@ symbolic_operand (op, mode) return 1; /* Only @GOTOFF gets offsets. 
*/ if (GET_CODE (op) != UNSPEC - || XINT (op, 1) != 7) + || XINT (op, 1) != UNSPEC_GOTOFF) return 0; op = XVECEXP (op, 0, 0); @@ -2996,7 +3221,7 @@ pic_symbolic_operand (op, mode) if (GET_CODE (XEXP (op, 0)) == UNSPEC) return 1; } - else + else { if (GET_CODE (op) == UNSPEC) return 1; @@ -3017,14 +3242,14 @@ local_symbolic_operand (op, mode) rtx op; enum machine_mode mode ATTRIBUTE_UNUSED; { - if (GET_CODE (op) == LABEL_REF) - return 1; - if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == PLUS && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) op = XEXP (XEXP (op, 0), 0); + if (GET_CODE (op) == LABEL_REF) + return 1; + if (GET_CODE (op) != SYMBOL_REF) return 0; @@ -3034,10 +3259,10 @@ local_symbolic_operand (op, mode) return 1; /* There is, however, a not insubstantial body of code in the rest of - the compiler that assumes it can just stick the results of + the compiler that assumes it can just stick the results of ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */ /* ??? This is a hack. Should update the body of the compiler to - always create a DECL an invoke ENCODE_SECTION_INFO. */ + always create a DECL an invoke targetm.encode_section_info. */ if (strncmp (XSTR (op, 0), internal_label_prefix, internal_label_prefix_len) == 0) return 1; @@ -3045,6 +3270,70 @@ local_symbolic_operand (op, mode) return 0; } +/* Test for various thread-local symbols. See ix86_encode_section_info. */ + +int +tls_symbolic_operand (op, mode) + register rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + const char *symbol_str; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + symbol_str = XSTR (op, 0); + + if (symbol_str[0] != '%') + return 0; + return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars; +} + +static int +tls_symbolic_operand_1 (op, kind) + rtx op; + enum tls_model kind; +{ + const char *symbol_str; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + symbol_str = XSTR (op, 0); + + return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind]; +} + +int +global_dynamic_symbolic_operand (op, mode) + register rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC); +} + +int +local_dynamic_symbolic_operand (op, mode) + register rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC); +} + +int +initial_exec_symbolic_operand (op, mode) + register rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC); +} + +int +local_exec_symbolic_operand (op, mode) + register rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC); +} + /* Test for a valid operand for a call instruction. Don't allow the arg pointer register or virtual regs since they may decay into reg + const, which the patterns can't handle. */ @@ -3072,11 +3361,6 @@ call_insn_operand (op, mode) if (GET_CODE (op) == SYMBOL_REF) return 1; - /* Half-pic doesn't allow anything but registers and constants. - We've just taken care of the later. */ - if (HALF_PIC_P ()) - return register_operand (op, Pmode); - /* Otherwise we can allow any general_operand in the address. */ return general_operand (op, Pmode); } @@ -3221,6 +3505,30 @@ nonmemory_no_elim_operand (op, mode) return GET_CODE (op) == CONST_INT || register_operand (op, mode); } +/* Return false if this is any eliminable register or stack register, + otherwise work like register_operand. 
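The new TLS predicates above all key off a name-encoding convention: a thread-local symbol's name is prefixed with '%' followed by one character from tls_model_chars identifying its access model. A small, assumed decoder shows the idea (the real predicates work on SYMBOL_REFs, and the encoding is applied by ix86_encode_section_info):

#include <string.h>

static const char tls_model_chars[] = " GLil";   /* index matches enum tls_model */

/* Return the TLS model index encoded in NAME, or 0 if NAME is not thread-local. */
static int tls_model_of_name (const char *name)
{
  const char *p;

  if (name[0] != '%')
    return 0;
  p = strchr (tls_model_chars, name[1]);
  return p ? (int) (p - tls_model_chars) : 0;
}
/* tls_model_of_name ("%Gfoo") -> 1 (global dynamic)
   tls_model_of_name ("bar")   -> 0 (not TLS)         */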
*/ + +int +index_register_operand (op, mode) + register rtx op; + enum machine_mode mode; +{ + rtx t = op; + if (GET_CODE (t) == SUBREG) + t = SUBREG_REG (t); + if (!REG_P (t)) + return 0; + if (t == arg_pointer_rtx + || t == frame_pointer_rtx + || t == virtual_incoming_args_rtx + || t == virtual_stack_vars_rtx + || t == virtual_stack_dynamic_rtx + || REGNO (t) == STACK_POINTER_REGNUM) + return 0; + + return general_operand (op, mode); +} + /* Return true if op is a Q_REGS class register. */ int @@ -3249,6 +3557,19 @@ non_q_regs_operand (op, mode) return NON_QI_REG_P (op); } +/* Return 1 when OP is operand acceptable for standard SSE move. */ +int +vector_move_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (nonimmediate_operand (op, mode)) + return 1; + if (GET_MODE (op) != mode && mode != VOIDmode) + return 0; + return (op == CONST0_RTX (GET_MODE (op))); +} + /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS insns. */ int @@ -3560,6 +3881,11 @@ aligned_operand (op, mode) if (! ix86_decompose_address (op, &parts)) abort (); + if (parts.base && GET_CODE (parts.base) == SUBREG) + parts.base = SUBREG_REG (parts.base); + if (parts.index && GET_CODE (parts.index) == SUBREG) + parts.index = SUBREG_REG (parts.index); + /* Look for some component that isn't known to be aligned. */ if (parts.index) { @@ -3609,8 +3935,8 @@ int standard_sse_constant_p (x) rtx x; { - if (GET_CODE (x) != CONST_DOUBLE) - return -1; + if (x == const0_rtx) + return 1; return (x == CONST0_RTX (GET_MODE (x))); } @@ -3700,22 +4026,33 @@ x86_64_sign_extended_value (value) } break; - /* For certain code models, the symbolic references are known to fit. */ + /* For certain code models, the symbolic references are known to fit. + in CM_SMALL_PIC model we know it fits if it is local to the shared + library. Don't count TLS SYMBOL_REFs here, since they should fit + only if inside of UNSPEC handled below. */ case SYMBOL_REF: - return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL; + return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL); /* For certain code models, the code is near as well. */ case LABEL_REF: - return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC; + return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM + || ix86_cmodel == CM_KERNEL); /* We also may accept the offsetted memory references in certain special cases. */ case CONST: - if (GET_CODE (XEXP (value, 0)) == UNSPEC - && XVECLEN (XEXP (value, 0), 0) == 1 - && XINT (XEXP (value, 0), 1) == 15) - return 1; - else if (GET_CODE (XEXP (value, 0)) == PLUS) + if (GET_CODE (XEXP (value, 0)) == UNSPEC) + switch (XINT (XEXP (value, 0), 1)) + { + case UNSPEC_GOTPCREL: + case UNSPEC_DTPOFF: + case UNSPEC_GOTNTPOFF: + case UNSPEC_NTPOFF: + return 1; + default: + break; + } + if (GET_CODE (XEXP (value, 0)) == PLUS) { rtx op1 = XEXP (XEXP (value, 0), 0); rtx op2 = XEXP (XEXP (value, 0), 1); @@ -3729,12 +4066,12 @@ x86_64_sign_extended_value (value) switch (GET_CODE (op1)) { case SYMBOL_REF: - /* For CM_SMALL assume that latest object is 1MB before + /* For CM_SMALL assume that latest object is 16MB before end of 31bits boundary. We may also accept pretty large negative constants knowing that all objects are in the positive half of address space. 
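x86_64_sign_extended_value above is ultimately asking whether a value can be used as an x86-64 sign-extended 32-bit immediate or displacement. The core numeric test, pulled out as a stand-alone helper purely for illustration:

#include <stdint.h>
#include <stdbool.h>

static bool fits_sign_extended_imm32 (int64_t v)
{
  /* Truncating to 32 bits and sign-extending back must give the value back. */
  return v == (int64_t) (int32_t) v;
}
/* fits_sign_extended_imm32 (0x7fffffffLL) -> true
   fits_sign_extended_imm32 (0x80000000LL) -> false (needs movabs or a
                                              zero-extended immediate)
   fits_sign_extended_imm32 (-1)           -> true                      */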
*/ if (ix86_cmodel == CM_SMALL - && offset < 1024*1024*1024 + && offset < 16*1024*1024 && trunc_int_for_mode (offset, SImode) == offset) return 1; /* For CM_KERNEL we know that all object resist in the @@ -3750,7 +4087,7 @@ x86_64_sign_extended_value (value) /* These conditions are similar to SYMBOL_REF ones, just the constraints for code models differ. */ if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) - && offset < 1024*1024*1024 + && offset < 16*1024*1024 && trunc_int_for_mode (offset, SImode) == offset) return 1; if (ix86_cmodel == CM_KERNEL @@ -3758,6 +4095,16 @@ x86_64_sign_extended_value (value) && trunc_int_for_mode (offset, SImode) == offset) return 1; break; + case UNSPEC: + switch (XINT (op1, 1)) + { + case UNSPEC_DTPOFF: + case UNSPEC_NTPOFF: + if (offset > 0 + && trunc_int_for_mode (offset, SImode) == offset) + return 1; + } + break; default: return 0; } @@ -3865,7 +4212,11 @@ ix86_frame_pointer_required () /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off the frame pointer by default. Turn it back on now if we've not got a leaf function. */ - if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ()) + if (TARGET_OMIT_LEAF_FRAME_POINTER + && (!current_function_is_leaf)) + return 1; + + if (current_function_profile) return 1; return 0; @@ -3879,7 +4230,28 @@ ix86_setup_frame_addresses () cfun->machine->accesses_prev_frame = 1; } -static char pic_label_name[32]; +#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY) +# define USE_HIDDEN_LINKONCE 1 +#else +# define USE_HIDDEN_LINKONCE 0 +#endif + +static int pic_labels_used; + +/* Fills in the label name that should be used for a pc thunk for + the given register. */ + +static void +get_pc_thunk_name (name, regno) + char name[32]; + unsigned int regno; +{ + if (USE_HIDDEN_LINKONCE) + sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); + else + ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); +} + /* This function generates code for -fpic that loads %ebx with the return address of the caller and then returns. */ @@ -3889,74 +4261,97 @@ ix86_asm_file_end (file) FILE *file; { rtx xops[2]; + int regno; - if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0) - return; + for (regno = 0; regno < 8; ++regno) + { + char name[32]; - /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related - to updating relocations to a section being discarded such that this - doesn't work. Ought to detect this at configure time. */ -#if 0 - /* The trick here is to create a linkonce section containing the - pic label thunk, but to refer to it with an internal label. - Because the label is internal, we don't have inter-dso name - binding issues on hosts that don't support ".hidden". - - In order to use these macros, however, we must create a fake - function decl. */ - if (targetm.have_named_sections) - { - tree decl = build_decl (FUNCTION_DECL, - get_identifier ("i686.get_pc_thunk"), - error_mark_node); - DECL_ONE_ONLY (decl) = 1; - UNIQUE_SECTION (decl, 0); - named_section (decl, NULL); - } - else -#else - text_section (); -#endif + if (! ((pic_labels_used >> regno) & 1)) + continue; + + get_pc_thunk_name (name, regno); + + if (USE_HIDDEN_LINKONCE) + { + tree decl; - /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an - internal (non-global) label that's being emitted, it didn't make - sense to have .type information for local labels. This caused - the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving - me debug info for a label that you're declaring non-global?) 
this - was changed to call ASM_OUTPUT_LABEL() instead. */ + decl = build_decl (FUNCTION_DECL, get_identifier (name), + error_mark_node); + TREE_PUBLIC (decl) = 1; + TREE_STATIC (decl) = 1; + DECL_ONE_ONLY (decl) = 1; - ASM_OUTPUT_LABEL (file, pic_label_name); + (*targetm.asm_out.unique_section) (decl, 0); + named_section (decl, NULL, 0); + + (*targetm.asm_out.globalize_label) (file, name); + fputs ("\t.hidden\t", file); + assemble_name (file, name); + fputc ('\n', file); + ASM_DECLARE_FUNCTION_NAME (file, name, decl); + } + else + { + text_section (); + ASM_OUTPUT_LABEL (file, name); + } - xops[0] = pic_offset_table_rtx; - xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); - output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); - output_asm_insn ("ret", xops); + xops[0] = gen_rtx_REG (SImode, regno); + xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); + output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); + output_asm_insn ("ret", xops); + } } -void -load_pic_register () -{ - rtx gotsym, pclab; +/* Emit code for the SET_GOT patterns. */ - if (TARGET_64BIT) - abort (); +const char * +output_set_got (dest) + rtx dest; +{ + rtx xops[3]; - gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + xops[0] = dest; + xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); - if (TARGET_DEEP_BRANCH_PREDICTION) + if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) { - if (! pic_label_name[0]) - ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0); - pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name)); + xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ()); + + if (!flag_pic) + output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); + else + output_asm_insn ("call\t%a2", xops); + +#if TARGET_MACHO + /* Output the "canonical" label name ("Lxx$pb") here too. This + is what will be referred to by the Mach-O PIC subsystem. */ + ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); +#endif + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (XEXP (xops[2], 0))); + + if (flag_pic) + output_asm_insn ("pop{l}\t%0", xops); } else { - pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); + char name[32]; + get_pc_thunk_name (name, REGNO (dest)); + pic_labels_used |= 1 << REGNO (dest); + + xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); + xops[2] = gen_rtx_MEM (QImode, xops[2]); + output_asm_insn ("call\t%X2", xops); } - emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab)); + if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) + output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops); + else if (!TARGET_MACHO) + output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops); - emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab)); + return ""; } /* Generate an "push" pattern for input ARG. */ @@ -3972,17 +4367,40 @@ gen_push (arg) arg); } +/* Return >= 0 if there is an unused call-clobbered register available + for the entire function. */ + +static unsigned int +ix86_select_alt_pic_regnum () +{ + if (current_function_is_leaf && !current_function_profile) + { + int i; + for (i = 2; i >= 0; --i) + if (!regs_ever_live[i]) + return i; + } + + return INVALID_REGNUM; +} + /* Return 1 if we need to save REGNO. 
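As a rough illustration of the two paths in output_set_got above (assuming an ELF target where GOT_SYMBOL_NAME is _GLOBAL_OFFSET_TABLE_, the PIC register is %ebx, and label numbers are arbitrary), the generated assembly is approximately:

	# Deep-branch-prediction form: call the shared, hidden, linkonce thunk
	# emitted by ix86_asm_file_end.
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
	...
__i686.get_pc_thunk.bx:
	movl	(%esp), %ebx		# return address = address of the addl above
	ret

	# Fallback form: inline call/pop, then add the GOT displacement.
	call	.L2
.L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx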
*/ static int ix86_save_reg (regno, maybe_eh_return) - int regno; + unsigned int regno; int maybe_eh_return; { - if (regno == PIC_OFFSET_TABLE_REGNUM - && (current_function_uses_pic_offset_table - || current_function_uses_const_pool - || current_function_calls_eh_return)) - return 1; + if (pic_offset_table_rtx + && regno == REAL_PIC_OFFSET_TABLE_REGNUM + && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] + || current_function_profile + || current_function_calls_eh_return + || current_function_uses_const_pool)) + { + if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) + return 0; + return 1; + } if (current_function_calls_eh_return && maybe_eh_return) { @@ -3992,7 +4410,7 @@ ix86_save_reg (regno, maybe_eh_return) unsigned test = EH_RETURN_DATA_REGNO (i); if (test == INVALID_REGNUM) break; - if (test == (unsigned) regno) + if (test == regno) return 1; } } @@ -4061,7 +4479,7 @@ ix86_compute_frame_layout (frame) frame->nregs = ix86_nsaved_regs (); total_size = size; - /* Skip return value and save base pointer. */ + /* Skip return address and saved base pointer. */ offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; frame->hard_frame_pointer_offset = offset; @@ -4203,9 +4621,7 @@ void ix86_expand_prologue () { rtx insn; - int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table - || current_function_uses_const_pool) - && !TARGET_64BIT); + bool pic_reg_used; struct ix86_frame frame; int use_mov = 0; HOST_WIDE_INT allocate; @@ -4270,6 +4686,10 @@ ix86_expand_prologue () CALL_INSN_FUNCTION_USAGE (insn) = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0), CALL_INSN_FUNCTION_USAGE (insn)); + + /* Don't allow scheduling pass to move insns across __alloca + call. */ + emit_insn (gen_blockage (const0_rtx)); } if (use_mov) { @@ -4284,14 +4704,36 @@ ix86_expand_prologue () SUBTARGET_PROLOGUE; #endif + pic_reg_used = false; + if (pic_offset_table_rtx + && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] + || current_function_profile)) + { + unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); + + if (alt_pic_reg_used != INVALID_REGNUM) + REGNO (pic_offset_table_rtx) = alt_pic_reg_used; + + pic_reg_used = true; + } + if (pic_reg_used) - load_pic_register (); + { + insn = emit_insn (gen_set_got (pic_offset_table_rtx)); - /* If we are profiling, make sure no instructions are scheduled before - the call to mcount. However, if -fpic, the above call will have - done that. */ - if (current_function_profile && ! pic_reg_used) - emit_insn (gen_blockage ()); + /* Even with accurate pre-reload life analysis, we can wind up + deleting all references to the pic register after reload. + Consider if cross-jumping unifies two sides of a branch + controled by a comparison vs the only read from a global. + In which case, allow the set_got to be deleted, though we're + too late to do anything about the ebx save in the prologue. */ + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL); + } + + /* Prevent function calls from be scheduled before the call to mcount. + In the pic_reg_used case, make sure that the got load isn't deleted. */ + if (current_function_profile) + emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx)); } /* Emit code to restore saved registers using MOV insns. First register @@ -4480,6 +4922,17 @@ ix86_expand_epilogue (style) else emit_jump_insn (gen_return_internal ()); } + +/* Reset from the function's potential modifications. 
*/ + +static void +ix86_output_function_epilogue (file, size) + FILE *file ATTRIBUTE_UNUSED; + HOST_WIDE_INT size ATTRIBUTE_UNUSED; +{ + if (pic_offset_table_rtx) + REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM; +} /* Extract the parts of an RTL expression that is a valid memory address for an instruction. Return 0 if the structure of the address is @@ -4499,7 +4952,7 @@ ix86_decompose_address (addr, out) rtx scale_rtx = NULL_RTX; int retval = 1; - if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG) + if (REG_P (addr) || GET_CODE (addr) == SUBREG) base = addr; else if (GET_CODE (addr) == PLUS) { @@ -4626,6 +5079,11 @@ ix86_address_cost (x) if (!ix86_decompose_address (x, &parts)) abort (); + if (parts.base && GET_CODE (parts.base) == SUBREG) + parts.base = SUBREG_REG (parts.base); + if (parts.index && GET_CODE (parts.index) == SUBREG) + parts.index = SUBREG_REG (parts.index); + /* More complex memory references are better. */ if (parts.disp && parts.disp != const0_rtx) cost--; @@ -4689,8 +5147,7 @@ ix86_find_base_term (x) || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) term = XEXP (term, 0); if (GET_CODE (term) != UNSPEC - || XVECLEN (term, 0) != 1 - || XINT (term, 1) != 15) + || XINT (term, 1) != UNSPEC_GOTPCREL) return x; term = XVECEXP (term, 0, 0); @@ -4713,8 +5170,7 @@ ix86_find_base_term (x) term = XEXP (term, 0); if (GET_CODE (term) != UNSPEC - || XVECLEN (term, 0) != 1 - || XINT (term, 1) != 7) + || XINT (term, 1) != UNSPEC_GOTOFF) return x; term = XVECEXP (term, 0, 0); @@ -4726,6 +5182,125 @@ ix86_find_base_term (x) return term; } +/* Determine if a given RTX is a valid constant. We already know this + satisfies CONSTANT_P. */ + +bool +legitimate_constant_p (x) + rtx x; +{ + rtx inner; + + switch (GET_CODE (x)) + { + case SYMBOL_REF: + /* TLS symbols are not constant. */ + if (tls_symbolic_operand (x, Pmode)) + return false; + break; + + case CONST: + inner = XEXP (x, 0); + + /* Offsets of TLS symbols are never valid. + Discourage CSE from creating them. */ + if (GET_CODE (inner) == PLUS + && tls_symbolic_operand (XEXP (inner, 0), Pmode)) + return false; + + /* Only some unspecs are valid as "constants". */ + if (GET_CODE (inner) == UNSPEC) + switch (XINT (inner, 1)) + { + case UNSPEC_TPOFF: + return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode); + default: + return false; + } + break; + + default: + break; + } + + /* Otherwise we handle everything else in the move patterns. */ + return true; +} + +/* Determine if it's legal to put X into the constant pool. This + is not possible for the address of thread-local symbols, which + is checked above. */ + +static bool +ix86_cannot_force_const_mem (x) + rtx x; +{ + return !legitimate_constant_p (x); +} + +/* Determine if a given RTX is a valid constant address. */ + +bool +constant_address_p (x) + rtx x; +{ + switch (GET_CODE (x)) + { + case LABEL_REF: + case CONST_INT: + return true; + + case CONST_DOUBLE: + return TARGET_64BIT; + + case CONST: + /* For Mach-O, really believe the CONST. */ + if (TARGET_MACHO) + return true; + /* Otherwise fall through. */ + case SYMBOL_REF: + return !flag_pic && legitimate_constant_p (x); + + default: + return false; + } +} + +/* Nonzero if the constant value X is a legitimate general operand + when generating PIC code. It is given that flag_pic is on and + that X satisfies CONSTANT_P or is a CONST_DOUBLE. 
*/ + +bool +legitimate_pic_operand_p (x) + rtx x; +{ + rtx inner; + + switch (GET_CODE (x)) + { + case CONST: + inner = XEXP (x, 0); + + /* Only some unspecs are valid as "constants". */ + if (GET_CODE (inner) == UNSPEC) + switch (XINT (inner, 1)) + { + case UNSPEC_TPOFF: + return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode); + default: + return false; + } + /* FALLTHRU */ + + case SYMBOL_REF: + case LABEL_REF: + return legitimate_pic_address_disp_p (x); + + default: + return true; + } +} + /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */ @@ -4733,21 +5308,32 @@ int legitimate_pic_address_disp_p (disp) register rtx disp; { + bool saw_plus; + /* In 64bit mode we can allow direct addresses of symbols and labels when they are not dynamic symbols. */ if (TARGET_64BIT) { - rtx x = disp; - if (GET_CODE (disp) == CONST) - x = XEXP (disp, 0); - /* ??? Handle PIC code models */ - if (GET_CODE (x) == PLUS - && (GET_CODE (XEXP (x, 1)) == CONST_INT - && ix86_cmodel == CM_SMALL_PIC - && INTVAL (XEXP (x, 1)) < 1024*1024*1024 - && INTVAL (XEXP (x, 1)) > -1024*1024*1024)) - x = XEXP (x, 0); - if (local_symbolic_operand (x, Pmode)) + /* TLS references should always be enclosed in UNSPEC. */ + if (tls_symbolic_operand (disp, GET_MODE (disp))) + return 0; + if (GET_CODE (disp) == SYMBOL_REF + && ix86_cmodel == CM_SMALL_PIC + && (CONSTANT_POOL_ADDRESS_P (disp) + || SYMBOL_REF_FLAG (disp))) + return 1; + if (GET_CODE (disp) == LABEL_REF) + return 1; + if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == PLUS + && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF + && ix86_cmodel == CM_SMALL_PIC + && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0)) + || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0)))) + || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF) + && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT + && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024 + && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024) return 1; } if (GET_CODE (disp) != CONST) @@ -4759,8 +5345,7 @@ legitimate_pic_address_disp_p (disp) /* We are unsafe to allow PLUS expressions. This limit allowed distance of GOT tables. We should not need these anyway. */ if (GET_CODE (disp) != UNSPEC - || XVECLEN (disp, 0) != 1 - || XINT (disp, 1) != 15) + || XINT (disp, 1) != UNSPEC_GOTPCREL) return 0; if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF @@ -4769,27 +5354,51 @@ legitimate_pic_address_disp_p (disp) return 1; } + saw_plus = false; if (GET_CODE (disp) == PLUS) { if (GET_CODE (XEXP (disp, 1)) != CONST_INT) return 0; disp = XEXP (disp, 0); + saw_plus = true; } - if (GET_CODE (disp) != UNSPEC - || XVECLEN (disp, 0) != 1) + /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */ + if (TARGET_MACHO && GET_CODE (disp) == MINUS) + { + if (GET_CODE (XEXP (disp, 0)) == LABEL_REF + || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF) + if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF) + { + const char *sym_name = XSTR (XEXP (disp, 1), 0); + if (strstr (sym_name, "$pb") != 0) + return 1; + } + } + + if (GET_CODE (disp) != UNSPEC) return 0; - /* Must be @GOT or @GOTOFF. 
*/ switch (XINT (disp, 1)) { - case 6: /* @GOT */ + case UNSPEC_GOT: + if (saw_plus) + return false; return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF; - - case 7: /* @GOTOFF */ + case UNSPEC_GOTOFF: return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode); + case UNSPEC_GOTTPOFF: + case UNSPEC_GOTNTPOFF: + case UNSPEC_INDNTPOFF: + if (saw_plus) + return false; + return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode); + case UNSPEC_NTPOFF: + return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode); + case UNSPEC_DTPOFF: + return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode); } - + return 0; } @@ -4821,6 +5430,13 @@ legitimate_address_p (mode, addr, strict) debug_rtx (addr); } + if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP) + { + if (TARGET_DEBUG_ADDR) + fprintf (stderr, "Success.\n"); + return TRUE; + } + if (ix86_decompose_address (addr, &parts) <= 0) { reason = "decomposition failed"; @@ -4840,9 +5456,15 @@ legitimate_address_p (mode, addr, strict) if (base) { + rtx reg; reason_rtx = base; - if (GET_CODE (base) != REG) + if (GET_CODE (base) == SUBREG) + reg = SUBREG_REG (base); + else + reg = base; + + if (GET_CODE (reg) != REG) { reason = "base is not a register"; goto report_error; @@ -4854,8 +5476,8 @@ legitimate_address_p (mode, addr, strict) goto report_error; } - if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base)) - || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base))) + if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) + || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) { reason = "base is not valid"; goto report_error; @@ -4870,9 +5492,15 @@ legitimate_address_p (mode, addr, strict) if (index) { + rtx reg; reason_rtx = index; - if (GET_CODE (index) != REG) + if (GET_CODE (index) == SUBREG) + reg = SUBREG_REG (index); + else + reg = index; + + if (GET_CODE (reg) != REG) { reason = "index is not a register"; goto report_error; @@ -4884,8 +5512,8 @@ legitimate_address_p (mode, addr, strict) goto report_error; } - if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index)) - || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index))) + if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) + || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) { reason = "index is not valid"; goto report_error; @@ -4914,37 +5542,51 @@ legitimate_address_p (mode, addr, strict) { reason_rtx = disp; - if (!CONSTANT_ADDRESS_P (disp)) - { - reason = "displacement is not constant"; - goto report_error; - } + if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == UNSPEC) + switch (XINT (XEXP (disp, 0), 1)) + { + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + case UNSPEC_GOTPCREL: + if (!flag_pic) + abort (); + goto is_legitimate_pic; - if (TARGET_64BIT) - { - if (!x86_64_sign_extended_value (disp)) - { - reason = "displacement is out of range"; - goto report_error; - } - } - else - { - if (GET_CODE (disp) == CONST_DOUBLE) - { - reason = "displacement is a const_double"; - goto report_error; - } - } + case UNSPEC_GOTTPOFF: + case UNSPEC_GOTNTPOFF: + case UNSPEC_INDNTPOFF: + case UNSPEC_NTPOFF: + case UNSPEC_DTPOFF: + break; - if (flag_pic && SYMBOLIC_CONST (disp)) + default: + reason = "invalid address unspec"; + goto report_error; + } + + else if (flag_pic && (SYMBOLIC_CONST (disp) +#if TARGET_MACHO + && !machopic_operand_p (disp) +#endif + )) { + is_legitimate_pic: if (TARGET_64BIT && (index || base)) { - reason = "non-constant pic memory reference"; - goto report_error; + /* foo@dtpoff(%rX) is ok. 
*/ + if (GET_CODE (disp) != CONST + || GET_CODE (XEXP (disp, 0)) != PLUS + || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC + || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT + || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF + && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) + { + reason = "non-constant pic memory reference"; + goto report_error; + } } - if (! legitimate_pic_address_disp_p (disp)) + else if (! legitimate_pic_address_disp_p (disp)) { reason = "displacement is an invalid pic construct"; goto report_error; @@ -4972,14 +5614,20 @@ legitimate_address_p (mode, addr, strict) that never results in lea, this seems to be easier and correct fix for crash to disable this test. */ } - else if (HALF_PIC_P ()) + else if (!CONSTANT_ADDRESS_P (disp)) { - if (! HALF_PIC_ADDRESS_P (disp) - || (base != NULL_RTX || index != NULL_RTX)) - { - reason = "displacement is an invalid half-pic reference"; - goto report_error; - } + reason = "displacement is not constant"; + goto report_error; + } + else if (TARGET_64BIT && !x86_64_sign_extended_value (disp)) + { + reason = "displacement is out of range"; + goto report_error; + } + else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE) + { + reason = "displacement is a const_double"; + goto report_error; } } @@ -4988,7 +5636,7 @@ legitimate_address_p (mode, addr, strict) fprintf (stderr, "Success.\n"); return TRUE; -report_error: + report_error: if (TARGET_DEBUG_ADDR) { fprintf (stderr, "Error: %s\n", reason); @@ -5002,10 +5650,10 @@ report_error: static HOST_WIDE_INT ix86_GOT_alias_set () { - static HOST_WIDE_INT set = -1; - if (set == -1) - set = new_alias_set (); - return set; + static HOST_WIDE_INT set = -1; + if (set == -1) + set = new_alias_set (); + return set; } /* Return a legitimate reference for ORIG (an address) using the @@ -5035,34 +5683,37 @@ legitimize_pic_address (orig, reg) rtx new = orig; rtx base; - if (local_symbolic_operand (addr, Pmode)) +#if TARGET_MACHO + if (reg == 0) + reg = gen_reg_rtx (Pmode); + /* Use the generic Mach-O PIC machinery. */ + return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); +#endif + + if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) + new = addr; + else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode)) { - /* In 64bit mode we can address such objects directly. */ - if (TARGET_64BIT) - new = addr; - else - { - /* This symbol may be referenced via a displacement from the PIC - base address (@GOTOFF). */ + /* This symbol may be referenced via a displacement from the PIC + base address (@GOTOFF). 
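For the non-TLS cases handled here, the difference between the two PIC addressing forms amounts roughly to the following (32-bit ELF, PIC register %ebx, symbol x and destination registers illustrative):

	movl	x@GOTOFF(%ebx), %eax	# local symbol: displacement from the GOT base

	movl	x@GOT(%ebx), %eax	# global symbol: load &x from its GOT slot ...
	movl	(%eax), %eax		# ... then dereference to obtain x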
*/ - current_function_uses_pic_offset_table = 1; - new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7); - new = gen_rtx_CONST (Pmode, new); - new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); + if (reload_in_progress) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); + new = gen_rtx_CONST (Pmode, new); + new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); - if (reg != 0) - { - emit_move_insn (reg, new); - new = reg; - } - } + if (reg != 0) + { + emit_move_insn (reg, new); + new = reg; + } } else if (GET_CODE (addr) == SYMBOL_REF) { if (TARGET_64BIT) { - current_function_uses_pic_offset_table = 1; - new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15); + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); new = gen_rtx_CONST (Pmode, new); new = gen_rtx_MEM (Pmode, new); RTX_UNCHANGING_P (new) = 1; @@ -5081,8 +5732,9 @@ legitimize_pic_address (orig, reg) /* This symbol must be referenced via a load from the Global Offset Table (@GOT). */ - current_function_uses_pic_offset_table = 1; - new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6); + if (reload_in_progress) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); new = gen_rtx_CONST (Pmode, new); new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); new = gen_rtx_MEM (Pmode, new); @@ -5122,8 +5774,10 @@ legitimize_pic_address (orig, reg) { if (!TARGET_64BIT) { - current_function_uses_pic_offset_table = 1; - new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7); + if (reload_in_progress) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), + UNSPEC_GOTOFF); new = gen_rtx_PLUS (Pmode, new, op1); new = gen_rtx_CONST (Pmode, new); new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); @@ -5136,7 +5790,9 @@ legitimize_pic_address (orig, reg) } else { - /* ??? We need to limit offsets here. */ + if (INTVAL (op1) < -16*1024*1024 + || INTVAL (op1) >= 16*1024*1024) + new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1)); } } else @@ -5161,7 +5817,102 @@ legitimize_pic_address (orig, reg) } return new; } + +static void +ix86_encode_section_info (decl, first) + tree decl; + int first ATTRIBUTE_UNUSED; +{ + bool local_p = (*targetm.binds_local_p) (decl); + rtx rtl, symbol; + + rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl); + if (GET_CODE (rtl) != MEM) + return; + symbol = XEXP (rtl, 0); + if (GET_CODE (symbol) != SYMBOL_REF) + return; + + /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global + symbol so that we may access it directly in the GOT. */ + + if (flag_pic) + SYMBOL_REF_FLAG (symbol) = local_p; + + /* For ELF, encode thread-local data with %[GLil] for "global dynamic", + "local dynamic", "initial exec" or "local exec" TLS models + respectively. */ + + if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl)) + { + const char *symbol_str; + char *newstr; + size_t len; + enum tls_model kind = decl_tls_model (decl); + + if (TARGET_64BIT && ! flag_pic) + { + /* x86-64 doesn't allow non-pic code for shared libraries, + so don't generate GD/LD TLS models for non-pic code. 
*/ + switch (kind) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + kind = TLS_MODEL_INITIAL_EXEC; break; + case TLS_MODEL_LOCAL_DYNAMIC: + kind = TLS_MODEL_LOCAL_EXEC; break; + default: + break; + } + } + + symbol_str = XSTR (symbol, 0); + + if (symbol_str[0] == '%') + { + if (symbol_str[1] == tls_model_chars[kind]) + return; + symbol_str += 2; + } + len = strlen (symbol_str) + 1; + newstr = alloca (len + 2); + + newstr[0] = '%'; + newstr[1] = tls_model_chars[kind]; + memcpy (newstr + 2, symbol_str, len); + + XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1); + } +} + +/* Undo the above when printing symbol names. */ + +static const char * +ix86_strip_name_encoding (str) + const char *str; +{ + if (str[0] == '%') + str += 2; + if (str [0] == '*') + str += 1; + return str; +} +/* Load the thread pointer into a register. */ + +static rtx +get_thread_pointer () +{ + rtx tp; + + tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP); + tp = gen_rtx_MEM (Pmode, tp); + RTX_UNCHANGING_P (tp) = 1; + set_mem_alias_set (tp, ix86_GOT_alias_set ()); + tp = force_reg (Pmode, tp); + + return tp; +} + /* Try machine-dependent ways of modifying an illegitimate address to be legitimate. If we find one, return the new, valid address. This macro is used in only one place: `memory_address' in explow.c. @@ -5199,6 +5950,123 @@ legitimize_address (x, oldx, mode) debug_rtx (x); } + log = tls_symbolic_operand (x, mode); + if (log) + { + rtx dest, base, off, pic; + int type; + + switch (log) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + dest = gen_reg_rtx (Pmode); + if (TARGET_64BIT) + { + rtx rax = gen_rtx_REG (Pmode, 0), insns; + + start_sequence (); + emit_call_insn (gen_tls_global_dynamic_64 (rax, x)); + insns = get_insns (); + end_sequence (); + + emit_libcall_block (insns, dest, rax, x); + } + else + emit_insn (gen_tls_global_dynamic_32 (dest, x)); + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + base = gen_reg_rtx (Pmode); + if (TARGET_64BIT) + { + rtx rax = gen_rtx_REG (Pmode, 0), insns, note; + + start_sequence (); + emit_call_insn (gen_tls_local_dynamic_base_64 (rax)); + insns = get_insns (); + end_sequence (); + + note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL); + note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note); + emit_libcall_block (insns, base, rax, note); + } + else + emit_insn (gen_tls_local_dynamic_base_32 (base)); + + off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); + off = gen_rtx_CONST (Pmode, off); + + return gen_rtx_PLUS (Pmode, base, off); + + case TLS_MODEL_INITIAL_EXEC: + if (TARGET_64BIT) + { + pic = NULL; + type = UNSPEC_GOTNTPOFF; + } + else if (flag_pic) + { + if (reload_in_progress) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + pic = pic_offset_table_rtx; + type = TARGET_GNU_TLS ? 
UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; + } + else if (!TARGET_GNU_TLS) + { + pic = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (pic)); + type = UNSPEC_GOTTPOFF; + } + else + { + pic = NULL; + type = UNSPEC_INDNTPOFF; + } + + base = get_thread_pointer (); + + off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); + off = gen_rtx_CONST (Pmode, off); + if (pic) + off = gen_rtx_PLUS (Pmode, pic, off); + off = gen_rtx_MEM (Pmode, off); + RTX_UNCHANGING_P (off) = 1; + set_mem_alias_set (off, ix86_GOT_alias_set ()); + dest = gen_reg_rtx (Pmode); + + if (TARGET_64BIT || TARGET_GNU_TLS) + { + emit_move_insn (dest, off); + return gen_rtx_PLUS (Pmode, base, dest); + } + else + emit_insn (gen_subsi3 (dest, base, off)); + break; + + case TLS_MODEL_LOCAL_EXEC: + base = get_thread_pointer (); + + off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), + (TARGET_64BIT || TARGET_GNU_TLS) + ? UNSPEC_NTPOFF : UNSPEC_TPOFF); + off = gen_rtx_CONST (Pmode, off); + + if (TARGET_64BIT || TARGET_GNU_TLS) + return gen_rtx_PLUS (Pmode, base, off); + else + { + dest = gen_reg_rtx (Pmode); + emit_insn (gen_subsi3 (dest, base, off)); + } + break; + + default: + abort (); + } + + return dest; + } + if (flag_pic && SYMBOLIC_CONST (x)) return legitimize_pic_address (x, 0); @@ -5371,7 +6239,7 @@ output_pic_addr_const (file, x, code) case SYMBOL_REF: assemble_name (file, XSTR (x, 0)); - if (code == 'P' && ! SYMBOL_REF_FLAG (x)) + if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x)) fputs ("@PLT", file); break; @@ -5429,30 +6297,54 @@ output_pic_addr_const (file, x, code) break; case MINUS: - putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); + if (!TARGET_MACHO) + putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); output_pic_addr_const (file, XEXP (x, 0), code); putc ('-', file); output_pic_addr_const (file, XEXP (x, 1), code); - putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); + if (!TARGET_MACHO) + putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); break; case UNSPEC: if (XVECLEN (x, 0) != 1) - abort (); + abort (); output_pic_addr_const (file, XVECEXP (x, 0, 0), code); switch (XINT (x, 1)) { - case 6: + case UNSPEC_GOT: fputs ("@GOT", file); break; - case 7: + case UNSPEC_GOTOFF: fputs ("@GOTOFF", file); break; - case 8: - fputs ("@PLT", file); + case UNSPEC_GOTPCREL: + fputs ("@GOTPCREL(%rip)", file); + break; + case UNSPEC_GOTTPOFF: + /* FIXME: This might be @TPOFF in Sun ld too. */ + fputs ("@GOTTPOFF", file); + break; + case UNSPEC_TPOFF: + fputs ("@TPOFF", file); break; - case 15: - fputs ("@GOTPCREL(%RIP)", file); + case UNSPEC_NTPOFF: + if (TARGET_64BIT) + fputs ("@TPOFF", file); + else + fputs ("@NTPOFF", file); + break; + case UNSPEC_DTPOFF: + fputs ("@DTPOFF", file); + break; + case UNSPEC_GOTNTPOFF: + if (TARGET_64BIT) + fputs ("@GOTTPOFF(%rip)", file); + else + fputs ("@GOTNTPOFF", file); + break; + case UNSPEC_INDNTPOFF: + fputs ("@INDNTPOFF", file); break; default: output_operand_lossage ("invalid UNSPEC as operand"); @@ -5487,6 +6379,30 @@ i386_dwarf_output_addr_const (file, x) fputc ('\n', file); } +/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL. + We need to emit DTP-relative relocations. 
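The initial-exec and local-exec paths above correspond roughly to the following 32-bit GNU-TLS sequences (%fs:0 replaces %gs:0 in 64-bit mode; the symbol x and register choices are illustrative):

	# local exec: thread pointer plus a link-time constant offset
	movl	%gs:0, %eax
	leal	x@NTPOFF(%eax), %eax

	# initial exec, PIC: the offset is loaded from the symbol's GOT slot
	movl	%gs:0, %eax
	addl	x@GOTNTPOFF(%ebx), %eax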
*/ + +void +i386_output_dwarf_dtprel (file, size, x) + FILE *file; + int size; + rtx x; +{ + fputs (ASM_LONG, file); + output_addr_const (file, x); + fputs ("@DTPOFF", file); + switch (size) + { + case 4: + break; + case 8: + fputs (", 0", file); + break; + default: + abort (); + } +} + /* In the name of slightly smaller debug output, and to cater to general assembler losage, recognize PIC+GOTOFF and turn it back into a direct symbol reference. */ @@ -5504,7 +6420,7 @@ i386_simplify_dwarf_addr (orig_x) { if (GET_CODE (x) != CONST || GET_CODE (XEXP (x, 0)) != UNSPEC - || XINT (XEXP (x, 0), 1) != 15 + || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL || GET_CODE (orig_x) != MEM) return orig_x; return XVECEXP (XEXP (x, 0), 0, 0); @@ -5540,8 +6456,8 @@ i386_simplify_dwarf_addr (orig_x) x = XEXP (XEXP (x, 1), 0); if (GET_CODE (x) == UNSPEC - && ((XINT (x, 1) == 6 && GET_CODE (orig_x) == MEM) - || (XINT (x, 1) == 7 && GET_CODE (orig_x) != MEM))) + && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM) + || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM))) { if (y) return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0)); @@ -5551,8 +6467,9 @@ i386_simplify_dwarf_addr (orig_x) if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == UNSPEC && GET_CODE (XEXP (x, 1)) == CONST_INT - && ((XINT (XEXP (x, 0), 1) == 6 && GET_CODE (orig_x) == MEM) - || (XINT (XEXP (x, 0), 1) == 7 && GET_CODE (orig_x) != MEM))) + && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM) + || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF + && GET_CODE (orig_x) != MEM))) { x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1)); if (y) @@ -5665,7 +6582,7 @@ print_reg (x, code, file) || REGNO (x) == FPSR_REG) abort (); - if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) + if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) putc ('%', file); if (code == 'w' || MMX_REG_P (x)) @@ -5742,6 +6659,43 @@ print_reg (x, code, file) } } +/* Locate some local-dynamic symbol still in use by this function + so that we can print its name in some tls_local_dynamic_base + pattern. */ + +static const char * +get_some_local_dynamic_name () +{ + rtx insn; + + if (cfun->machine->some_ld_name) + return cfun->machine->some_ld_name; + + for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) + return cfun->machine->some_ld_name; + + abort (); +} + +static int +get_some_local_dynamic_name_1 (px, data) + rtx *px; + void *data ATTRIBUTE_UNUSED; +{ + rtx x = *px; + + if (GET_CODE (x) == SYMBOL_REF + && local_dynamic_symbolic_operand (x, Pmode)) + { + cfun->machine->some_ld_name = XSTR (x, 0); + return 1; + } + + return 0; +} + /* Meaning of CODE: L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. C -- print opcode suffix for set/cmov insn. @@ -5766,6 +6720,7 @@ print_reg (x, code, file) D -- print condition for SSE cmp instruction. P -- if PIC, print an @PLT suffix. X -- don't print any sort of PIC '@' suffix for a symbol. + & -- print some in-use local-dynamic symbol name. 
*/ void @@ -5783,6 +6738,10 @@ print_operand (file, x, code) putc ('*', file); return; + case '&': + assemble_name (file, get_some_local_dynamic_name ()); + return; + case 'A': if (ASSEMBLER_DIALECT == ASM_ATT) putc ('*', file); @@ -5976,7 +6935,7 @@ print_operand (file, x, code) /* Like above, but reverse condition */ case 'c': - /* Check to see if argument to %c is really a constant + /* Check to see if argument to %c is really a constant and not a condition code which needs to be reversed. */ if (GET_RTX_CLASS (GET_CODE (x)) != '<') { @@ -6069,9 +7028,11 @@ print_operand (file, x, code) if (flag_pic && CONSTANT_ADDRESS_P (x)) output_pic_addr_const (file, x, code); /* Avoid (%rip) for call operands. */ - else if (CONSTANT_ADDRESS_P (x) && code =='P' + else if (CONSTANT_ADDRESS_P (x) && code == 'P' && GET_CODE (x) != CONST_INT) output_addr_const (file, x); + else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) + output_operand_lossage ("invalid constraints for operand"); else output_address (x); } @@ -6092,24 +7053,21 @@ print_operand (file, x, code) /* These float cases don't actually occur as immediate operands. */ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) { - REAL_VALUE_TYPE r; char dstr[30]; - REAL_VALUE_FROM_CONST_DOUBLE (r, x); - REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr); + real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); fprintf (file, "%s", dstr); } else if (GET_CODE (x) == CONST_DOUBLE && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)) { - REAL_VALUE_TYPE r; char dstr[30]; - REAL_VALUE_FROM_CONST_DOUBLE (r, x); - REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr); + real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); fprintf (file, "%s", dstr); } + else { if (code != 'P') @@ -6148,12 +7106,22 @@ print_operand_address (file, addr) rtx base, index, disp; int scale; - if (! ix86_decompose_address (addr, &parts)) + if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP) { - output_operand_lossage ("Wrong address expression or operand constraint"); + if (ASSEMBLER_DIALECT == ASM_INTEL) + fputs ("DWORD PTR ", file); + if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) + putc ('%', file); + if (TARGET_64BIT) + fputs ("fs:0", file); + else + fputs ("gs:0", file); return; } + if (! ix86_decompose_address (addr, &parts)) + abort (); + base = parts.base; index = parts.index; disp = parts.disp; @@ -6179,7 +7147,15 @@ print_operand_address (file, addr) output_addr_const (file, addr); /* Use one byte shorter RIP relative addressing for 64bit mode. */ - if (GET_CODE (disp) != CONST_INT && TARGET_64BIT) + if (TARGET_64BIT + && ((GET_CODE (addr) == SYMBOL_REF + && ! tls_symbolic_operand (addr, GET_MODE (addr))) + || GET_CODE (addr) == LABEL_REF + || (GET_CODE (addr) == CONST + && GET_CODE (XEXP (addr, 0)) == PLUS + && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF) + && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT))) fputs ("(%rip)", file); } else @@ -6261,6 +7237,58 @@ print_operand_address (file, addr) } } } + +bool +output_addr_const_extra (file, x) + FILE *file; + rtx x; +{ + rtx op; + + if (GET_CODE (x) != UNSPEC) + return false; + + op = XVECEXP (x, 0, 0); + switch (XINT (x, 1)) + { + case UNSPEC_GOTTPOFF: + output_addr_const (file, op); + /* FIXME: This might be @TPOFF in Sun ld. 
*/ + fputs ("@GOTTPOFF", file); + break; + case UNSPEC_TPOFF: + output_addr_const (file, op); + fputs ("@TPOFF", file); + break; + case UNSPEC_NTPOFF: + output_addr_const (file, op); + if (TARGET_64BIT) + fputs ("@TPOFF", file); + else + fputs ("@NTPOFF", file); + break; + case UNSPEC_DTPOFF: + output_addr_const (file, op); + fputs ("@DTPOFF", file); + break; + case UNSPEC_GOTNTPOFF: + output_addr_const (file, op); + if (TARGET_64BIT) + fputs ("@GOTTPOFF(%rip)", file); + else + fputs ("@GOTNTPOFF", file); + break; + case UNSPEC_INDNTPOFF: + output_addr_const (file, op); + fputs ("@INDNTPOFF", file); + break; + + default: + return false; + } + + return true; +} /* Split one or more DImode RTL references into pairs of SImode references. The RTL can be REG, offsettable MEM, integer constant, or @@ -6630,12 +7658,12 @@ output_fp_compare (insn, operands, eflags_p, unordered_p) if (unordered_p) return "ucomiss\t{%1, %0|%0, %1}"; else - return "comiss\t{%1, %0|%0, %y}"; + return "comiss\t{%1, %0|%0, %1}"; else if (unordered_p) return "ucomisd\t{%1, %0|%0, %1}"; else - return "comisd\t{%1, %0|%0, %y}"; + return "comisd\t{%1, %0|%0, %1}"; } if (! STACK_TOP_P (cmp_op0)) @@ -6760,13 +7788,18 @@ ix86_output_addr_diff_elt (file, value, rel) int value, rel; { if (TARGET_64BIT) - fprintf (file, "%s%s%d-.+(.-%s%d)\n", + fprintf (file, "%s%s%d-%s%d\n", ASM_LONG, LPREFIX, value, LPREFIX, rel); else if (HAVE_AS_GOTOFF_IN_DATA) fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); +#if TARGET_MACHO + else if (TARGET_MACHO) + fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value, + machopic_function_base_name () + 1); +#endif else - asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n", - ASM_LONG, LPREFIX, value); + asm_fprintf (file, "%s%U%s+[.-%s%d]\n", + ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value); } /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate @@ -6798,51 +7831,117 @@ ix86_expand_clear (dest) emit_insn (tmp); } +/* X is an unchanging MEM. If it is a constant pool reference, return + the constant pool rtx, else NULL. */ + +static rtx +maybe_get_pool_constant (x) + rtx x; +{ + x = XEXP (x, 0); + + if (flag_pic && ! TARGET_64BIT) + { + if (GET_CODE (x) != PLUS) + return NULL_RTX; + if (XEXP (x, 0) != pic_offset_table_rtx) + return NULL_RTX; + x = XEXP (x, 1); + if (GET_CODE (x) != CONST) + return NULL_RTX; + x = XEXP (x, 0); + if (GET_CODE (x) != UNSPEC) + return NULL_RTX; + if (XINT (x, 1) != UNSPEC_GOTOFF) + return NULL_RTX; + x = XVECEXP (x, 0, 0); + } + + if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) + return get_pool_constant (x); + + return NULL_RTX; +} + void ix86_expand_move (mode, operands) enum machine_mode mode; rtx operands[]; { int strict = (reload_in_progress || reload_completed); - rtx insn; + rtx insn, op0, op1, tmp; - if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode)) - { - /* Emit insns to move operands[1] into operands[0]. */ + op0 = operands[0]; + op1 = operands[1]; - if (GET_CODE (operands[0]) == MEM) - operands[1] = force_reg (Pmode, operands[1]); + if (tls_symbolic_operand (op1, Pmode)) + { + op1 = legitimize_address (op1, op1, VOIDmode); + if (GET_CODE (op0) == MEM) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, op1)); + op1 = tmp; + } + } + else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode)) + { +#if TARGET_MACHO + if (MACHOPIC_PURE) + { + rtx temp = ((reload_in_progress + || ((op0 && GET_CODE (op0) == REG) + && mode == Pmode)) + ? 
op0 : gen_reg_rtx (Pmode)); + op1 = machopic_indirect_data_reference (op1, temp); + op1 = machopic_legitimize_pic_address (op1, mode, + temp == op1 ? 0 : temp); + } else { - rtx temp = operands[0]; + if (MACHOPIC_INDIRECT) + op1 = machopic_indirect_data_reference (op1, 0); + } + if (op0 != op1) + { + insn = gen_rtx_SET (VOIDmode, op0, op1); + emit_insn (insn); + } + return; +#endif /* TARGET_MACHO */ + if (GET_CODE (op0) == MEM) + op1 = force_reg (Pmode, op1); + else + { + rtx temp = op0; if (GET_CODE (temp) != REG) temp = gen_reg_rtx (Pmode); - temp = legitimize_pic_address (operands[1], temp); - if (temp == operands[0]) + temp = legitimize_pic_address (op1, temp); + if (temp == op0) return; - operands[1] = temp; + op1 = temp; } } else { - if (GET_CODE (operands[0]) == MEM + if (GET_CODE (op0) == MEM && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) - || !push_operand (operands[0], mode)) - && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (mode, operands[1]); + || !push_operand (op0, mode)) + && GET_CODE (op1) == MEM) + op1 = force_reg (mode, op1); - if (push_operand (operands[0], mode) - && ! general_no_elim_operand (operands[1], mode)) - operands[1] = copy_to_mode_reg (mode, operands[1]); + if (push_operand (op0, mode) + && ! general_no_elim_operand (op1, mode)) + op1 = copy_to_mode_reg (mode, op1); /* Force large constants in 64bit compilation into register to get them CSEed. */ if (TARGET_64BIT && mode == DImode - && immediate_operand (operands[1], mode) - && !x86_64_zero_extended_value (operands[1]) - && !register_operand (operands[0], mode) + && immediate_operand (op1, mode) + && !x86_64_zero_extended_value (op1) + && !register_operand (op0, mode) && optimize && !reload_completed && !reload_in_progress) - operands[1] = copy_to_mode_reg (mode, operands[1]); + op1 = copy_to_mode_reg (mode, op1); if (FLOAT_MODE_P (mode)) { @@ -6852,13 +7951,21 @@ ix86_expand_move (mode, operands) if (strict) ; - else if (GET_CODE (operands[1]) == CONST_DOUBLE - && register_operand (operands[0], mode)) - operands[1] = validize_mem (force_const_mem (mode, operands[1])); + else if (GET_CODE (op1) == CONST_DOUBLE) + { + op1 = validize_mem (force_const_mem (mode, op1)); + if (!register_operand (op0, mode)) + { + rtx temp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, temp, op1)); + emit_move_insn (op0, temp); + return; + } + } } } - insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]); + insn = gen_rtx_SET (VOIDmode, op0, op1); emit_insn (insn); } @@ -6874,15 +7981,15 @@ ix86_expand_vector_move (mode, operands) to handle some of them more efficiently. */ if ((reload_in_progress | reload_completed) == 0 && register_operand (operands[0], mode) - && CONSTANT_P (operands[1])) + && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode)) { - rtx addr = gen_reg_rtx (Pmode); - emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0)); - operands[1] = gen_rtx_MEM (mode, addr); + operands[1] = force_const_mem (mode, operands[1]); + emit_move_insn (operands[0], operands[1]); + return; } /* Make operand1 a register if it isn't already. */ - if ((reload_in_progress | reload_completed) == 0 + if (!no_new_pseudos && !register_operand (operands[0], mode) && !register_operand (operands[1], mode)) { @@ -6892,7 +7999,7 @@ ix86_expand_vector_move (mode, operands) } emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); -} +} /* Attempt to expand a binary operator. 
Make the expansion closer to the actual machine, then just general_operand, which will allow 3 separate @@ -7540,7 +8647,7 @@ ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test) else { tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); - tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9); + tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); if (!scratch) scratch = gen_reg_rtx (HImode); emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); @@ -7563,7 +8670,7 @@ ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test) { /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); - tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9); + tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); if (!scratch) scratch = gen_reg_rtx (HImode); emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); @@ -7757,7 +8864,7 @@ ix86_expand_branch (code, label) code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0, &ix86_compare_op1); - + ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); /* Check whether we will use the natural sequence with one jump. If @@ -8074,7 +9181,7 @@ ix86_expand_int_movcc (operands) start_sequence (); compare_op = ix86_expand_compare (code, &second_test, &bypass_test); - compare_seq = gen_sequence (); + compare_seq = get_insns (); end_sequence (); compare_code = GET_CODE (compare_op); @@ -8095,14 +9202,13 @@ ix86_expand_int_movcc (operands) if ((compare_code == LTU || compare_code == GEU) && !second_test && !bypass_test) { - /* Detect overlap between destination and compare sources. */ rtx tmp = out; /* To simplify rest of code, restrict to the GEU case. */ if (compare_code == LTU) { - int tmp = ct; + HOST_WIDE_INT tmp = ct; ct = cf; cf = tmp; compare_code = reverse_condition (compare_code); @@ -8152,7 +9258,7 @@ ix86_expand_int_movcc (operands) /* * cmpl op0,op1 * sbbl dest,dest - * xorl $-1, dest + * notl dest * [addl dest, cf] * * Size 8 - 11. @@ -8168,15 +9274,23 @@ ix86_expand_int_movcc (operands) /* * cmpl op0,op1 * sbbl dest,dest + * [notl dest] * andl cf - ct, dest * [addl dest, ct] * * Size 8 - 11. */ + + if (cf == 0) + { + cf = ct; + ct = 0; + tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1); + } + tmp = expand_simple_binop (mode, AND, tmp, - GEN_INT (trunc_int_for_mode - (cf - ct, mode)), + gen_int_mode (cf - ct, mode), tmp, 1, OPTAB_DIRECT); if (ct) tmp = expand_simple_binop (mode, PLUS, @@ -8211,6 +9325,61 @@ ix86_expand_int_movcc (operands) code = reverse_condition (code); } } + + compare_code = NIL; + if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT + && GET_CODE (ix86_compare_op1) == CONST_INT) + { + if (ix86_compare_op1 == const0_rtx + && (code == LT || code == GE)) + compare_code = code; + else if (ix86_compare_op1 == constm1_rtx) + { + if (code == LE) + compare_code = LT; + else if (code == GT) + compare_code = GE; + } + } + + /* Optimize dest = (op0 < 0) ? -1 : cf. */ + if (compare_code != NIL + && GET_MODE (ix86_compare_op0) == GET_MODE (out) + && (cf == -1 || ct == -1)) + { + /* If lea code below could be used, only optimize + if it results in a 2 insn sequence. */ + + if (! 
(diff == 1 || diff == 2 || diff == 4 || diff == 8 + || diff == 3 || diff == 5 || diff == 9) + || (compare_code == LT && ct == -1) + || (compare_code == GE && cf == -1)) + { + /* + * notl op1 (if necessary) + * sarl $31, op1 + * orl cf, op1 + */ + if (ct != -1) + { + cf = ct; + ct = -1; + code = reverse_condition (code); + } + + out = emit_store_flag (out, code, ix86_compare_op0, + ix86_compare_op1, VOIDmode, 0, -1); + + out = expand_simple_binop (mode, IOR, + out, GEN_INT (cf), + out, 1, OPTAB_DIRECT); + if (out != operands[0]) + emit_move_insn (operands[0], out); + + return 1; /* DONE */ + } + } + if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 || diff == 3 || diff == 5 || diff == 9) && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf)))) @@ -8233,14 +9402,14 @@ ix86_expand_int_movcc (operands) ix86_compare_op1, VOIDmode, 0, 1); nops = 0; - /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics - done in proper mode to match. */ + /* On x86_64 the lea instruction operates on Pmode, so we need + to get arithmetics done in proper mode to match. */ if (diff == 1) - tmp = out; + tmp = copy_rtx (out); else { rtx out1; - out1 = out; + out1 = copy_rtx (out); tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); nops++; if (diff & 1) @@ -8258,21 +9427,12 @@ ix86_expand_int_movcc (operands) && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out)) { if (nops == 1) - { - rtx clob; - - clob = gen_rtx_REG (CCmode, FLAGS_REG); - clob = gen_rtx_CLOBBER (VOIDmode, clob); - - tmp = gen_rtx_SET (VOIDmode, out, tmp); - tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, copy_rtx (tmp), clob)); - emit_insn (tmp); - } + out = force_operand (tmp, copy_rtx (out)); else - emit_insn (gen_rtx_SET (VOIDmode, out, tmp)); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); } if (out != operands[0]) - emit_move_insn (operands[0], out); + emit_move_insn (operands[0], copy_rtx (out)); return 1; /* DONE */ } @@ -8298,40 +9458,68 @@ ix86_expand_int_movcc (operands) if (!optimize_size && !TARGET_CMOVE) { - if (ct == 0) + if (cf == 0) { - ct = cf; - cf = 0; + cf = ct; + ct = 0; if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) + /* We may be reversing unordered compare to normal compare, + that is not valid in general (we may convert non-trapping + condition to trapping one), however on i386 we currently + emit all comparisons unordered. */ + code = reverse_condition_maybe_unordered (code); + else + { + code = reverse_condition (code); + if (compare_code != NIL) + compare_code = reverse_condition (compare_code); + } + } + + if (compare_code != NIL) + { + /* notl op1 (if needed) + sarl $31, op1 + andl (cf-ct), op1 + addl ct, op1 + + For x < 0 (resp. x <= -1) there will be no notl, + so if possible swap the constants to get rid of the + complement. + True/false will be -1/0 while code below (store flag + followed by decrement) is 0/-1, so the constants need + to be exchanged once more. */ + + if (compare_code == GE || !cf) { - /* We may be reversing unordered compare to normal compare, - that is not valid in general (we may convert non-trapping - condition to trapping one), however on i386 we currently - emit all comparisons unordered. 
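The sbb-based sequences sketched in the comments above rely on the usual branch-free select identity.  As a small worked instance, r = (a < b) ? 5 : 12 for unsigned a, b can be computed as (register assignments arbitrary):

	cmpl	%edx, %eax		# a in %eax, b in %edx; CF = 1 iff a < b (unsigned)
	sbbl	%ecx, %ecx		# %ecx = CF ? -1 : 0
	andl	$-7, %ecx		# -7 = 5 - 12; %ecx = CF ? 5 - 12 : 0
	addl	$12, %ecx		# %ecx = CF ? 5 : 12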
*/ - compare_code = reverse_condition_maybe_unordered (compare_code); - code = reverse_condition_maybe_unordered (code); + code = reverse_condition (code); + compare_code = LT; } else { - compare_code = reverse_condition (compare_code); - code = reverse_condition (code); + HOST_WIDE_INT tmp = cf; + cf = ct; + ct = tmp; } + + out = emit_store_flag (out, code, ix86_compare_op0, + ix86_compare_op1, VOIDmode, 0, -1); } + else + { + out = emit_store_flag (out, code, ix86_compare_op0, + ix86_compare_op1, VOIDmode, 0, 1); - out = emit_store_flag (out, code, ix86_compare_op0, - ix86_compare_op1, VOIDmode, 0, 1); + out = expand_simple_binop (mode, PLUS, out, constm1_rtx, + out, 1, OPTAB_DIRECT); + } - out = expand_simple_binop (mode, PLUS, - out, constm1_rtx, - out, 1, OPTAB_DIRECT); - out = expand_simple_binop (mode, AND, - out, - GEN_INT (trunc_int_for_mode - (cf - ct, mode)), - out, 1, OPTAB_DIRECT); - out = expand_simple_binop (mode, PLUS, - out, GEN_INT (ct), + out = expand_simple_binop (mode, AND, out, + gen_int_mode (cf - ct, mode), out, 1, OPTAB_DIRECT); + if (ct) + out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct), + out, 1, OPTAB_DIRECT); if (out != operands[0]) emit_move_insn (operands[0], out); @@ -8622,13 +9810,14 @@ ix86_split_to_parts (operand, parts, mode) if (size < 2 || size > 3) abort (); - /* Optimize constant pool reference to immediates. This is used by fp moves, - that force all constants to memory to allow combining. */ - - if (GET_CODE (operand) == MEM - && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0))) - operand = get_pool_constant (XEXP (operand, 0)); + /* Optimize constant pool reference to immediates. This is used by fp + moves, that force all constants to memory to allow combining. */ + if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand)) + { + rtx tmp = maybe_get_pool_constant (operand); + if (tmp) + operand = tmp; + } if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand)) { @@ -8674,7 +9863,7 @@ ix86_split_to_parts (operand, parts, mode) case XFmode: case TFmode: REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l); - parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode)); + parts[2] = gen_int_mode (l[2], SImode); break; case DFmode: REAL_VALUE_TO_TARGET_DOUBLE (r, l); @@ -8682,8 +9871,8 @@ ix86_split_to_parts (operand, parts, mode) default: abort (); } - parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode)); - parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode)); + parts[1] = gen_int_mode (l[1], SImode); + parts[0] = gen_int_mode (l[0], SImode); } else abort (); @@ -8718,13 +9907,13 @@ ix86_split_to_parts (operand, parts, mode) /* Do not use shift by 32 to avoid warning on 32bit systems. */ if (HOST_BITS_PER_WIDE_INT >= 64) parts[0] - = GEN_INT (trunc_int_for_mode + = gen_int_mode ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1)) + ((((HOST_WIDE_INT) l[1]) << 31) << 1), - DImode)); + DImode); else parts[0] = immed_double_const (l[0], l[1], DImode); - parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode)); + parts[1] = gen_int_mode (l[2], SImode); } else abort (); @@ -8819,15 +10008,24 @@ ix86_split_long_move (operands) Do an lea to the last part and use only one colliding move. */ else if (collisions > 1) { + rtx base; + collisions = 1; - emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1], - XEXP (part[1][0], 0))); - part[1][0] = change_address (part[1][0], - TARGET_64BIT ? 
DImode : SImode, - part[0][nparts - 1]); - part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD); + + base = part[0][nparts - 1]; + + /* Handle the case when the last part isn't valid for lea. + Happens in 64-bit mode storing the 12-byte XFmode. */ + if (GET_MODE (base) != Pmode) + base = gen_rtx_REG (Pmode, REGNO (base)); + + emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0))); + part[1][0] = replace_equiv_address (part[1][0], base); + part[1][1] = replace_equiv_address (part[1][1], + plus_constant (base, UNITS_PER_WORD)); if (nparts == 3) - part[1][2] = adjust_address (part[1][0], VOIDmode, 8); + part[1][2] = replace_equiv_address (part[1][2], + plus_constant (base, 8)); } } @@ -9356,7 +10554,7 @@ ix86_expand_movstr (dst, src, count_exp, align_exp) end_sequence (); ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg); - emit_insns (insns); + emit_insn (insns); return 1; } @@ -9757,8 +10955,7 @@ ix86_expand_strlensi_unroll_1 (out, align_rtx) emit_insn (gen_one_cmplsi2 (scratch, scratch)); emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); emit_insn (gen_andsi3 (tmpreg, tmpreg, - GEN_INT (trunc_int_for_mode - (0x80808080, SImode)))); + gen_int_mode (0x80808080, SImode))); emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, align_4_label); @@ -9824,43 +11021,66 @@ ix86_expand_strlensi_unroll_1 (out, align_rtx) emit_label (end_0_label); } - -/* Clear stack slot assignments remembered from previous functions. - This is called from INIT_EXPANDERS once before RTL is emitted for each - function. */ -static void -ix86_init_machine_status (p) - struct function *p; +void +ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop) + rtx retval, fnaddr, callarg1, callarg2, pop; { - p->machine = (struct machine_function *) - xcalloc (1, sizeof (struct machine_function)); -} + rtx use = NULL, call; -/* Mark machine specific bits of P for GC. */ -static void -ix86_mark_machine_status (p) - struct function *p; -{ - struct machine_function *machine = p->machine; - enum machine_mode mode; - int n; + if (pop == const0_rtx) + pop = NULL; + if (TARGET_64BIT && pop) + abort (); - if (! machine) - return; +#if TARGET_MACHO + if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) + fnaddr = machopic_indirect_call_target (fnaddr); +#else + /* Static functions and indirect calls don't need the pic register. */ + if (! TARGET_64BIT && flag_pic + && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF + && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0))) + use_reg (&use, pic_offset_table_rtx); + + if (TARGET_64BIT && INTVAL (callarg2) >= 0) + { + rtx al = gen_rtx_REG (QImode, 0); + emit_move_insn (al, callarg2); + use_reg (&use, al); + } +#endif /* TARGET_MACHO */ + + if (! 
call_insn_operand (XEXP (fnaddr, 0), Pmode)) + { + fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); + fnaddr = gen_rtx_MEM (QImode, fnaddr); + } - for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE; - mode = (enum machine_mode) ((int) mode + 1)) - for (n = 0; n < MAX_386_STACK_LOCALS; n++) - ggc_mark_rtx (machine->stack_locals[(int) mode][n]); + call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); + if (retval) + call = gen_rtx_SET (VOIDmode, retval, call); + if (pop) + { + pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); + pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); + call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); + } + + call = emit_call_insn (call); + if (use) + CALL_INSN_FUNCTION_USAGE (call) = use; } -static void -ix86_free_machine_status (p) - struct function *p; + +/* Clear stack slot assignments remembered from previous functions. + This is called from INIT_EXPANDERS once before RTL is emitted for each + function. */ + +static struct machine_function * +ix86_init_machine_status () { - free (p->machine); - p->machine = NULL; + return ggc_alloc_cleared (sizeof (struct machine_function)); } /* Return a MEM corresponding to a stack slot with mode MODE. @@ -9883,6 +11103,24 @@ assign_386_stack_local (mode, n) return ix86_stack_locals[(int) mode][n]; } + +/* Construct the SYMBOL_REF for the tls_get_addr function. */ + +static GTY(()) rtx ix86_tls_symbol; +rtx +ix86_tls_get_addr () +{ + + if (!ix86_tls_symbol) + { + ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, + (TARGET_GNU_TLS && !TARGET_64BIT) + ? "___tls_get_addr" + : "__tls_get_addr"); + } + + return ix86_tls_symbol; +} /* Calculate the length of the memory address in the instruction encoding. Does not include the one-byte modrm, opcode, or prefix. */ @@ -9930,7 +11168,8 @@ memory_address_length (addr) if (disp) { if (GET_CODE (disp) == CONST_INT - && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')) + && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K') + && base) len = 1; else len = 4; @@ -9944,8 +11183,8 @@ memory_address_length (addr) return len; } -/* Compute default value for "length_immediate" attribute. When SHORTFORM is set - expect that insn have 8bit immediate alternative. */ +/* Compute default value for "length_immediate" attribute. When SHORTFORM + is set, expect that insn have 8bit immediate alternative. 
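   (When such an alternative exists and the value fits in a signed 8-bit
   immediate, the operand is encoded in a single byte instead of the full
   2- or 4-byte immediate field, so the computed length is 1.)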
*/ int ix86_attr_length_immediate_default (insn, shortform) rtx insn; @@ -9993,6 +11232,26 @@ ix86_attr_length_address_default (insn) rtx insn; { int i; + + if (get_attr_type (insn) == TYPE_LEA) + { + rtx set = PATTERN (insn); + if (GET_CODE (set) == SET) + ; + else if (GET_CODE (set) == PARALLEL + && GET_CODE (XVECEXP (set, 0, 0)) == SET) + set = XVECEXP (set, 0, 0); + else + { +#ifdef ENABLE_CHECKING + abort (); +#endif + return 0; + } + + return memory_address_length (SET_SRC (set)); + } + extract_insn_cached (insn); for (i = recog_data.n_operands - 1; i >= 0; --i) if (GET_CODE (recog_data.operand[i]) == MEM) @@ -10271,46 +11530,6 @@ static union } ppro; } ix86_sched_data; -static int -ix86_safe_length (insn) - rtx insn; -{ - if (recog_memoized (insn) >= 0) - return get_attr_length (insn); - else - return 128; -} - -static int -ix86_safe_length_prefix (insn) - rtx insn; -{ - if (recog_memoized (insn) >= 0) - return get_attr_length (insn); - else - return 0; -} - -static enum attr_memory -ix86_safe_memory (insn) - rtx insn; -{ - if (recog_memoized (insn) >= 0) - return get_attr_memory (insn); - else - return MEMORY_UNKNOWN; -} - -static enum attr_pent_pair -ix86_safe_pent_pair (insn) - rtx insn; -{ - if (recog_memoized (insn) >= 0) - return get_attr_pent_pair (insn); - else - return PENT_PAIR_NP; -} - static enum attr_ppro_uops ix86_safe_ppro_uops (insn) rtx insn; @@ -10364,129 +11583,6 @@ ix86_reorder_insn (insnp, slot) } } -/* Find an instruction with given pairability and minimal amount of cycles - lost by the fact that the CPU waits for both pipelines to finish before - reading next instructions. Also take care that both instructions together - can not exceed 7 bytes. */ - -static rtx * -ix86_pent_find_pair (e_ready, ready, type, first) - rtx *e_ready; - rtx *ready; - enum attr_pent_pair type; - rtx first; -{ - int mincycles, cycles; - enum attr_pent_pair tmp; - enum attr_memory memory; - rtx *insnp, *bestinsnp = NULL; - - if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first)) - return NULL; - - memory = ix86_safe_memory (first); - cycles = result_ready_cost (first); - mincycles = INT_MAX; - - for (insnp = e_ready; insnp >= ready && mincycles; --insnp) - if ((tmp = ix86_safe_pent_pair (*insnp)) == type - && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp)) - { - enum attr_memory second_memory; - int secondcycles, currentcycles; - - second_memory = ix86_safe_memory (*insnp); - secondcycles = result_ready_cost (*insnp); - currentcycles = abs (cycles - secondcycles); - - if (secondcycles >= 1 && cycles >= 1) - { - /* Two read/modify/write instructions together takes two - cycles longer. */ - if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH) - currentcycles += 2; - - /* Read modify/write instruction followed by read/modify - takes one cycle longer. */ - if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD - && tmp != PENT_PAIR_UV - && ix86_safe_pent_pair (first) != PENT_PAIR_UV) - currentcycles += 1; - } - if (currentcycles < mincycles) - bestinsnp = insnp, mincycles = currentcycles; - } - - return bestinsnp; -} - -/* Subroutines of ix86_sched_reorder. */ - -static void -ix86_sched_reorder_pentium (ready, e_ready) - rtx *ready; - rtx *e_ready; -{ - enum attr_pent_pair pair1, pair2; - rtx *insnp; - - /* This wouldn't be necessary if Haifa knew that static insn ordering - is important to which pipe an insn is issued to. So we have to make - some minor rearrangements. 
*/ - - pair1 = ix86_safe_pent_pair (*e_ready); - - /* If the first insn is non-pairable, let it be. */ - if (pair1 == PENT_PAIR_NP) - return; - - pair2 = PENT_PAIR_NP; - insnp = 0; - - /* If the first insn is UV or PV pairable, search for a PU - insn to go with. */ - if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV) - { - insnp = ix86_pent_find_pair (e_ready-1, ready, - PENT_PAIR_PU, *e_ready); - if (insnp) - pair2 = PENT_PAIR_PU; - } - - /* If the first insn is PU or UV pairable, search for a PV - insn to go with. */ - if (pair2 == PENT_PAIR_NP - && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV)) - { - insnp = ix86_pent_find_pair (e_ready-1, ready, - PENT_PAIR_PV, *e_ready); - if (insnp) - pair2 = PENT_PAIR_PV; - } - - /* If the first insn is pairable, search for a UV - insn to go with. */ - if (pair2 == PENT_PAIR_NP) - { - insnp = ix86_pent_find_pair (e_ready-1, ready, - PENT_PAIR_UV, *e_ready); - if (insnp) - pair2 = PENT_PAIR_UV; - } - - if (pair2 == PENT_PAIR_NP) - return; - - /* Found something! Decide if we need to swap the order. */ - if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU - || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV - && ix86_safe_memory (*e_ready) == MEMORY_BOTH - && ix86_safe_memory (*insnp) == MEMORY_LOAD)) - ix86_reorder_insn (insnp, e_ready); - else - ix86_reorder_insn (insnp, e_ready - 1); -} - static void ix86_sched_reorder_ppro (ready, e_ready) rtx *ready; @@ -10541,7 +11637,7 @@ ix86_sched_reorder_ppro (ready, e_ready) for (i = 1; i < 3; ++i) if (decode[i] == NULL) { - if (ready >= e_ready) + if (ready > e_ready) goto ppro_done; insnp = e_ready; @@ -10585,18 +11681,20 @@ ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var) int n_ready = *n_readyp; rtx *e_ready = ready + n_ready - 1; + /* Make sure to go ahead and initialize key items in + ix86_sched_data if we are not going to bother trying to + reorder the ready queue. */ if (n_ready < 2) - goto out; + { + ix86_sched_data.ppro.issued_this_cycle = 1; + goto out; + } switch (ix86_cpu) { default: break; - case PROCESSOR_PENTIUM: - ix86_sched_reorder_pentium (ready, e_ready); - break; - case PROCESSOR_PENTIUMPRO: ix86_sched_reorder_ppro (ready, e_ready); break; @@ -10668,6 +11766,28 @@ ix86_variable_issue (dump, sched_verbose, insn, can_issue_more) return --ix86_sched_data.ppro.issued_this_cycle; } } + +static int +ia32_use_dfa_pipeline_interface () +{ + if (ix86_cpu == PROCESSOR_PENTIUM) + return 1; + return 0; +} + +/* How many alternative schedules to try. This should be as wide as the + scheduling freedom in the DFA, but no wider. Making this value too + large results extra work for the scheduler. 
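   (Only the Pentium uses the DFA interface here, see
   ia32_use_dfa_pipeline_interface above; its two execution pipes, U and V,
   can issue at most two instructions per cycle, so a lookahead of 2 is
   enough.)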
*/ + +static int +ia32_multipass_dfa_lookahead () +{ + if (ix86_cpu == PROCESSOR_PENTIUM) + return 2; + else + return 0; +} + /* Walk through INSNS and look for MEM references whose address is DSTREG or SRCREG and set the memory attribute to those of DSTREF and SRCREF, as @@ -10877,10 +11997,10 @@ x86_initialize_trampoline (tramp, fnaddr, cxt) plus_constant (tramp, 10), NULL_RTX, 1, OPTAB_DIRECT); emit_move_insn (gen_rtx_MEM (QImode, tramp), - GEN_INT (trunc_int_for_mode (0xb9, QImode))); + gen_int_mode (0xb9, QImode)); emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt); emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)), - GEN_INT (trunc_int_for_mode (0xe9, QImode))); + gen_int_mode (0xe9, QImode)); emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp); } else @@ -10893,7 +12013,7 @@ x86_initialize_trampoline (tramp, fnaddr, cxt) { fnaddr = copy_to_mode_reg (DImode, fnaddr); emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), - GEN_INT (trunc_int_for_mode (0xbb41, HImode))); + gen_int_mode (0xbb41, HImode)); emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)), gen_lowpart (SImode, fnaddr)); offset += 6; @@ -10901,32 +12021,39 @@ x86_initialize_trampoline (tramp, fnaddr, cxt) else { emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), - GEN_INT (trunc_int_for_mode (0xbb49, HImode))); + gen_int_mode (0xbb49, HImode)); emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), fnaddr); offset += 10; } /* Load static chain using movabs to r10. */ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), - GEN_INT (trunc_int_for_mode (0xba49, HImode))); + gen_int_mode (0xba49, HImode)); emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), cxt); offset += 10; /* Jump to the r11 */ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), - GEN_INT (trunc_int_for_mode (0xff49, HImode))); + gen_int_mode (0xff49, HImode)); emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), - GEN_INT (trunc_int_for_mode (0xe3, QImode))); + gen_int_mode (0xe3, QImode)); offset += 3; if (offset > TRAMPOLINE_SIZE) abort (); } + +#ifdef TRANSFER_FROM_TRAMPOLINE + emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"), + LCT_NORMAL, VOIDmode, 1, tramp, Pmode); +#endif } -#define def_builtin(MASK, NAME, TYPE, CODE) \ -do { \ - if ((MASK) & target_flags) \ - builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \ +#define def_builtin(MASK, NAME, TYPE, CODE) \ +do { \ + if ((MASK) & target_flags \ + && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \ + builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ + NULL, NULL_TREE); \ } while (0) struct builtin_description @@ -10939,73 +12066,97 @@ struct builtin_description const unsigned int flag; }; +/* Used for builtins that are enabled both by -msse and -msse2. 
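   The *64 variants additionally include MASK_64BIT; def_builtin above
   registers a builtin whose mask contains MASK_64BIT only when TARGET_64BIT
   is set.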
*/ +#define MASK_SSE1 (MASK_SSE | MASK_SSE2) +#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT) +#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT) + static const struct builtin_description bdesc_comi[] = { - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 }, - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 }, - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 }, - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 }, - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 }, - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 } + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, 
"__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, }; static const struct builtin_description bdesc_2arg[] = { /* SSE */ - { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, - { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, - { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, - { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, - - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, - - { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, - { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, - - { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_unpcklps, 
"__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, + + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, + + { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, + + { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, + + { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, + { 
MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, /* MMX */ { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, @@ -11018,15 +12169,15 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, @@ -11035,10 +12186,10 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, 
CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, @@ -11052,8 +12203,9 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, @@ -11074,25 +12226,185 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 } - + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, + + /* SSE2 */ + { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 
}, + { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 }, + + { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, + + /* SSE2 MMX */ + { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, + + { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, + { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, + { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, + { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 }, + { 
MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 
0, 0 }, + { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, + { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 } }; static const struct builtin_description bdesc_1arg[] = { - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, + + { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, - { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, - { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, - { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, - { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 } + { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 }, + { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, + { MASK_SSE264, 
CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 } }; void @@ -11110,218 +12422,303 @@ ix86_init_mmx_sse_builtins () { const struct builtin_description * d; size_t i; - tree endlink = void_list_node; tree pchar_type_node = build_pointer_type (char_type_node); + tree pcchar_type_node = build_pointer_type ( + build_type_variant (char_type_node, 1, 0)); tree pfloat_type_node = build_pointer_type (float_type_node); + tree pcfloat_type_node = build_pointer_type ( + build_type_variant (float_type_node, 1, 0)); tree pv2si_type_node = build_pointer_type (V2SI_type_node); + tree pv2di_type_node = build_pointer_type (V2DI_type_node); tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); /* Comparisons. */ tree int_ftype_v4sf_v4sf - = build_function_type (integer_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - tree_cons (NULL_TREE, - V4SF_type_node, - endlink))); + = build_function_type_list (integer_type_node, + V4SF_type_node, V4SF_type_node, NULL_TREE); tree v4si_ftype_v4sf_v4sf - = build_function_type (V4SI_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - tree_cons (NULL_TREE, - V4SF_type_node, - endlink))); + = build_function_type_list (V4SI_type_node, + V4SF_type_node, V4SF_type_node, NULL_TREE); /* MMX/SSE/integer conversions. */ tree int_ftype_v4sf - = build_function_type (integer_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - endlink)); + = build_function_type_list (integer_type_node, + V4SF_type_node, NULL_TREE); + tree int64_ftype_v4sf + = build_function_type_list (long_long_integer_type_node, + V4SF_type_node, NULL_TREE); tree int_ftype_v8qi - = build_function_type (integer_type_node, - tree_cons (NULL_TREE, V8QI_type_node, - endlink)); + = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); tree v4sf_ftype_v4sf_int - = build_function_type (V4SF_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - tree_cons (NULL_TREE, integer_type_node, - endlink))); + = build_function_type_list (V4SF_type_node, + V4SF_type_node, integer_type_node, NULL_TREE); + tree v4sf_ftype_v4sf_int64 + = build_function_type_list (V4SF_type_node, + V4SF_type_node, long_long_integer_type_node, + NULL_TREE); tree v4sf_ftype_v4sf_v2si - = build_function_type (V4SF_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - tree_cons (NULL_TREE, V2SI_type_node, - endlink))); + = build_function_type_list (V4SF_type_node, + V4SF_type_node, V2SI_type_node, NULL_TREE); tree int_ftype_v4hi_int - = build_function_type (integer_type_node, - tree_cons (NULL_TREE, V4HI_type_node, - tree_cons (NULL_TREE, integer_type_node, - endlink))); + = build_function_type_list (integer_type_node, + V4HI_type_node, integer_type_node, NULL_TREE); tree v4hi_ftype_v4hi_int_int - = build_function_type (V4HI_type_node, - tree_cons (NULL_TREE, V4HI_type_node, - tree_cons (NULL_TREE, integer_type_node, - tree_cons (NULL_TREE, - integer_type_node, - endlink)))); + = build_function_type_list (V4HI_type_node, V4HI_type_node, + integer_type_node, integer_type_node, + NULL_TREE); /* Miscellaneous. 
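   The local type nodes below follow the naming convention
   <result>_ftype_<arg1>_<arg2>; for example, v8qi_ftype_v4hi_v4hi is the type
   of a function taking two V4HI vectors and returning a V8QI vector.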
*/ tree v8qi_ftype_v4hi_v4hi - = build_function_type (V8QI_type_node, - tree_cons (NULL_TREE, V4HI_type_node, - tree_cons (NULL_TREE, V4HI_type_node, - endlink))); + = build_function_type_list (V8QI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); tree v4hi_ftype_v2si_v2si - = build_function_type (V4HI_type_node, - tree_cons (NULL_TREE, V2SI_type_node, - tree_cons (NULL_TREE, V2SI_type_node, - endlink))); + = build_function_type_list (V4HI_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); tree v4sf_ftype_v4sf_v4sf_int - = build_function_type (V4SF_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - tree_cons (NULL_TREE, - integer_type_node, - endlink)))); - tree v4hi_ftype_v8qi_v8qi - = build_function_type (V4HI_type_node, - tree_cons (NULL_TREE, V8QI_type_node, - tree_cons (NULL_TREE, V8QI_type_node, - endlink))); + = build_function_type_list (V4SF_type_node, + V4SF_type_node, V4SF_type_node, + integer_type_node, NULL_TREE); tree v2si_ftype_v4hi_v4hi - = build_function_type (V2SI_type_node, - tree_cons (NULL_TREE, V4HI_type_node, - tree_cons (NULL_TREE, V4HI_type_node, - endlink))); + = build_function_type_list (V2SI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); tree v4hi_ftype_v4hi_int - = build_function_type (V4HI_type_node, - tree_cons (NULL_TREE, V4HI_type_node, - tree_cons (NULL_TREE, integer_type_node, - endlink))); + = build_function_type_list (V4HI_type_node, + V4HI_type_node, integer_type_node, NULL_TREE); tree v4hi_ftype_v4hi_di - = build_function_type (V4HI_type_node, - tree_cons (NULL_TREE, V4HI_type_node, - tree_cons (NULL_TREE, - long_long_integer_type_node, - endlink))); + = build_function_type_list (V4HI_type_node, + V4HI_type_node, long_long_unsigned_type_node, + NULL_TREE); tree v2si_ftype_v2si_di - = build_function_type (V2SI_type_node, - tree_cons (NULL_TREE, V2SI_type_node, - tree_cons (NULL_TREE, - long_long_integer_type_node, - endlink))); + = build_function_type_list (V2SI_type_node, + V2SI_type_node, long_long_unsigned_type_node, + NULL_TREE); tree void_ftype_void - = build_function_type (void_type_node, endlink); + = build_function_type (void_type_node, void_list_node); tree void_ftype_unsigned - = build_function_type (void_type_node, - tree_cons (NULL_TREE, unsigned_type_node, - endlink)); + = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); tree unsigned_ftype_void - = build_function_type (unsigned_type_node, endlink); + = build_function_type (unsigned_type_node, void_list_node); tree di_ftype_void - = build_function_type (long_long_unsigned_type_node, endlink); + = build_function_type (long_long_unsigned_type_node, void_list_node); tree v4sf_ftype_void - = build_function_type (V4SF_type_node, endlink); + = build_function_type (V4SF_type_node, void_list_node); tree v2si_ftype_v4sf - = build_function_type (V2SI_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - endlink)); + = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); /* Loads/stores. 
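   The pointer-to-const types defined above with build_type_variant
   (pcchar_type_node, pcfloat_type_node) are used for the load builtins, e.g.
   __builtin_ia32_loadaps becomes v4sf_ftype_pcfloat below, while the store
   builtins keep the plain pointer types such as void_ftype_pfloat_v4sf.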
*/ - tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node, - tree_cons (NULL_TREE, V8QI_type_node, - tree_cons (NULL_TREE, - pchar_type_node, - endlink))); tree void_ftype_v8qi_v8qi_pchar - = build_function_type (void_type_node, maskmovq_args); - tree v4sf_ftype_pfloat - = build_function_type (V4SF_type_node, - tree_cons (NULL_TREE, pfloat_type_node, - endlink)); + = build_function_type_list (void_type_node, + V8QI_type_node, V8QI_type_node, + pchar_type_node, NULL_TREE); + tree v4sf_ftype_pcfloat + = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); /* @@@ the type is bogus */ tree v4sf_ftype_v4sf_pv2si - = build_function_type (V4SF_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - tree_cons (NULL_TREE, pv2si_type_node, - endlink))); + = build_function_type_list (V4SF_type_node, + V4SF_type_node, pv2si_type_node, NULL_TREE); tree void_ftype_pv2si_v4sf - = build_function_type (void_type_node, - tree_cons (NULL_TREE, pv2si_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - endlink))); + = build_function_type_list (void_type_node, + pv2si_type_node, V4SF_type_node, NULL_TREE); tree void_ftype_pfloat_v4sf - = build_function_type (void_type_node, - tree_cons (NULL_TREE, pfloat_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - endlink))); + = build_function_type_list (void_type_node, + pfloat_type_node, V4SF_type_node, NULL_TREE); tree void_ftype_pdi_di - = build_function_type (void_type_node, - tree_cons (NULL_TREE, pdi_type_node, - tree_cons (NULL_TREE, - long_long_unsigned_type_node, - endlink))); + = build_function_type_list (void_type_node, + pdi_type_node, long_long_unsigned_type_node, + NULL_TREE); + tree void_ftype_pv2di_v2di + = build_function_type_list (void_type_node, + pv2di_type_node, V2DI_type_node, NULL_TREE); /* Normal vector unops. */ tree v4sf_ftype_v4sf - = build_function_type (V4SF_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - endlink)); + = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); /* Normal vector binops. 
*/ tree v4sf_ftype_v4sf_v4sf - = build_function_type (V4SF_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - tree_cons (NULL_TREE, V4SF_type_node, - endlink))); + = build_function_type_list (V4SF_type_node, + V4SF_type_node, V4SF_type_node, NULL_TREE); tree v8qi_ftype_v8qi_v8qi - = build_function_type (V8QI_type_node, - tree_cons (NULL_TREE, V8QI_type_node, - tree_cons (NULL_TREE, V8QI_type_node, - endlink))); + = build_function_type_list (V8QI_type_node, + V8QI_type_node, V8QI_type_node, NULL_TREE); tree v4hi_ftype_v4hi_v4hi - = build_function_type (V4HI_type_node, - tree_cons (NULL_TREE, V4HI_type_node, - tree_cons (NULL_TREE, V4HI_type_node, - endlink))); + = build_function_type_list (V4HI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); tree v2si_ftype_v2si_v2si - = build_function_type (V2SI_type_node, - tree_cons (NULL_TREE, V2SI_type_node, - tree_cons (NULL_TREE, V2SI_type_node, - endlink))); + = build_function_type_list (V2SI_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); tree di_ftype_di_di - = build_function_type (long_long_unsigned_type_node, - tree_cons (NULL_TREE, long_long_unsigned_type_node, - tree_cons (NULL_TREE, - long_long_unsigned_type_node, - endlink))); + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, NULL_TREE); tree v2si_ftype_v2sf - = build_function_type (V2SI_type_node, - tree_cons (NULL_TREE, V2SF_type_node, - endlink)); + = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); tree v2sf_ftype_v2si - = build_function_type (V2SF_type_node, - tree_cons (NULL_TREE, V2SI_type_node, - endlink)); + = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); tree v2si_ftype_v2si - = build_function_type (V2SI_type_node, - tree_cons (NULL_TREE, V2SI_type_node, - endlink)); + = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); tree v2sf_ftype_v2sf - = build_function_type (V2SF_type_node, - tree_cons (NULL_TREE, V2SF_type_node, - endlink)); + = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); tree v2sf_ftype_v2sf_v2sf - = build_function_type (V2SF_type_node, - tree_cons (NULL_TREE, V2SF_type_node, - tree_cons (NULL_TREE, - V2SF_type_node, - endlink))); + = build_function_type_list (V2SF_type_node, + V2SF_type_node, V2SF_type_node, NULL_TREE); tree v2si_ftype_v2sf_v2sf - = build_function_type (V2SI_type_node, - tree_cons (NULL_TREE, V2SF_type_node, - tree_cons (NULL_TREE, - V2SF_type_node, - endlink))); + = build_function_type_list (V2SI_type_node, + V2SF_type_node, V2SF_type_node, NULL_TREE); + tree pint_type_node = build_pointer_type (integer_type_node); + tree pcint_type_node = build_pointer_type ( + build_type_variant (integer_type_node, 1, 0)); + tree pdouble_type_node = build_pointer_type (double_type_node); + tree pcdouble_type_node = build_pointer_type ( + build_type_variant (double_type_node, 1, 0)); + tree int_ftype_v2df_v2df + = build_function_type_list (integer_type_node, + V2DF_type_node, V2DF_type_node, NULL_TREE); + + tree ti_ftype_void + = build_function_type (intTI_type_node, void_list_node); + tree v2di_ftype_void + = build_function_type (V2DI_type_node, void_list_node); + tree ti_ftype_ti_ti + = build_function_type_list (intTI_type_node, + intTI_type_node, intTI_type_node, NULL_TREE); + tree void_ftype_pcvoid + = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); + tree v2di_ftype_di + = build_function_type_list (V2DI_type_node, + long_long_unsigned_type_node, NULL_TREE); 
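   /* Editorial illustration (not part of the patch): throughout this function
      the old tree_cons/endlink idiom for building function types is replaced
      by build_function_type_list, which takes the return type followed by the
      argument types and a terminating NULL_TREE.  A binary V4SF operation
      type, for instance, changes from

          build_function_type (V4SF_type_node,
                               tree_cons (NULL_TREE, V4SF_type_node,
                                          tree_cons (NULL_TREE, V4SF_type_node,
                                                     endlink)))

      to the equivalent but much shorter

          build_function_type_list (V4SF_type_node,
                                    V4SF_type_node, V4SF_type_node, NULL_TREE);

      Nullary types such as v4sf_ftype_void still use build_function_type with
      void_list_node as the argument list.  */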
+ tree di_ftype_v2di + = build_function_type_list (long_long_unsigned_type_node, + V2DI_type_node, NULL_TREE); + tree v4sf_ftype_v4si + = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); + tree v4si_ftype_v4sf + = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); + tree v2df_ftype_v4si + = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); + tree v4si_ftype_v2df + = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); + tree v2si_ftype_v2df + = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); + tree v4sf_ftype_v2df + = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); + tree v2df_ftype_v2si + = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); + tree v2df_ftype_v4sf + = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); + tree int_ftype_v2df + = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); + tree int64_ftype_v2df + = build_function_type_list (long_long_integer_type_node, + V2DF_type_node, NULL_TREE); + tree v2df_ftype_v2df_int + = build_function_type_list (V2DF_type_node, + V2DF_type_node, integer_type_node, NULL_TREE); + tree v2df_ftype_v2df_int64 + = build_function_type_list (V2DF_type_node, + V2DF_type_node, long_long_integer_type_node, + NULL_TREE); + tree v4sf_ftype_v4sf_v2df + = build_function_type_list (V4SF_type_node, + V4SF_type_node, V2DF_type_node, NULL_TREE); + tree v2df_ftype_v2df_v4sf + = build_function_type_list (V2DF_type_node, + V2DF_type_node, V4SF_type_node, NULL_TREE); + tree v2df_ftype_v2df_v2df_int + = build_function_type_list (V2DF_type_node, + V2DF_type_node, V2DF_type_node, + integer_type_node, + NULL_TREE); + tree v2df_ftype_v2df_pv2si + = build_function_type_list (V2DF_type_node, + V2DF_type_node, pv2si_type_node, NULL_TREE); + tree void_ftype_pv2si_v2df + = build_function_type_list (void_type_node, + pv2si_type_node, V2DF_type_node, NULL_TREE); + tree void_ftype_pdouble_v2df + = build_function_type_list (void_type_node, + pdouble_type_node, V2DF_type_node, NULL_TREE); + tree void_ftype_pint_int + = build_function_type_list (void_type_node, + pint_type_node, integer_type_node, NULL_TREE); + tree void_ftype_v16qi_v16qi_pchar + = build_function_type_list (void_type_node, + V16QI_type_node, V16QI_type_node, + pchar_type_node, NULL_TREE); + tree v2df_ftype_pcdouble + = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); + tree v2df_ftype_v2df_v2df + = build_function_type_list (V2DF_type_node, + V2DF_type_node, V2DF_type_node, NULL_TREE); + tree v16qi_ftype_v16qi_v16qi + = build_function_type_list (V16QI_type_node, + V16QI_type_node, V16QI_type_node, NULL_TREE); + tree v8hi_ftype_v8hi_v8hi + = build_function_type_list (V8HI_type_node, + V8HI_type_node, V8HI_type_node, NULL_TREE); + tree v4si_ftype_v4si_v4si + = build_function_type_list (V4SI_type_node, + V4SI_type_node, V4SI_type_node, NULL_TREE); + tree v2di_ftype_v2di_v2di + = build_function_type_list (V2DI_type_node, + V2DI_type_node, V2DI_type_node, NULL_TREE); + tree v2di_ftype_v2df_v2df + = build_function_type_list (V2DI_type_node, + V2DF_type_node, V2DF_type_node, NULL_TREE); + tree v2df_ftype_v2df + = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); + tree v2df_ftype_double + = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE); + tree v2df_ftype_double_double + = build_function_type_list (V2DF_type_node, + double_type_node, double_type_node, NULL_TREE); + tree 
int_ftype_v8hi_int + = build_function_type_list (integer_type_node, + V8HI_type_node, integer_type_node, NULL_TREE); + tree v8hi_ftype_v8hi_int_int + = build_function_type_list (V8HI_type_node, + V8HI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v2di_ftype_v2di_int + = build_function_type_list (V2DI_type_node, + V2DI_type_node, integer_type_node, NULL_TREE); + tree v4si_ftype_v4si_int + = build_function_type_list (V4SI_type_node, + V4SI_type_node, integer_type_node, NULL_TREE); + tree v8hi_ftype_v8hi_int + = build_function_type_list (V8HI_type_node, + V8HI_type_node, integer_type_node, NULL_TREE); + tree v8hi_ftype_v8hi_v2di + = build_function_type_list (V8HI_type_node, + V8HI_type_node, V2DI_type_node, NULL_TREE); + tree v4si_ftype_v4si_v2di + = build_function_type_list (V4SI_type_node, + V4SI_type_node, V2DI_type_node, NULL_TREE); + tree v4si_ftype_v8hi_v8hi + = build_function_type_list (V4SI_type_node, + V8HI_type_node, V8HI_type_node, NULL_TREE); + tree di_ftype_v8qi_v8qi + = build_function_type_list (long_long_unsigned_type_node, + V8QI_type_node, V8QI_type_node, NULL_TREE); + tree v2di_ftype_v16qi_v16qi + = build_function_type_list (V2DI_type_node, + V16QI_type_node, V16QI_type_node, NULL_TREE); + tree int_ftype_v16qi + = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); + tree v16qi_ftype_pcchar + = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); + tree void_ftype_pchar_v16qi + = build_function_type_list (void_type_node, + pchar_type_node, V16QI_type_node, NULL_TREE); + tree v4si_ftype_pcint + = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE); + tree void_ftype_pcint_v4si + = build_function_type_list (void_type_node, + pcint_type_node, V4SI_type_node, NULL_TREE); + tree v2di_ftype_v2di + = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); /* Add all builtins that are more or less simple operations on two operands. */ - for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++) + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) { /* Use one of the operands; the target can have a different mode for mask-generating compares. */ @@ -11334,6 +12731,24 @@ ix86_init_mmx_sse_builtins () switch (mode) { + case V16QImode: + type = v16qi_ftype_v16qi_v16qi; + break; + case V8HImode: + type = v8hi_ftype_v8hi_v8hi; + break; + case V4SImode: + type = v4si_ftype_v4si_v4si; + break; + case V2DImode: + type = v2di_ftype_v2di_v2di; + break; + case V2DFmode: + type = v2df_ftype_v2df_v2df; + break; + case TImode: + type = ti_ftype_ti_ti; + break; case V4SFmode: type = v4sf_ftype_v4sf_v4sf; break; @@ -11361,14 +12776,18 @@ ix86_init_mmx_sse_builtins () || d->icode == CODE_FOR_vmmaskncmpv4sf3) type = v4si_ftype_v4sf_v4sf; + if (d->icode == CODE_FOR_maskcmpv2df3 + || d->icode == CODE_FOR_maskncmpv2df3 + || d->icode == CODE_FOR_vmmaskcmpv2df3 + || d->icode == CODE_FOR_vmmaskncmpv2df3) + type = v2di_ftype_v2df_v2df; + def_builtin (d->mask, d->name, type, d->code); } /* Add the remaining MMX insns with somewhat more complicated types. 
*/ def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO); def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); - def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); - def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); @@ -11384,59 +12803,62 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); /* comi/ucomi insns. */ - for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++) - def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); + for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) + if (d->mask == MASK_SSE2) + def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); + else + def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); - def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); - def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); - def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); - def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); - def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); - def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); - - def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS); - def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS); - def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS); - def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS); - - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); - - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); - - def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS); - def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS); - def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS); - def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); - def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); - def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); - - def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); - def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); - def_builtin (MASK_SSE, 
"__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); - def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); - - def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); - def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); - - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); - - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); - - def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); - def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); - def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); - def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); - def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); - def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); - - def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); + def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); + def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); + def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); + def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); + def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); + def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); + def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); + def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); + def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); + def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); + def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); + + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); + + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); + + def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); + def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); + def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); + def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); + + def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); + def_builtin 
(MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); + def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); + def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); + + def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); + def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); + + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); + + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); + + def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); + def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); + def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); + def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); + def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); + def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); + + def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); /* Original 3DNow! */ def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); @@ -11468,7 +12890,116 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); - def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); + def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); + + /* SSE2 */ + def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128); + def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128); + + def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); + def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ); + def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q); + + def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD); + def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD); + + def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", 
void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD); + + def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); + def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); + def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); + def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); + def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); + + def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); + def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); + def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); + def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); + + def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); + def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); + + def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); + def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); + def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); + def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); + + def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1); + def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD); + def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1); + def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD); + def_builtin (MASK_SSE2, 
"__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1); + def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD); + + def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); + def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); + def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); + + def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA); + def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); + def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD); + def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA); + def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); + def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED); + def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ); + + def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); + + def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128); + def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128); + def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); + + def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); + + def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128); + + def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); + def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); + + def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); + + def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); + + def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); } /* Errors in the source file can cause expand_expr to return const0_rtx @@ -11519,6 +13050,13 @@ ix86_expand_binop_builtin (icode, arglist, target) || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); + if (GET_MODE (op1) == SImode && mode1 == TImode) + { + rtx x = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_loadd (x, op1)); + op1 = gen_lowpart (TImode, x); + } + /* In case the insn wants input operands in modes different from the result, abort. */ if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1) @@ -11542,45 +13080,6 @@ ix86_expand_binop_builtin (icode, arglist, target) return target; } -/* In type_for_mode we restrict the ability to create TImode types - to hosts with 64-bit H_W_I. So we've defined the SSE logicals - to have a V4SFmode signature. Convert them in-place to TImode. */ - -static rtx -ix86_expand_timode_binop_builtin (icode, arglist, target) - enum insn_code icode; - tree arglist; - rtx target; -{ - rtx pat; - tree arg0 = TREE_VALUE (arglist); - tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); - rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); - - op0 = gen_lowpart (TImode, op0); - op1 = gen_lowpart (TImode, op1); - target = gen_reg_rtx (TImode); - - if (! (*insn_data[icode].operand[1].predicate) (op0, TImode)) - op0 = copy_to_mode_reg (TImode, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, TImode)) - op1 = copy_to_mode_reg (TImode, op1); - - /* In the commutative cases, both op0 and op1 are nonimmediate_operand, - yet one of the two must not be a memory. This is normally enforced - by expanders, but we didn't bother to create one here. */ - if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM) - op0 = copy_to_mode_reg (TImode, op0); - - pat = GEN_FCN (icode) (target, op0, op1); - if (! pat) - return 0; - emit_insn (pat); - - return gen_lowpart (V4SFmode, target); -} - /* Subroutine of ix86_expand_builtin to take care of stores. */ static rtx @@ -11600,9 +13099,7 @@ ix86_expand_store_builtin (icode, arglist) op1 = safe_vector_operand (op1, mode1); op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); - - if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); + op1 = copy_to_mode_reg (mode1, op1); pat = GEN_FCN (icode) (op0, op1); if (pat) @@ -11672,11 +13169,11 @@ ix86_expand_unop1_builtin (icode, arglist, target) if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); - + op1 = op0; if (! (*insn_data[icode].operand[2].predicate) (op1, mode0)) op1 = copy_to_mode_reg (mode0, op1); - + pat = GEN_FCN (icode) (target, op0, op1); if (! pat) return 0; @@ -11778,14 +13275,14 @@ ix86_expand_sse_comi (d, arglist, target) op1 = copy_to_mode_reg (mode1, op1); op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); - pat = GEN_FCN (d->icode) (op0, op1, op2); + pat = GEN_FCN (d->icode) (op0, op1); if (! pat) return 0; emit_insn (pat); emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), gen_rtx_fmt_ee (comparison, QImode, - gen_rtx_REG (CCmode, FLAGS_REG), + SET_DEST (pat), const0_rtx))); return SUBREG_REG (target); @@ -11826,7 +13323,10 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return 0; case IX86_BUILTIN_PEXTRW: - icode = CODE_FOR_mmx_pextrw; + case IX86_BUILTIN_PEXTRW128: + icode = (fcode == IX86_BUILTIN_PEXTRW + ? 
CODE_FOR_mmx_pextrw + : CODE_FOR_sse2_pextrw); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -11854,7 +13354,10 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return target; case IX86_BUILTIN_PINSRW: - icode = CODE_FOR_mmx_pinsrw; + case IX86_BUILTIN_PINSRW128: + icode = (fcode == IX86_BUILTIN_PINSRW + ? CODE_FOR_mmx_pinsrw + : CODE_FOR_sse2_pinsrw); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); @@ -11887,7 +13390,11 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return target; case IX86_BUILTIN_MASKMOVQ: - icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq; + case IX86_BUILTIN_MASKMOVDQU: + icode = (fcode == IX86_BUILTIN_MASKMOVQ + ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq) + : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64 + : CODE_FOR_sse2_maskmovdqu)); /* Note the arg order is different from the operand order. */ arg1 = TREE_VALUE (arglist); arg2 = TREE_VALUE (TREE_CHAIN (arglist)); @@ -11918,19 +13425,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_RCPSS: return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target); - case IX86_BUILTIN_ANDPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3, - arglist, target); - case IX86_BUILTIN_ANDNPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3, - arglist, target); - case IX86_BUILTIN_ORPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3, - arglist, target); - case IX86_BUILTIN_XORPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3, - arglist, target); - case IX86_BUILTIN_LOADAPS: return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1); @@ -11939,6 +13433,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_STOREAPS: return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist); + case IX86_BUILTIN_STOREUPS: return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist); @@ -11950,8 +13445,12 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_LOADHPS: case IX86_BUILTIN_LOADLPS: - icode = (fcode == IX86_BUILTIN_LOADHPS - ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps); + case IX86_BUILTIN_LOADHPD: + case IX86_BUILTIN_LOADLPD: + icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps + : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps + : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd + : CODE_FOR_sse2_movlpd); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -11975,8 +13474,12 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_STOREHPS: case IX86_BUILTIN_STORELPS: - icode = (fcode == IX86_BUILTIN_STOREHPS - ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps); + case IX86_BUILTIN_STOREHPD: + case IX86_BUILTIN_STORELPD: + icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps + : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps + : fcode == IX86_BUILTIN_STOREHPD ? 
CODE_FOR_sse2_movhpd + : CODE_FOR_sse2_movlpd); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -12012,7 +13515,10 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return copy_to_mode_reg (SImode, target); case IX86_BUILTIN_SHUFPS: - icode = CODE_FOR_sse_shufps; + case IX86_BUILTIN_SHUFPD: + icode = (fcode == IX86_BUILTIN_SHUFPS + ? CODE_FOR_sse_shufps + : CODE_FOR_sse2_shufpd); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); @@ -12045,7 +13551,13 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return target; case IX86_BUILTIN_PSHUFW: - icode = CODE_FOR_mmx_pshufw; + case IX86_BUILTIN_PSHUFD: + case IX86_BUILTIN_PSHUFHW: + case IX86_BUILTIN_PSHUFLW: + icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw + : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw + : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd + : CODE_FOR_mmx_pshufw); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -12072,6 +13584,35 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) emit_insn (pat); return target; + case IX86_BUILTIN_PSLLDQI128: + case IX86_BUILTIN_PSRLDQI128: + icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3 + : CODE_FOR_sse2_lshrti3); + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + { + op0 = copy_to_reg (op0); + op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); + } + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + { + error ("shift must be an immediate"); + return const0_rtx; + } + target = gen_reg_rtx (V2DImode); + pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1); + if (! 
pat) + return 0; + emit_insn (pat); + return target; + case IX86_BUILTIN_FEMMS: emit_insn (gen_femms ()); return NULL_RTX; @@ -12161,28 +13702,138 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) emit_insn (gen_mmx_clrdi (target)); return target; + case IX86_BUILTIN_CLRTI: + target = gen_reg_rtx (V2DImode); + emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0))); + return target; + + + case IX86_BUILTIN_SQRTSD: + return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target); + case IX86_BUILTIN_LOADAPD: + return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1); + case IX86_BUILTIN_LOADUPD: + return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1); + + case IX86_BUILTIN_STOREAPD: + return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist); + case IX86_BUILTIN_STOREUPD: + return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist); + + case IX86_BUILTIN_LOADSD: + return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1); + + case IX86_BUILTIN_STORESD: + return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist); + + case IX86_BUILTIN_SETPD1: + target = assign_386_stack_local (DFmode, 0); + arg0 = TREE_VALUE (arglist); + emit_move_insn (adjust_address (target, DFmode, 0), + expand_expr (arg0, NULL_RTX, VOIDmode, 0)); + op0 = gen_reg_rtx (V2DFmode); + emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0))); + emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0))); + return op0; + + case IX86_BUILTIN_SETPD: + target = assign_386_stack_local (V2DFmode, 0); + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + emit_move_insn (adjust_address (target, DFmode, 0), + expand_expr (arg0, NULL_RTX, VOIDmode, 0)); + emit_move_insn (adjust_address (target, DFmode, 8), + expand_expr (arg1, NULL_RTX, VOIDmode, 0)); + op0 = gen_reg_rtx (V2DFmode); + emit_insn (gen_sse2_movapd (op0, target)); + return op0; + + case IX86_BUILTIN_LOADRPD: + target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, + gen_reg_rtx (V2DFmode), 1); + emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1))); + return target; + + case IX86_BUILTIN_LOADPD1: + target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, + gen_reg_rtx (V2DFmode), 1); + emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx)); + return target; + + case IX86_BUILTIN_STOREPD1: + return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist); + case IX86_BUILTIN_STORERPD: + return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist); + + case IX86_BUILTIN_CLRPD: + target = gen_reg_rtx (V2DFmode); + emit_insn (gen_sse_clrv2df (target)); + return target; + + case IX86_BUILTIN_MFENCE: + emit_insn (gen_sse2_mfence ()); + return 0; + case IX86_BUILTIN_LFENCE: + emit_insn (gen_sse2_lfence ()); + return 0; + + case IX86_BUILTIN_CLFLUSH: + arg0 = TREE_VALUE (arglist); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + icode = CODE_FOR_sse2_clflush; + if (! 
(*insn_data[icode].operand[0].predicate) (op0, Pmode)) + op0 = copy_to_mode_reg (Pmode, op0); + + emit_insn (gen_sse2_clflush (op0)); + return 0; + + case IX86_BUILTIN_MOVNTPD: + return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist); + case IX86_BUILTIN_MOVNTDQ: + return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist); + case IX86_BUILTIN_MOVNTI: + return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist); + + case IX86_BUILTIN_LOADDQA: + return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1); + case IX86_BUILTIN_LOADDQU: + return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1); + case IX86_BUILTIN_LOADD: + return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1); + + case IX86_BUILTIN_STOREDQA: + return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist); + case IX86_BUILTIN_STOREDQU: + return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist); + case IX86_BUILTIN_STORED: + return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist); + default: break; } - for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++) + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) if (d->code == fcode) { /* Compares are treated specially. */ if (d->icode == CODE_FOR_maskcmpv4sf3 || d->icode == CODE_FOR_vmmaskcmpv4sf3 || d->icode == CODE_FOR_maskncmpv4sf3 - || d->icode == CODE_FOR_vmmaskncmpv4sf3) + || d->icode == CODE_FOR_vmmaskncmpv4sf3 + || d->icode == CODE_FOR_maskcmpv2df3 + || d->icode == CODE_FOR_vmmaskcmpv2df3 + || d->icode == CODE_FOR_maskncmpv2df3 + || d->icode == CODE_FOR_vmmaskncmpv2df3) return ix86_expand_sse_compare (d, arglist, target); return ix86_expand_binop_builtin (d->icode, arglist, target); } - for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++) + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) if (d->code == fcode) return ix86_expand_unop_builtin (d->icode, arglist, target, 0); - for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++) + for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) if (d->code == fcode) return ix86_expand_sse_comi (d, arglist, target); @@ -12304,6 +13955,8 @@ ix86_preferred_reload_class (x, class) rtx x; enum reg_class class; { + if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x))) + return NO_REGS; if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) { /* SSE can't load any constant directly yet. */ @@ -12435,9 +14088,10 @@ ix86_hard_regno_mode_ok (regno, mode) if (FP_REGNO_P (regno)) return VALID_FP_MODE_P (mode); if (SSE_REGNO_P (regno)) - return VALID_SSE_REG_MODE (mode); + return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0); if (MMX_REGNO_P (regno)) - return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode); + return (TARGET_MMX + ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0); /* We handle both integer and floats in the general purpose registers. In future we should be able to handle vector modes as well. */ if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode)) @@ -12538,11 +14192,12 @@ ix86_memory_move_cost (mode, class, in) if (mode == TFmode) mode = XFmode; return ((in ? 
ix86_cost->int_load[2] : ix86_cost->int_store[2]) - * (int) GET_MODE_SIZE (mode) / 4); + * ((int) GET_MODE_SIZE (mode) + + UNITS_PER_WORD -1 ) / UNITS_PER_WORD); } } -#ifdef DO_GLOBAL_CTORS_BODY +#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) static void ix86_svr3_asm_out_constructor (symbol, priority) rtx symbol; @@ -12555,6 +14210,71 @@ ix86_svr3_asm_out_constructor (symbol, priority) } #endif +#if TARGET_MACHO + +static int current_machopic_label_num; + +/* Given a symbol name and its associated stub, write out the + definition of the stub. */ + +void +machopic_output_stub (file, symb, stub) + FILE *file; + const char *symb, *stub; +{ + unsigned int length; + char *binder_name, *symbol_name, lazy_ptr_name[32]; + int label = ++current_machopic_label_num; + + /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ + symb = (*targetm.strip_name_encoding) (symb); + + length = strlen (stub); + binder_name = alloca (length + 32); + GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); + + length = strlen (symb); + symbol_name = alloca (length + 32); + GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); + + sprintf (lazy_ptr_name, "L%d$lz", label); + + if (MACHOPIC_PURE) + machopic_picsymbol_stub_section (); + else + machopic_symbol_stub_section (); + + fprintf (file, "%s:\n", stub); + fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + + if (MACHOPIC_PURE) + { + fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label); + fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); + fprintf (file, "\tjmp %%edx\n"); + } + else + fprintf (file, "\tjmp *%s\n", lazy_ptr_name); + + fprintf (file, "%s:\n", binder_name); + + if (MACHOPIC_PURE) + { + fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); + fprintf (file, "\tpushl %%eax\n"); + } + else + fprintf (file, "\t pushl $%s\n", lazy_ptr_name); + + fprintf (file, "\tjmp dyld_stub_binding_helper\n"); + + machopic_lazy_symbol_ptr_section (); + fprintf (file, "%s:\n", lazy_ptr_name); + fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + fprintf (file, "\t.long %s\n", binder_name); +} +#endif /* TARGET_MACHO */ + /* Order the registers for register allocator. */ void @@ -12578,7 +14298,7 @@ x86_order_regs_for_local_alloc () if (!TARGET_SSE_MATH) for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) reg_alloc_order [pos++] = i; - + /* SSE registers. */ for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) reg_alloc_order [pos++] = i; @@ -12599,77 +14319,191 @@ x86_order_regs_for_local_alloc () reg_alloc_order [pos++] = 0; } -void -x86_output_mi_thunk (file, delta, function) - FILE *file; - int delta; +/* Returns an expression indicating where the this parameter is + located on entry to the FUNCTION. */ + +static rtx +x86_this_parameter (function) tree function; { - tree parm; - rtx xops[3]; + tree type = TREE_TYPE (function); + + if (TARGET_64BIT) + { + int n = aggregate_value_p (TREE_TYPE (type)) != 0; + return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); + } + + if (ix86_fntype_regparm (type) > 0) + { + tree parm; + + parm = TYPE_ARG_TYPES (type); + /* Figure out whether or not the function has a variable number of + arguments. */ + for (; parm; parm = TREE_CHAIN (parm)) + if (TREE_VALUE (parm) == void_type_node) + break; + /* If not, the this parameter is in %eax. 
*/ + if (parm) + return gen_rtx_REG (SImode, 0); + } - if (ix86_regparm > 0) - parm = TYPE_ARG_TYPES (TREE_TYPE (function)); + if (aggregate_value_p (TREE_TYPE (type))) + return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); else - parm = NULL_TREE; - for (; parm; parm = TREE_CHAIN (parm)) - if (TREE_VALUE (parm) == void_type_node) - break; + return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); +} - xops[0] = GEN_INT (delta); +/* Determine whether x86_output_mi_thunk can succeed. */ + +static bool +x86_can_output_mi_thunk (thunk, delta, vcall_offset, function) + tree thunk ATTRIBUTE_UNUSED; + HOST_WIDE_INT delta ATTRIBUTE_UNUSED; + HOST_WIDE_INT vcall_offset; + tree function; +{ + /* 64-bit can handle anything. */ if (TARGET_64BIT) + return true; + + /* For 32-bit, everything's fine if we have one free register. */ + if (ix86_fntype_regparm (TREE_TYPE (function)) < 3) + return true; + + /* Need a free register for vcall_offset. */ + if (vcall_offset) + return false; + + /* Need a free register for GOT references. */ + if (flag_pic && !(*targetm.binds_local_p) (function)) + return false; + + /* Otherwise ok. */ + return true; +} + +/* Output the assembler code for a thunk function. THUNK_DECL is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is non-zero, the word at + *(*this + vcall_offset) should be added to THIS. */ + +static void +x86_output_mi_thunk (file, thunk, delta, vcall_offset, function) + FILE *file ATTRIBUTE_UNUSED; + tree thunk ATTRIBUTE_UNUSED; + HOST_WIDE_INT delta; + HOST_WIDE_INT vcall_offset; + tree function; +{ + rtx xops[3]; + rtx this = x86_this_parameter (function); + rtx this_reg, tmp; + + /* If VCALL_OFFSET, we'll need THIS in a register. Might as well + pull it in now and let DELTA benefit. */ + if (REG_P (this)) + this_reg = this; + else if (vcall_offset) { - int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0; - xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); - output_asm_insn ("add{q} {%0, %1|%1, %0}", xops); - if (flag_pic) + /* Put the this parameter into %eax. */ + xops[0] = this; + xops[1] = this_reg = gen_rtx_REG (Pmode, 0); + output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); + } + else + this_reg = NULL_RTX; + + /* Adjust the this parameter by a fixed constant. */ + if (delta) + { + xops[0] = GEN_INT (delta); + xops[1] = this_reg ? this_reg : this; + if (TARGET_64BIT) { - fprintf (file, "\tjmp *"); - assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); - fprintf (file, "@GOTPCREL(%%rip)\n"); + if (!x86_64_general_operand (xops[0], DImode)) + { + tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); + xops[1] = tmp; + output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); + xops[0] = tmp; + xops[1] = this; + } + output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); } else - { - fprintf (file, "\tjmp "); - assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); - fprintf (file, "\n"); - } + output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); } - else + + /* Adjust the this parameter by a value stored in the vtable. 
*/ + if (vcall_offset) { - if (parm) - xops[1] = gen_rtx_REG (SImode, 0); - else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)))) - xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); + if (TARGET_64BIT) + tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); else - xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); - output_asm_insn ("add{l} {%0, %1|%1, %0}", xops); + tmp = gen_rtx_REG (SImode, 2 /* ECX */); - if (flag_pic) + xops[0] = gen_rtx_MEM (Pmode, this_reg); + xops[1] = tmp; + if (TARGET_64BIT) + output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); + else + output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); + + /* Adjust the this parameter. */ + xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); + if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) { - xops[0] = pic_offset_table_rtx; - xops[1] = gen_label_rtx (); - xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); + xops[0] = GEN_INT (vcall_offset); + xops[1] = tmp2; + output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); + xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); + } + xops[1] = this_reg; + if (TARGET_64BIT) + output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); + else + output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); + } - if (ix86_regparm > 2) - abort (); - output_asm_insn ("push{l}\t%0", xops); - output_asm_insn ("call\t%P1", xops); - ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1])); - output_asm_insn ("pop{l}\t%0", xops); - output_asm_insn - ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops); - xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0)); - output_asm_insn - ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops); - asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n"); - asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n"); + /* If necessary, drop THIS back to its stack slot. */ + if (this_reg && this_reg != this) + { + xops[0] = this_reg; + xops[1] = this; + output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); + } + + xops[0] = DECL_RTL (function); + if (TARGET_64BIT) + { + if (!flag_pic || (*targetm.binds_local_p) (function)) + output_asm_insn ("jmp\t%P0", xops); + else + { + tmp = XEXP (xops[0], 0); + tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL); + tmp = gen_rtx_CONST (Pmode, tmp); + tmp = gen_rtx_MEM (QImode, tmp); + xops[0] = tmp; + output_asm_insn ("jmp\t%A0", xops); } + } + else + { + if (!flag_pic || (*targetm.binds_local_p) (function)) + output_asm_insn ("jmp\t%P0", xops); else { - fprintf (file, "\tjmp "); - assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); - fprintf (file, "\n"); + tmp = gen_rtx_REG (SImode, 2 /* ECX */); + output_set_got (tmp); + + xops[1] = tmp; + output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); + output_asm_insn ("jmp\t{*}%1", xops); } } } @@ -12692,3 +14526,88 @@ x86_field_alignment (field, computed) return MIN (32, computed); return computed; } + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. 
*/ +void +x86_function_profiler (file, labelno) + FILE *file; + int labelno; +{ + if (TARGET_64BIT) + if (flag_pic) + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); +#endif + fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); + } + else + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno); +#endif + fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); + } + else if (flag_pic) + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", + LPREFIX, labelno, PROFILE_COUNT_REGISTER); +#endif + fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); + } + else + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, + PROFILE_COUNT_REGISTER); +#endif + fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); + } +} + +/* Implement machine specific optimizations. + At the moment we implement a single transformation: AMD Athlon works faster + when RET is not the destination of a conditional jump or directly preceded + by another jump instruction. We avoid the penalty by inserting a NOP just + before the RET instructions in such cases. */ +void +x86_machine_dependent_reorg (first) + rtx first ATTRIBUTE_UNUSED; +{ + edge e; + + if (!TARGET_ATHLON || !optimize || optimize_size) + return; + for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next) + { + basic_block bb = e->src; + rtx ret = bb->end; + rtx prev; + bool insert = false; + + if (!returnjump_p (ret) || !maybe_hot_bb_p (bb)) + continue; + prev = prev_nonnote_insn (ret); + if (prev && GET_CODE (prev) == CODE_LABEL) + { + edge e; + for (e = bb->pred; e; e = e->pred_next) + if (EDGE_FREQUENCY (e) && e->src->index > 0 + && !(e->flags & EDGE_FALLTHRU)) + insert = 1; + } + if (!insert) + { + prev = prev_real_insn (ret); + if (prev && GET_CODE (prev) == JUMP_INSN + && any_condjump_p (prev)) + insert = 1; + } + if (insert) + emit_insn_before (gen_nop (), ret); + } +} + +#include "gt-i386.h" diff --git a/contrib/gcc/config/i386/i386.h b/contrib/gcc/config/i386/i386.h index 7ab9063..ead133b 100644 --- a/contrib/gcc/config/i386/i386.h +++ b/contrib/gcc/config/i386/i386.h @@ -34,20 +34,6 @@ Boston, MA 02111-1307, USA. */ ADDR_BEG, ADDR_END, PRINT_IREG, PRINT_SCALE, PRINT_B_I_S, and many that start with ASM_ or end in ASM_OP. */ -/* Stubs for half-pic support if not OSF/1 reference platform. */ - -#ifndef HALF_PIC_P -#define HALF_PIC_P() 0 -#define HALF_PIC_NUMBER_PTRS 0 -#define HALF_PIC_NUMBER_REFS 0 -#define HALF_PIC_ENCODE(DECL) -#define HALF_PIC_DECLARE(NAME) -#define HALF_PIC_INIT() error ("half-pic init called on systems that don't support it") -#define HALF_PIC_ADDRESS_P(X) 0 -#define HALF_PIC_PTR(X) (X) -#define HALF_PIC_FINISH(STREAM) -#endif - /* Define the specific costs for a given cpu */ struct processor_costs { @@ -89,6 +75,12 @@ struct processor_costs { const int prefetch_block; /* bytes moved to cache for prefetch. */ const int simultaneous_prefetches; /* number of parallel prefetch operations. */ + const int fadd; /* cost of FADD and FSUB instructions. */ + const int fmul; /* cost of FMUL instruction. */ + const int fdiv; /* cost of FDIV instruction. */ + const int fabs; /* cost of FABS instruction. */ + const int fchs; /* cost of FCHS instruction. */ + const int fsqrt; /* cost of FSQRT instruction. 
*/ }; extern const struct processor_costs *ix86_cost; @@ -119,19 +111,16 @@ extern int target_flags; #define MASK_INLINE_ALL_STROPS 0x00000400 /* Inline stringops in all cases */ #define MASK_NO_PUSH_ARGS 0x00000800 /* Use push instructions */ #define MASK_ACCUMULATE_OUTGOING_ARGS 0x00001000/* Accumulate outgoing args */ -#define MASK_ACCUMULATE_OUTGOING_ARGS_SET 0x00002000 -#define MASK_MMX 0x00004000 /* Support MMX regs/builtins */ -#define MASK_MMX_SET 0x00008000 -#define MASK_SSE 0x00010000 /* Support SSE regs/builtins */ -#define MASK_SSE_SET 0x00020000 -#define MASK_SSE2 0x00040000 /* Support SSE2 regs/builtins */ -#define MASK_SSE2_SET 0x00080000 -#define MASK_3DNOW 0x00100000 /* Support 3Dnow builtins */ -#define MASK_3DNOW_SET 0x00200000 -#define MASK_3DNOW_A 0x00400000 /* Support Athlon 3Dnow builtins */ -#define MASK_3DNOW_A_SET 0x00800000 -#define MASK_128BIT_LONG_DOUBLE 0x01000000 /* long double size is 128bit */ -#define MASK_64BIT 0x02000000 /* Produce 64bit code */ +#define MASK_MMX 0x00002000 /* Support MMX regs/builtins */ +#define MASK_SSE 0x00004000 /* Support SSE regs/builtins */ +#define MASK_SSE2 0x00008000 /* Support SSE2 regs/builtins */ +#define MASK_3DNOW 0x00010000 /* Support 3Dnow builtins */ +#define MASK_3DNOW_A 0x00020000 /* Support Athlon 3Dnow builtins */ +#define MASK_128BIT_LONG_DOUBLE 0x00040000 /* long double size is 128bit */ +#define MASK_64BIT 0x00080000 /* Produce 64bit code */ + +/* Unused: 0x03f0000 */ + /* ... overlap with subtarget options starts by 0x04000000. */ #define MASK_NO_RED_ZONE 0x04000000 /* Do not use red zone */ @@ -140,7 +129,7 @@ extern int target_flags; /* Compile using ret insn that pops args. This will not work unless you use prototypes at least - for all functions that can take varying numbers of args. */ + for all functions that can take varying numbers of args. */ #define TARGET_RTD (target_flags & MASK_RTD) /* Align doubles to a two word boundary. This breaks compatibility with @@ -188,16 +177,25 @@ extern int target_flags; /* Debug FUNCTION_ARG macros */ #define TARGET_DEBUG_ARG (ix86_debug_arg_string != 0) -/* 64bit Sledgehammer mode */ +/* 64bit Sledgehammer mode. For libgcc2 we make sure this is a + compile-time constant. 
*/ +#ifdef IN_LIBGCC2 +#ifdef __x86_64__ +#define TARGET_64BIT 1 +#else +#define TARGET_64BIT 0 +#endif +#else #ifdef TARGET_BI_ARCH #define TARGET_64BIT (target_flags & MASK_64BIT) #else -#ifdef TARGET_64BIT_DEFAULT +#if TARGET_64BIT_DEFAULT #define TARGET_64BIT 1 #else #define TARGET_64BIT 0 #endif #endif +#endif #define TARGET_386 (ix86_cpu == PROCESSOR_I386) #define TARGET_486 (ix86_cpu == PROCESSOR_I486) @@ -215,14 +213,14 @@ extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx; extern const int x86_use_loop, x86_use_fiop, x86_use_mov0; extern const int x86_use_cltd, x86_read_modify_write; extern const int x86_read_modify, x86_split_long_moves; -extern const int x86_promote_QImode, x86_single_stringop; +extern const int x86_promote_QImode, x86_single_stringop, x86_fast_prefix; extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs; extern const int x86_promote_hi_regs, x86_integer_DFmode_moves; extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8; extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall; extern const int x86_accumulate_outgoing_args, x86_prologue_using_move; extern const int x86_epilogue_using_move, x86_decompose_lea; -extern const int x86_arch_always_fancy_math_387; +extern const int x86_arch_always_fancy_math_387, x86_shift1; extern int x86_prefetch_sse; #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK) @@ -247,6 +245,7 @@ extern int x86_prefetch_sse; #define TARGET_READ_MODIFY_WRITE (x86_read_modify_write & CPUMASK) #define TARGET_READ_MODIFY (x86_read_modify & CPUMASK) #define TARGET_PROMOTE_QImode (x86_promote_QImode & CPUMASK) +#define TARGET_FAST_PREFIX (x86_fast_prefix & CPUMASK) #define TARGET_SINGLE_STRINGOP (x86_single_stringop & CPUMASK) #define TARGET_QIMODE_MATH (x86_qimode_math & CPUMASK) #define TARGET_HIMODE_MATH (x86_himode_math & CPUMASK) @@ -263,6 +262,7 @@ extern int x86_prefetch_sse; #define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & CPUMASK) #define TARGET_DECOMPOSE_LEA (x86_decompose_lea & CPUMASK) #define TARGET_PREFETCH_SSE (x86_prefetch_sse) +#define TARGET_SHIFT1 (x86_shift1 & CPUMASK) #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE) @@ -282,6 +282,9 @@ extern int x86_prefetch_sse; #define TARGET_RED_ZONE (!(target_flags & MASK_NO_RED_ZONE)) +#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU) +#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN) + /* WARNING: Do not mark empty strings for translation, as calling gettext on an empty string does NOT return an empty string. */ @@ -343,30 +346,25 @@ extern int x86_prefetch_sse; N_("Use push instructions to save outgoing arguments") }, \ { "no-push-args", MASK_NO_PUSH_ARGS, \ N_("Do not use push instructions to save outgoing arguments") }, \ - { "accumulate-outgoing-args", (MASK_ACCUMULATE_OUTGOING_ARGS \ - | MASK_ACCUMULATE_OUTGOING_ARGS_SET), \ + { "accumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS, \ N_("Use push instructions to save outgoing arguments") }, \ - { "no-accumulate-outgoing-args",MASK_ACCUMULATE_OUTGOING_ARGS_SET, \ + { "no-accumulate-outgoing-args",-MASK_ACCUMULATE_OUTGOING_ARGS, \ N_("Do not use push instructions to save outgoing arguments") }, \ - { "mmx", MASK_MMX | MASK_MMX_SET, \ + { "mmx", MASK_MMX, \ N_("Support MMX built-in functions") }, \ { "no-mmx", -MASK_MMX, \ N_("Do not support MMX built-in functions") }, \ - { "no-mmx", MASK_MMX_SET, "" }, \ - { "3dnow", MASK_3DNOW | MASK_3DNOW_SET, \ + { "3dnow", MASK_3DNOW, \ N_("Support 3DNow! 
built-in functions") }, \ - { "no-3dnow", -MASK_3DNOW, "" }, \ - { "no-3dnow", MASK_3DNOW_SET, \ + { "no-3dnow", -MASK_3DNOW, \ N_("Do not support 3DNow! built-in functions") }, \ - { "sse", MASK_SSE | MASK_SSE_SET, \ + { "sse", MASK_SSE, \ N_("Support MMX and SSE built-in functions and code generation") }, \ - { "no-sse", -MASK_SSE, "" }, \ - { "no-sse", MASK_SSE_SET, \ + { "no-sse", -MASK_SSE, \ N_("Do not support MMX and SSE built-in functions and code generation") },\ - { "sse2", MASK_SSE2 | MASK_SSE2_SET, \ + { "sse2", MASK_SSE2, \ N_("Support MMX, SSE and SSE2 built-in functions and code generation") }, \ - { "no-sse2", -MASK_SSE2, "" }, \ - { "no-sse2", MASK_SSE2_SET, \ + { "no-sse2", -MASK_SSE2, \ N_("Do not support MMX, SSE and SSE2 built-in functions and code generation") }, \ { "128bit-long-double", MASK_128BIT_LONG_DOUBLE, \ N_("sizeof(long double) is 16") }, \ @@ -381,38 +379,21 @@ extern int x86_prefetch_sse; { "no-red-zone", MASK_NO_RED_ZONE, \ N_("Do not use red-zone in the x86-64 code") }, \ SUBTARGET_SWITCHES \ - { "", TARGET_DEFAULT, 0 }} + { "", TARGET_DEFAULT | TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_DEFAULT, 0 }} -#ifdef TARGET_64BIT_DEFAULT -#define TARGET_DEFAULT (MASK_64BIT | TARGET_SUBTARGET_DEFAULT) -#else -#define TARGET_DEFAULT TARGET_SUBTARGET_DEFAULT +#ifndef TARGET_64BIT_DEFAULT +#define TARGET_64BIT_DEFAULT 0 #endif -/* Which processor to schedule for. The cpu attribute defines a list that - mirrors this list, so changes to i386.md must be made at the same time. */ - -enum processor_type -{ - PROCESSOR_I386, /* 80386 */ - PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */ - PROCESSOR_PENTIUM, - PROCESSOR_PENTIUMPRO, - PROCESSOR_K6, - PROCESSOR_ATHLON, - PROCESSOR_PENTIUM4, - PROCESSOR_max -}; -enum fpmath_unit -{ - FPMATH_387 = 1, - FPMATH_SSE = 2 -}; - -extern enum processor_type ix86_cpu; -extern enum fpmath_unit ix86_fpmath; +/* Once GDB has been enhanced to deal with functions without frame + pointers, we can change this to allow for elimination of + the frame pointer in leaf functions. */ +#define TARGET_DEFAULT 0 -extern int ix86_arch; +/* This is not really a target flag, but is done this way so that + it's analogous to similar code for Mach-O on PowerPC. darwin.h + redefines this to 1. */ +#define TARGET_MACHO 0 /* This macro is similar to `TARGET_SWITCHES' but defines names of command options that have values. Its definition is an @@ -451,6 +432,8 @@ extern int ix86_arch; "" /* Undocumented. */ }, \ { "asm=", &ix86_asm_string, \ N_("Use given assembler dialect") }, \ + { "tls-dialect=", &ix86_tls_dialect_string, \ + N_("Use given thread-local storage dialect") }, \ SUBTARGET_OPTIONS \ } @@ -492,6 +475,138 @@ extern int ix86_arch; %n`-mno-intel-syntax' is deprecated. Use `-masm=att' instead.\n}" #endif +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + size_t arch_len = strlen (ix86_arch_string); \ + size_t cpu_len = strlen (ix86_cpu_string); \ + int last_arch_char = ix86_arch_string[arch_len - 1]; \ + int last_cpu_char = ix86_cpu_string[cpu_len - 1]; \ + \ + if (TARGET_64BIT) \ + { \ + builtin_assert ("cpu=x86_64"); \ + builtin_define ("__x86_64"); \ + builtin_define ("__x86_64__"); \ + builtin_define ("__amd64"); \ + builtin_define ("__amd64__"); \ + } \ + else \ + { \ + builtin_assert ("cpu=i386"); \ + builtin_assert ("machine=i386"); \ + builtin_define_std ("i386"); \ + } \ + \ + /* Built-ins based on -mcpu= (or -march= if no \ + CPU given). 
*/ \ + if (TARGET_386) \ + builtin_define ("__tune_i386__"); \ + else if (TARGET_486) \ + builtin_define ("__tune_i486__"); \ + else if (TARGET_PENTIUM) \ + { \ + builtin_define ("__tune_i586__"); \ + builtin_define ("__tune_pentium__"); \ + if (last_cpu_char == 'x') \ + builtin_define ("__tune_pentium_mmx__"); \ + } \ + else if (TARGET_PENTIUMPRO) \ + { \ + builtin_define ("__tune_i686__"); \ + builtin_define ("__tune_pentiumpro__"); \ + switch (last_cpu_char) \ + { \ + case '3': \ + builtin_define ("__tune_pentium3__"); \ + /* FALLTHRU */ \ + case '2': \ + builtin_define ("__tune_pentium2__"); \ + break; \ + } \ + } \ + else if (TARGET_K6) \ + { \ + builtin_define ("__tune_k6__"); \ + if (last_cpu_char == '2') \ + builtin_define ("__tune_k6_2__"); \ + else if (last_cpu_char == '3') \ + builtin_define ("__tune_k6_3__"); \ + } \ + else if (TARGET_ATHLON) \ + { \ + builtin_define ("__tune_athlon__"); \ + /* Only plain "athlon" lacks SSE. */ \ + if (last_cpu_char != 'n') \ + builtin_define ("__tune_athlon_sse__"); \ + } \ + else if (TARGET_PENTIUM4) \ + builtin_define ("__tune_pentium4__"); \ + \ + if (TARGET_MMX) \ + builtin_define ("__MMX__"); \ + if (TARGET_3DNOW) \ + builtin_define ("__3dNOW__"); \ + if (TARGET_3DNOW_A) \ + builtin_define ("__3dNOW_A__"); \ + if (TARGET_SSE) \ + builtin_define ("__SSE__"); \ + if (TARGET_SSE2) \ + builtin_define ("__SSE2__"); \ + if (TARGET_SSE_MATH && TARGET_SSE) \ + builtin_define ("__SSE_MATH__"); \ + if (TARGET_SSE_MATH && TARGET_SSE2) \ + builtin_define ("__SSE2_MATH__"); \ + \ + /* Built-ins based on -march=. */ \ + if (ix86_arch == PROCESSOR_I486) \ + { \ + builtin_define ("__i486"); \ + builtin_define ("__i486__"); \ + } \ + else if (ix86_arch == PROCESSOR_PENTIUM) \ + { \ + builtin_define ("__i586"); \ + builtin_define ("__i586__"); \ + builtin_define ("__pentium"); \ + builtin_define ("__pentium__"); \ + if (last_arch_char == 'x') \ + builtin_define ("__pentium_mmx__"); \ + } \ + else if (ix86_arch == PROCESSOR_PENTIUMPRO) \ + { \ + builtin_define ("__i686"); \ + builtin_define ("__i686__"); \ + builtin_define ("__pentiumpro"); \ + builtin_define ("__pentiumpro__"); \ + } \ + else if (ix86_arch == PROCESSOR_K6) \ + { \ + \ + builtin_define ("__k6"); \ + builtin_define ("__k6__"); \ + if (last_arch_char == '2') \ + builtin_define ("__k6_2__"); \ + else if (last_arch_char == '3') \ + builtin_define ("__k6_3__"); \ + } \ + else if (ix86_arch == PROCESSOR_ATHLON) \ + { \ + builtin_define ("__athlon"); \ + builtin_define ("__athlon__"); \ + /* Only plain "athlon" lacks SSE. 
*/ \ + if (last_arch_char != 'n') \ + builtin_define ("__athlon_sse__"); \ + } \ + else if (ix86_arch == PROCESSOR_PENTIUM4) \ + { \ + builtin_define ("__pentium4"); \ + builtin_define ("__pentium4__"); \ + } \ + } \ + while (0) + #define TARGET_CPU_DEFAULT_i386 0 #define TARGET_CPU_DEFAULT_i486 1 #define TARGET_CPU_DEFAULT_pentium 2 @@ -510,137 +625,6 @@ extern int ix86_arch; "pentiumpro", "pentium2", "pentium3", \ "pentium4", "k6", "k6-2", "k6-3",\ "athlon", "athlon-4"} -#ifndef CPP_CPU_DEFAULT_SPEC -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_i486 -#define CPP_CPU_DEFAULT_SPEC "-D__tune_i486__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium -#define CPP_CPU_DEFAULT_SPEC "-D__tune_i586__ -D__tune_pentium__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium_mmx -#define CPP_CPU_DEFAULT_SPEC "-D__tune_i586__ -D__tune_pentium__ -D__tune_pentium_mmx__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentiumpro -#define CPP_CPU_DEFAULT_SPEC "-D__tune_i686__ -D__tune_pentiumpro__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium2 -#define CPP_CPU_DEFAULT_SPEC "-D__tune_i686__ -D__tune_pentiumpro__ \ --D__tune_pentium2__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium3 -#define CPP_CPU_DEFAULT_SPEC "-D__tune_i686__ -D__tune_pentiumpro__ \ --D__tune_pentium2__ -D__tune_pentium3__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium4 -#define CPP_CPU_DEFAULT_SPEC "-D__tune_pentium4__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_k6 -#define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_k6_2 -#define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__ -D__tune_k6_2__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_k6_3 -#define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__ -D__tune_k6_3__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_athlon -#define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__" -#endif -#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_athlon_sse -#define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__ -D__tune_athlon_sse__" -#endif -#ifndef CPP_CPU_DEFAULT_SPEC -#define CPP_CPU_DEFAULT_SPEC "-D__tune_i386__" -#endif -#endif /* CPP_CPU_DEFAULT_SPEC */ - -#ifdef TARGET_BI_ARCH -#define NO_BUILTIN_SIZE_TYPE -#define NO_BUILTIN_PTRDIFF_TYPE -#endif - -#ifdef NO_BUILTIN_SIZE_TYPE -#define CPP_CPU32_SIZE_TYPE_SPEC \ - " -D__SIZE_TYPE__=unsigned\\ int -D__PTRDIFF_TYPE__=int" -#define CPP_CPU64_SIZE_TYPE_SPEC \ - " -D__SIZE_TYPE__=unsigned\\ long\\ int -D__PTRDIFF_TYPE__=long\\ int" -#else -#define CPP_CPU32_SIZE_TYPE_SPEC "" -#define CPP_CPU64_SIZE_TYPE_SPEC "" -#endif - -#define CPP_CPU32_SPEC \ - "-Acpu=i386 -Amachine=i386 %{!ansi:%{!std=c*:%{!std=i*:-Di386}}} -D__i386 \ --D__i386__ %(cpp_cpu32sizet)" - -#define CPP_CPU64_SPEC \ - "-Acpu=x86_64 -Amachine=x86_64 -D__x86_64 -D__x86_64__ %(cpp_cpu64sizet)" - -#define CPP_CPUCOMMON_SPEC "\ -%{march=i386:%{!mcpu*:-D__tune_i386__ }}\ -%{march=i486:-D__i486 -D__i486__ %{!mcpu*:-D__tune_i486__ }}\ -%{march=pentium|march=i586:-D__i586 -D__i586__ -D__pentium -D__pentium__ \ - %{!mcpu*:-D__tune_i586__ -D__tune_pentium__ }}\ -%{march=pentium-mmx:-D__i586 -D__i586__ -D__pentium -D__pentium__ \ - -D__pentium__mmx__ \ - %{!mcpu*:-D__tune_i586__ -D__tune_pentium__ -D__tune_pentium_mmx__}}\ -%{march=pentiumpro|march=i686|march=pentium2|march=pentium3:-D__i686 -D__i686__ \ - -D__pentiumpro -D__pentiumpro__ \ - %{!mcpu*:-D__tune_i686__ -D__tune_pentiumpro__ }}\ -%{march=march=pentium2|march=pentium3: -D__pentium2 -D__pentium2__\ - 
%{!mcpu*:-D__tune_pentium2__ }}\ -%{march=pentium3: -D__pentium3 -D__pentium3__\ - %{!mcpu*:-D__tune_pentium3__ }}\ -%{march=k6:-D__k6 -D__k6__ %{!mcpu*:-D__tune_k6__ }}\ -%{march=k6-2:-D__k6 -D__k6__ -D__k6_2__ \ - %{!mcpu*:-D__tune_k6__ -D__tune_k6_2__ }}\ -%{march=k6-3:-D__k6 -D__k6__ -D__k6_3__ \ - %{!mcpu*:-D__tune_k6__ -D__tune_k6_3__ }}\ -%{march=athlon|march=athlon-tbird:-D__athlon -D__athlon__ \ - %{!mcpu*:-D__tune_athlon__ }}\ -%{march=athlon-4|march=athlon-xp|march=athlon-mp:-D__athlon -D__athlon__ \ - -D__athlon_sse__ \ - %{!mcpu*:-D__tune_athlon__ -D__tune_athlon_sse__ }}\ -%{march=pentium4:-D__pentium4 -D__pentium4__ %{!mcpu*:-D__tune_pentium4__ }}\ -%{m386|mcpu=i386:-D__tune_i386__ }\ -%{m486|mcpu=i486:-D__tune_i486__ }\ -%{mpentium|mcpu=pentium|mcpu=i586|mcpu=pentium-mmx:-D__tune_i586__ -D__tune_pentium__ }\ -%{mpentiumpro|mcpu=pentiumpro|mcpu=i686|mcpu=pentium2|mcpu=pentium3:-D__tune_i686__ \ --D__tune_pentiumpro__ }\ -%{mcpu=k6|mcpu=k6-2|mcpu=k6-3:-D__tune_k6__ }\ -%{mcpu=athlon|mcpu=athlon-tbird|mcpu=athlon-4|mcpu=athlon-xp|mcpu=athlon-mp:\ --D__tune_athlon__ }\ -%{mcpu=athlon-4|mcpu=athlon-xp|mcpu=athlon-mp:\ --D__tune_athlon_sse__ }\ -%{mcpu=pentium4:-D__tune_pentium4__ }\ -%{march=athlon-xp|march=athlon-mp|march=pentium3|march=pentium4|msse|msse2:\ --D__SSE__ }\ -%{march=pentium-mmx|march=k6|march=k6-2|march=k6-3\ -|march=athlon|march=athlon-tbird|march=athlon-4|march=athlon-xp\ -|march=athlon-mp|march=pentium2|march=pentium3|march=pentium4|mmx|msse|m3dnow: -D__MMX__ }\ -%{march=k6-2|march=k6-3\ -|march=athlon|march=athlon-tbird|march=athlon-4|march=athlon-xp\ -|march=athlon-mp|m3dnow: -D__3dNOW__ }\ -%{march=athlon|march=athlon-tbird|march=athlon-4|march=athlon-xp\ -|march=athlon-mp: -D__3dNOW_A__ }\ -%{march=pentium4|msse2: -D__SSE2__ }\ -%{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}" - -#ifndef CPP_CPU_SPEC -#ifdef TARGET_BI_ARCH -#ifdef TARGET_64BIT_DEFAULT -#define CPP_CPU_SPEC "%{m32:%(cpp_cpu32)}%{!m32:%(cpp_cpu64)} %(cpp_cpucommon)" -#else -#define CPP_CPU_SPEC "%{m64:%(cpp_cpu64)}%{!m64:%(cpp_cpu32)} %(cpp_cpucommon)" -#endif -#else -#ifdef TARGET_64BIT_DEFAULT -#define CPP_CPU_SPEC "%(cpp_cpu64) %(cpp_cpucommon)" -#else -#define CPP_CPU_SPEC "%(cpp_cpu32) %(cpp_cpucommon)" -#endif -#endif -#endif #ifndef CC1_SPEC #define CC1_SPEC "%(cc1_cpu) " @@ -661,24 +645,14 @@ extern int ix86_arch; #endif #define EXTRA_SPECS \ - { "cpp_cpu_default", CPP_CPU_DEFAULT_SPEC }, \ - { "cpp_cpu", CPP_CPU_SPEC }, \ - { "cpp_cpu32", CPP_CPU32_SPEC }, \ - { "cpp_cpu64", CPP_CPU64_SPEC }, \ - { "cpp_cpu32sizet", CPP_CPU32_SIZE_TYPE_SPEC }, \ - { "cpp_cpu64sizet", CPP_CPU64_SIZE_TYPE_SPEC }, \ - { "cpp_cpucommon", CPP_CPUCOMMON_SPEC }, \ { "cc1_cpu", CC1_CPU_SPEC }, \ SUBTARGET_EXTRA_SPECS /* target machine storage layout */ /* Define for XFmode or TFmode extended real floating point support. - This will automatically cause REAL_ARITHMETIC to be defined. - The XFmode is specified by i386 ABI, while TFmode may be faster - due to alignment and simplifications in the address calculations. - */ + due to alignment and simplifications in the address calculations. */ #define LONG_DOUBLE_TYPE_SIZE (TARGET_128BIT_LONG_DOUBLE ? 128 : 96) #define MAX_LONG_DOUBLE_TYPE_SIZE 128 #ifdef __x86_64__ @@ -686,10 +660,14 @@ extern int ix86_arch; #else #define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 96 #endif -/* Tell real.c that this is the 80-bit Intel extended float format - packaged in a 128-bit or 96bit entity. 
*/ -#define INTEL_EXTENDED_IEEE_FORMAT 1 +/* Set the value of FLT_EVAL_METHOD in float.h. When using only the + FPU, assume that the fpcw is set to extended precision; when using + only SSE, rounding is correct; when using both SSE and the FPU, + the rounding precision is indeterminate, since either may be chosen + apparently at random. */ +#define TARGET_FLT_EVAL_METHOD \ + (TARGET_MIX_SSE_I387 ? -1 : TARGET_SSE_MATH ? 1 : 2) #define SHORT_TYPE_SIZE 16 #define INT_TYPE_SIZE 32 @@ -699,7 +677,7 @@ extern int ix86_arch; #define DOUBLE_TYPE_SIZE 64 #define LONG_LONG_TYPE_SIZE 64 -#if defined (TARGET_BI_ARCH) || defined (TARGET_64BIT_DEFAULT) +#if defined (TARGET_BI_ARCH) || TARGET_64BIT_DEFAULT #define MAX_BITS_PER_WORD 64 #define MAX_LONG_TYPE_SIZE 64 #else @@ -707,11 +685,6 @@ extern int ix86_arch; #define MAX_LONG_TYPE_SIZE 32 #endif -/* Define if you don't want extended real, but do want to use the - software floating point emulator for REAL_ARITHMETIC and - decimal <-> binary conversion. */ -/* #define REAL_ARITHMETIC */ - /* Define this if most significant byte of a word is the lowest numbered. */ /* That is true on the 80386. */ @@ -726,22 +699,13 @@ extern int ix86_arch; /* Not true for 80386 */ #define WORDS_BIG_ENDIAN 0 -/* number of bits in an addressable storage unit */ -#define BITS_PER_UNIT 8 - -/* Width in bits of a "word", which is the contents of a machine register. - Note that this is not necessarily the width of data type `int'; - if using 16-bit ints on a 80386, this would still be 32. - But on a machine with 16-bit registers, this would be 16. */ -#define BITS_PER_WORD (TARGET_64BIT ? 64 : 32) - /* Width of a word, in units (bytes). */ #define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4) -#define MIN_UNITS_PER_WORD 4 - -/* Width in bits of a pointer. - See also the macro `Pmode' defined below. */ -#define POINTER_SIZE BITS_PER_WORD +#ifdef IN_LIBGCC2 +#define MIN_UNITS_PER_WORD (TARGET_64BIT ? 8 : 4) +#else +#define MIN_UNITS_PER_WORD 4 +#endif /* Allocation boundary (in *bits*) for storing arguments in argument list. */ #define PARM_BOUNDARY BITS_PER_WORD @@ -774,16 +738,15 @@ extern int ix86_arch; and all fundamental data types supported by the hardware might need to be aligned. No data type wants to be aligned rounder than this. - + Pentium+ preferrs DFmode values to be aligned to 64 bit boundary and Pentium Pro XFmode values at 128 bit boundaries. */ #define BIGGEST_ALIGNMENT 128 -/* Decide whether a variable of mode MODE must be 128 bit aligned. */ +/* Decide whether a variable of mode MODE should be 128 bit aligned. */ #define ALIGN_MODE_128(MODE) \ - ((MODE) == XFmode || (MODE) == TFmode || ((MODE) == TImode) \ - || (MODE) == V4SFmode || (MODE) == V4SImode) + ((MODE) == XFmode || (MODE) == TFmode || SSE_REG_MODE_P (MODE)) /* The published ABIs say that doubles should be aligned on word boundaries, so lower the aligment for structure fields unless @@ -793,7 +756,11 @@ extern int ix86_arch; supports no vector modes, cut out the complexity and fall back on BIGGEST_FIELD_ALIGNMENT. 
*/ #ifdef IN_TARGET_LIBS +#ifdef __x86_64__ +#define BIGGEST_FIELD_ALIGNMENT 128 +#else #define BIGGEST_FIELD_ALIGNMENT 32 +#endif #else #define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ x86_field_alignment (FIELD, COMPUTED) @@ -846,13 +813,13 @@ extern int ix86_arch; #define FUNCTION_ARG_BOUNDARY(MODE, TYPE) \ ix86_function_arg_boundary ((MODE), (TYPE)) -/* Set this non-zero if move instructions will actually fail to work +/* Set this nonzero if move instructions will actually fail to work when given unaligned data. */ #define STRICT_ALIGNMENT 0 /* If bit field type is int, don't let it cross an int, and give entire struct the alignment of an int. */ -/* Required on the 386 since it doesn't have bitfield insns. */ +/* Required on the 386 since it doesn't have bit-field insns. */ #define PCC_BITFIELD_TYPE_MATTERS 1 /* Standard register usage. */ @@ -891,7 +858,7 @@ extern int ix86_arch; /* 1 for registers that have pervasive standard uses and are not available for the register allocator. On the 80386, the stack pointer is such, as is the arg pointer. - + The value is an mask - bit 1 is set for fixed registers for 32bit target, while 2 is set for fixed registers for 64bit. Proper value is computed in the CONDITIONAL_REGISTER_USAGE. @@ -909,15 +876,15 @@ extern int ix86_arch; 1, 1, 1, 1, 1, 1, 1, 1, \ /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \ 1, 1, 1, 1, 1, 1, 1, 1} - + /* 1 for registers not available across function calls. These must include the FIXED_REGISTERS and also any registers that can be used without being saved. The latter must include the registers where values are returned and the register where structure-value addresses are passed. - Aside from that, you can include as many other registers as you like. - + Aside from that, you can include as many other registers as you like. + The value is an mask - bit 1 is set for call used for 32bit target, while 2 is set for call used for 64bit. Proper value is computed in the CONDITIONAL_REGISTER_USAGE. @@ -1003,7 +970,7 @@ do { \ This is ordinarily the length in words of a value of mode MODE but can be less for certain modes in special long registers. - Actually there are no two word move instructions for consecutive + Actually there are no two word move instructions for consecutive registers. And only registers 0-3 may have mov byte instructions applied to them. */ @@ -1017,9 +984,15 @@ do { \ ? (TARGET_64BIT ? 4 : 6) \ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))) +#define VALID_SSE2_REG_MODE(MODE) \ + ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ + || (MODE) == V2DImode) + #define VALID_SSE_REG_MODE(MODE) \ ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode \ || (MODE) == SFmode \ + /* Always accept SSE2 modes so that xmmintrin.h compiles. */ \ + || VALID_SSE2_REG_MODE (MODE) \ || (TARGET_SSE2 && ((MODE) == DFmode || VALID_MMX_REG_MODE (MODE)))) #define VALID_MMX_REG_MODE_3DNOW(MODE) \ @@ -1047,6 +1020,17 @@ do { \ || (MODE) == CDImode \ || (TARGET_64BIT && ((MODE) == TImode || (MODE) == CTImode))) +/* Return true for modes passed in SSE registers. */ +#define SSE_REG_MODE_P(MODE) \ + ((MODE) == TImode || (MODE) == V16QImode \ + || (MODE) == V8HImode || (MODE) == V2DFmode || (MODE) == V2DImode \ + || (MODE) == V4SFmode || (MODE) == V4SImode) + +/* Return true for modes passed in MMX registers. 
*/ +#define MMX_REG_MODE_P(MODE) \ + ((MODE) == V8QImode || (MODE) == V4HImode || (MODE) == V2SImode \ + || (MODE) == V2SFmode) + /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ #define HARD_REGNO_MODE_OK(REGNO, MODE) \ @@ -1064,7 +1048,7 @@ do { \ && (TARGET_64BIT || !TARGET_PARTIAL_REG_STALL)) \ || ((MODE1) == DImode && TARGET_64BIT)) \ && ((MODE2) == HImode || (MODE2) == SImode \ - || ((MODE1) == QImode \ + || ((MODE2) == QImode \ && (TARGET_64BIT || !TARGET_PARTIAL_REG_STALL)) \ || ((MODE2) == DImode && TARGET_64BIT)))) @@ -1110,7 +1094,7 @@ do { \ #define FIRST_SSE_REG (FRAME_POINTER_REGNUM + 1) #define LAST_SSE_REG (FIRST_SSE_REG + 7) - + #define FIRST_MMX_REG (LAST_SSE_REG + 1) #define LAST_MMX_REG (FIRST_MMX_REG + 7) @@ -1146,9 +1130,20 @@ do { \ /* Register to hold the addressing base for position independent code access to data items. We don't use PIC pointer for 64bit mode. Define the regnum to dummy value to prevent gcc from - pessimizing code dealing with EBX. */ -#define PIC_OFFSET_TABLE_REGNUM \ - (TARGET_64BIT || !flag_pic ? INVALID_REGNUM : 3) + pessimizing code dealing with EBX. + + To avoid clobbering a call-saved register unnecessarily, we renumber + the pic register when possible. The change is visible after the + prologue has been emitted. */ + +#define REAL_PIC_OFFSET_TABLE_REGNUM 3 + +#define PIC_OFFSET_TABLE_REGNUM \ + (TARGET_64BIT || !flag_pic ? INVALID_REGNUM \ + : reload_completed ? REGNO (pic_offset_table_rtx) \ + : REAL_PIC_OFFSET_TABLE_REGNUM) + +#define GOT_SYMBOL_NAME "_GLOBAL_OFFSET_TABLE_" /* Register in which address to store a structure value arrives in the function. On the 386, the prologue @@ -1350,7 +1345,7 @@ enum reg_class #define MMX_REGNO_P(N) ((N) >= FIRST_MMX_REG && (N) <= LAST_MMX_REG) #define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP))) - + #define STACK_REG_P(XOP) \ (REG_P (XOP) && \ REGNO (XOP) >= FIRST_STACK_REG && \ @@ -1430,7 +1425,7 @@ enum reg_class #define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ ((C) == 'G' ? standard_80387_constant_p (VALUE) \ - : ((C) == 'H' ? standard_sse_constant_p (VALUE) : 0)) + : 0) /* A C expression that defines the optional machine-dependent constraint letters that can be used to segregate specific types of @@ -1444,9 +1439,10 @@ enum reg_class the constraint letter C. If C is not defined as an extra constraint, the value returned should be 0 regardless of VALUE. */ -#define EXTRA_CONSTRAINT(VALUE, C) \ - ((C) == 'e' ? x86_64_sign_extended_value (VALUE) \ - : (C) == 'Z' ? x86_64_zero_extended_value (VALUE) \ +#define EXTRA_CONSTRAINT(VALUE, D) \ + ((D) == 'e' ? x86_64_sign_extended_value (VALUE) \ + : (D) == 'Z' ? x86_64_zero_extended_value (VALUE) \ + : (D) == 'C' ? standard_sse_constant_p (VALUE) \ : 0) /* Place additional restrictions on the register class to use when it @@ -1481,7 +1477,7 @@ enum reg_class ix86_secondary_memory_needed ((CLASS1), (CLASS2), (MODE), 1) /* QImode spills from non-QI registers need a scratch. This does not - happen often -- the only example so far requires an uninitialized + happen often -- the only example so far requires an uninitialized pseudo. */ #define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, OUT) \ @@ -1527,8 +1523,22 @@ enum reg_class || ((CLASS) == SIREG) \ || ((CLASS) == DIREG)) +/* Return a class of registers that cannot change FROM mode to TO mode. + + x87 registers can't do subreg as all values are reformated to extended + precision. 
XMM registers does not support with nonzero offsets equal + to 4, 8 and 12 otherwise valid for integer registers. Since we can't + determine these, prohibit all nonparadoxical subregs changing size. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + (GET_MODE_SIZE (TO) < GET_MODE_SIZE (FROM) \ + ? reg_classes_intersect_p (FLOAT_SSE_REGS, (CLASS)) \ + || MAYBE_MMX_CLASS_P (CLASS) \ + : GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ + ? reg_classes_intersect_p (FLOAT_REGS, (CLASS)) : 0) + /* A C statement that adds to CLOBBERS any hard regs the port wishes - to automatically clobber for all asms. + to automatically clobber for all asms. We do this in the new i386 backend to maintain source compatibility with the old cc0-based compiler. */ @@ -1566,7 +1576,7 @@ enum reg_class On 386 pushw decrements by exactly 2 no matter what the position was. On the 386 there is no pushb; we use pushw instead, and this has the effect of rounding up to 2. - + For 64bit ABI we round up to 8 bytes. */ @@ -1721,17 +1731,28 @@ typedef struct ix86_args { #define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) 0 +/* A C expression that indicates when an argument must be passed by + reference. If nonzero for an argument, a copy of that argument is + made in memory and a pointer to the argument is passed instead of + the argument itself. The pointer is passed in whatever way is + appropriate for passing a pointer to that type. */ + +#define FUNCTION_ARG_PASS_BY_REFERENCE(CUM, MODE, TYPE, NAMED) \ + function_arg_pass_by_reference(&CUM, MODE, TYPE, NAMED) + /* If PIC, we cannot make sibling calls to global functions because the PLT requires %ebx live. - If we are returning floats on the register stack, we cannot make - sibling calls to functions that return floats. (The stack adjust - instruction will wind up after the sibcall jump, and not be executed.) */ + If we are returning floats on the 80387 register stack, we cannot + make a sibcall from a function that doesn't return a float to a + function that does or, conversely, from a function that does return + a float to a function that doesn't; the necessary stack adjustment + would not be executed. */ #define FUNCTION_OK_FOR_SIBCALL(DECL) \ ((DECL) \ && (! flag_pic || ! TREE_PUBLIC (DECL)) \ && (! TARGET_FLOAT_RETURNS_IN_80387 \ - || ! FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (DECL)))) \ - || FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (cfun->decl)))))) + || (FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (DECL)))) \ + == FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (cfun->decl))))))) /* Perform any needed actions needed for a function that is receiving a variable number of arguments. @@ -1756,8 +1777,8 @@ typedef struct ix86_args { ((VALIST) = ix86_build_va_list ()) /* Implement `va_start' for varargs and stdarg. */ -#define EXPAND_BUILTIN_VA_START(STDARG, VALIST, NEXTARG) \ - ix86_va_start ((STDARG), (VALIST), (NEXTARG)) +#define EXPAND_BUILTIN_VA_START(VALIST, NEXTARG) \ + ix86_va_start (VALIST, NEXTARG) /* Implement `va_arg'. */ #define EXPAND_BUILTIN_VA_ARG(VALIST, TYPE) \ @@ -1772,26 +1793,17 @@ typedef struct ix86_args { /* Output assembler code to FILE to increment profiler label # LABELNO for profiling a function entry. 
*/ -#define FUNCTION_PROFILER(FILE, LABELNO) \ -do { \ - if (flag_pic) \ - { \ - fprintf ((FILE), "\tleal\t%sP%d@GOTOFF(%%ebx),%%edx\n", \ - LPREFIX, (LABELNO)); \ - fprintf ((FILE), "\tcall\t*_mcount@GOT(%%ebx)\n"); \ - } \ - else \ - { \ - fprintf ((FILE), "\tmovl\t$%sP%d,%%edx\n", LPREFIX, (LABELNO)); \ - fprintf ((FILE), "\tcall\t_mcount\n"); \ - } \ -} while (0) +#define FUNCTION_PROFILER(FILE, LABELNO) x86_function_profiler (FILE, LABELNO) + +#define MCOUNT_NAME "_mcount" + +#define PROFILE_COUNT_REGISTER "edx" /* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, the stack pointer does not matter. The value is tested only in functions that have frame pointers. No definition is equivalent to always zero. */ -/* Note on the 386 it might be more efficient not to define this since +/* Note on the 386 it might be more efficient not to define this since we have to restore it ourselves from the frame pointer, in order to use pop */ @@ -1945,15 +1957,12 @@ do { \ #define MAX_REGS_PER_ADDRESS 2 -#define CONSTANT_ADDRESS_P(X) \ - (GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \ - || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST \ - || GET_CODE (X) == CONST_DOUBLE) +#define CONSTANT_ADDRESS_P(X) constant_address_p (X) /* Nonzero if the constant value X is a legitimate general operand. It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ -#define LEGITIMATE_CONSTANT_P(X) 1 +#define LEGITIMATE_CONSTANT_P(X) legitimate_constant_p (X) #ifdef REG_OK_STRICT #define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \ @@ -2013,12 +2022,10 @@ do { \ #define REWRITE_ADDRESS(X) rewrite_address (X) /* Nonzero if the constant value X is a legitimate general operand - when generating PIC code. It is given that flag_pic is on and + when generating PIC code. It is given that flag_pic is on and that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ -#define LEGITIMATE_PIC_OPERAND_P(X) \ - (! 
SYMBOLIC_CONST (X) \ - || legitimate_pic_address_disp_p (X)) +#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X) #define SYMBOLIC_CONST(X) \ (GET_CODE (X) == SYMBOL_REF \ @@ -2087,9 +2094,12 @@ enum ix86_builtins IX86_BUILTIN_CVTPI2PS, IX86_BUILTIN_CVTPS2PI, IX86_BUILTIN_CVTSI2SS, + IX86_BUILTIN_CVTSI642SS, IX86_BUILTIN_CVTSS2SI, + IX86_BUILTIN_CVTSS2SI64, IX86_BUILTIN_CVTTPS2PI, IX86_BUILTIN_CVTTSS2SI, + IX86_BUILTIN_CVTTSS2SI64, IX86_BUILTIN_MAXPS, IX86_BUILTIN_MAXSS, @@ -2118,6 +2128,16 @@ enum ix86_builtins IX86_BUILTIN_MOVNTPS, IX86_BUILTIN_MOVNTQ, + IX86_BUILTIN_LOADDQA, + IX86_BUILTIN_LOADDQU, + IX86_BUILTIN_STOREDQA, + IX86_BUILTIN_STOREDQU, + IX86_BUILTIN_MOVQ, + IX86_BUILTIN_LOADD, + IX86_BUILTIN_STORED, + + IX86_BUILTIN_CLRTI, + IX86_BUILTIN_PACKSSWB, IX86_BUILTIN_PACKSSDW, IX86_BUILTIN_PACKUSWB, @@ -2125,6 +2145,7 @@ enum ix86_builtins IX86_BUILTIN_PADDB, IX86_BUILTIN_PADDW, IX86_BUILTIN_PADDD, + IX86_BUILTIN_PADDQ, IX86_BUILTIN_PADDSB, IX86_BUILTIN_PADDSW, IX86_BUILTIN_PADDUSB, @@ -2132,6 +2153,7 @@ enum ix86_builtins IX86_BUILTIN_PSUBB, IX86_BUILTIN_PSUBW, IX86_BUILTIN_PSUBD, + IX86_BUILTIN_PSUBQ, IX86_BUILTIN_PSUBSB, IX86_BUILTIN_PSUBSW, IX86_BUILTIN_PSUBUSB, @@ -2201,7 +2223,7 @@ enum ix86_builtins IX86_BUILTIN_RSQRTSS, IX86_BUILTIN_SQRTPS, IX86_BUILTIN_SQRTSS, - + IX86_BUILTIN_UNPCKHPS, IX86_BUILTIN_UNPCKLPS, @@ -2248,50 +2270,233 @@ enum ix86_builtins IX86_BUILTIN_SSE_ZERO, IX86_BUILTIN_MMX_ZERO, + /* SSE2 */ + IX86_BUILTIN_ADDPD, + IX86_BUILTIN_ADDSD, + IX86_BUILTIN_DIVPD, + IX86_BUILTIN_DIVSD, + IX86_BUILTIN_MULPD, + IX86_BUILTIN_MULSD, + IX86_BUILTIN_SUBPD, + IX86_BUILTIN_SUBSD, + + IX86_BUILTIN_CMPEQPD, + IX86_BUILTIN_CMPLTPD, + IX86_BUILTIN_CMPLEPD, + IX86_BUILTIN_CMPGTPD, + IX86_BUILTIN_CMPGEPD, + IX86_BUILTIN_CMPNEQPD, + IX86_BUILTIN_CMPNLTPD, + IX86_BUILTIN_CMPNLEPD, + IX86_BUILTIN_CMPNGTPD, + IX86_BUILTIN_CMPNGEPD, + IX86_BUILTIN_CMPORDPD, + IX86_BUILTIN_CMPUNORDPD, + IX86_BUILTIN_CMPNEPD, + IX86_BUILTIN_CMPEQSD, + IX86_BUILTIN_CMPLTSD, + IX86_BUILTIN_CMPLESD, + IX86_BUILTIN_CMPNEQSD, + IX86_BUILTIN_CMPNLTSD, + IX86_BUILTIN_CMPNLESD, + IX86_BUILTIN_CMPORDSD, + IX86_BUILTIN_CMPUNORDSD, + IX86_BUILTIN_CMPNESD, + + IX86_BUILTIN_COMIEQSD, + IX86_BUILTIN_COMILTSD, + IX86_BUILTIN_COMILESD, + IX86_BUILTIN_COMIGTSD, + IX86_BUILTIN_COMIGESD, + IX86_BUILTIN_COMINEQSD, + IX86_BUILTIN_UCOMIEQSD, + IX86_BUILTIN_UCOMILTSD, + IX86_BUILTIN_UCOMILESD, + IX86_BUILTIN_UCOMIGTSD, + IX86_BUILTIN_UCOMIGESD, + IX86_BUILTIN_UCOMINEQSD, + + IX86_BUILTIN_MAXPD, + IX86_BUILTIN_MAXSD, + IX86_BUILTIN_MINPD, + IX86_BUILTIN_MINSD, + + IX86_BUILTIN_ANDPD, + IX86_BUILTIN_ANDNPD, + IX86_BUILTIN_ORPD, + IX86_BUILTIN_XORPD, + + IX86_BUILTIN_SQRTPD, + IX86_BUILTIN_SQRTSD, + + IX86_BUILTIN_UNPCKHPD, + IX86_BUILTIN_UNPCKLPD, + + IX86_BUILTIN_SHUFPD, + + IX86_BUILTIN_LOADAPD, + IX86_BUILTIN_LOADUPD, + IX86_BUILTIN_STOREAPD, + IX86_BUILTIN_STOREUPD, + IX86_BUILTIN_LOADSD, + IX86_BUILTIN_STORESD, + IX86_BUILTIN_MOVSD, + + IX86_BUILTIN_LOADHPD, + IX86_BUILTIN_LOADLPD, + IX86_BUILTIN_STOREHPD, + IX86_BUILTIN_STORELPD, + + IX86_BUILTIN_CVTDQ2PD, + IX86_BUILTIN_CVTDQ2PS, + + IX86_BUILTIN_CVTPD2DQ, + IX86_BUILTIN_CVTPD2PI, + IX86_BUILTIN_CVTPD2PS, + IX86_BUILTIN_CVTTPD2DQ, + IX86_BUILTIN_CVTTPD2PI, + + IX86_BUILTIN_CVTPI2PD, + IX86_BUILTIN_CVTSI2SD, + IX86_BUILTIN_CVTSI642SD, + + IX86_BUILTIN_CVTSD2SI, + IX86_BUILTIN_CVTSD2SI64, + IX86_BUILTIN_CVTSD2SS, + IX86_BUILTIN_CVTSS2SD, + IX86_BUILTIN_CVTTSD2SI, + IX86_BUILTIN_CVTTSD2SI64, + + IX86_BUILTIN_CVTPS2DQ, + IX86_BUILTIN_CVTPS2PD, + 
IX86_BUILTIN_CVTTPS2DQ, + + IX86_BUILTIN_MOVNTI, + IX86_BUILTIN_MOVNTPD, + IX86_BUILTIN_MOVNTDQ, + + IX86_BUILTIN_SETPD1, + IX86_BUILTIN_SETPD, + IX86_BUILTIN_CLRPD, + IX86_BUILTIN_SETRPD, + IX86_BUILTIN_LOADPD1, + IX86_BUILTIN_LOADRPD, + IX86_BUILTIN_STOREPD1, + IX86_BUILTIN_STORERPD, + + /* SSE2 MMX */ + IX86_BUILTIN_MASKMOVDQU, + IX86_BUILTIN_MOVMSKPD, + IX86_BUILTIN_PMOVMSKB128, + IX86_BUILTIN_MOVQ2DQ, + IX86_BUILTIN_MOVDQ2Q, + + IX86_BUILTIN_PACKSSWB128, + IX86_BUILTIN_PACKSSDW128, + IX86_BUILTIN_PACKUSWB128, + + IX86_BUILTIN_PADDB128, + IX86_BUILTIN_PADDW128, + IX86_BUILTIN_PADDD128, + IX86_BUILTIN_PADDQ128, + IX86_BUILTIN_PADDSB128, + IX86_BUILTIN_PADDSW128, + IX86_BUILTIN_PADDUSB128, + IX86_BUILTIN_PADDUSW128, + IX86_BUILTIN_PSUBB128, + IX86_BUILTIN_PSUBW128, + IX86_BUILTIN_PSUBD128, + IX86_BUILTIN_PSUBQ128, + IX86_BUILTIN_PSUBSB128, + IX86_BUILTIN_PSUBSW128, + IX86_BUILTIN_PSUBUSB128, + IX86_BUILTIN_PSUBUSW128, + + IX86_BUILTIN_PAND128, + IX86_BUILTIN_PANDN128, + IX86_BUILTIN_POR128, + IX86_BUILTIN_PXOR128, + + IX86_BUILTIN_PAVGB128, + IX86_BUILTIN_PAVGW128, + + IX86_BUILTIN_PCMPEQB128, + IX86_BUILTIN_PCMPEQW128, + IX86_BUILTIN_PCMPEQD128, + IX86_BUILTIN_PCMPGTB128, + IX86_BUILTIN_PCMPGTW128, + IX86_BUILTIN_PCMPGTD128, + + IX86_BUILTIN_PEXTRW128, + IX86_BUILTIN_PINSRW128, + + IX86_BUILTIN_PMADDWD128, + + IX86_BUILTIN_PMAXSW128, + IX86_BUILTIN_PMAXUB128, + IX86_BUILTIN_PMINSW128, + IX86_BUILTIN_PMINUB128, + + IX86_BUILTIN_PMULUDQ, + IX86_BUILTIN_PMULUDQ128, + IX86_BUILTIN_PMULHUW128, + IX86_BUILTIN_PMULHW128, + IX86_BUILTIN_PMULLW128, + + IX86_BUILTIN_PSADBW128, + IX86_BUILTIN_PSHUFHW, + IX86_BUILTIN_PSHUFLW, + IX86_BUILTIN_PSHUFD, + + IX86_BUILTIN_PSLLW128, + IX86_BUILTIN_PSLLD128, + IX86_BUILTIN_PSLLQ128, + IX86_BUILTIN_PSRAW128, + IX86_BUILTIN_PSRAD128, + IX86_BUILTIN_PSRLW128, + IX86_BUILTIN_PSRLD128, + IX86_BUILTIN_PSRLQ128, + IX86_BUILTIN_PSLLDQI128, + IX86_BUILTIN_PSLLWI128, + IX86_BUILTIN_PSLLDI128, + IX86_BUILTIN_PSLLQI128, + IX86_BUILTIN_PSRAWI128, + IX86_BUILTIN_PSRADI128, + IX86_BUILTIN_PSRLDQI128, + IX86_BUILTIN_PSRLWI128, + IX86_BUILTIN_PSRLDI128, + IX86_BUILTIN_PSRLQI128, + + IX86_BUILTIN_PUNPCKHBW128, + IX86_BUILTIN_PUNPCKHWD128, + IX86_BUILTIN_PUNPCKHDQ128, + IX86_BUILTIN_PUNPCKHQDQ128, + IX86_BUILTIN_PUNPCKLBW128, + IX86_BUILTIN_PUNPCKLWD128, + IX86_BUILTIN_PUNPCKLDQ128, + IX86_BUILTIN_PUNPCKLQDQ128, + + IX86_BUILTIN_CLFLUSH, + IX86_BUILTIN_MFENCE, + IX86_BUILTIN_LFENCE, + IX86_BUILTIN_MAX }; -/* Define this macro if references to a symbol must be treated - differently depending on something about the variable or - function named by the symbol (such as what section it is in). - - On i386, if using PIC, mark a SYMBOL_REF for a non-global symbol - so that we may access it directly in the GOT. */ - -#define ENCODE_SECTION_INFO(DECL) \ -do { \ - if (flag_pic) \ - { \ - rtx rtl = (TREE_CODE_CLASS (TREE_CODE (DECL)) != 'd' \ - ? TREE_CST_RTL (DECL) : DECL_RTL (DECL)); \ - \ - if (GET_CODE (rtl) == MEM) \ - { \ - if (TARGET_DEBUG_ADDR \ - && TREE_CODE_CLASS (TREE_CODE (DECL)) == 'd') \ - { \ - fprintf (stderr, "Encode %s, public = %d\n", \ - IDENTIFIER_POINTER (DECL_NAME (DECL)), \ - TREE_PUBLIC (DECL)); \ - } \ - \ - SYMBOL_REF_FLAG (XEXP (rtl, 0)) \ - = (TREE_CODE_CLASS (TREE_CODE (DECL)) != 'd' \ - || ! TREE_PUBLIC (DECL)); \ - } \ - } \ -} while (0) - -/* The `FINALIZE_PIC' macro serves as a hook to emit these special - codes once the function is being compiled into assembly code, but - not before. 
(It is not done before, because in the case of - compiling an inline function, it would lead to multiple PIC - prologues being included in functions which used inline functions - and were compiled to assembly language.) */ - -#define FINALIZE_PIC \ - (current_function_uses_pic_offset_table |= current_function_profile) - +#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info +#define TARGET_STRIP_NAME_ENCODING ix86_strip_name_encoding + +#define ASM_OUTPUT_LABELREF(FILE,NAME) \ + do { \ + const char *xname = (NAME); \ + if (xname[0] == '%') \ + xname += 2; \ + if (xname[0] == '*') \ + xname += 1; \ + else \ + fputs (user_label_prefix, FILE); \ + fputs (xname, FILE); \ + } while (0) /* Max number of args passed in registers. If this is more than 3, we will have problems with ebx (register #4), since it is a caller save register and @@ -2359,7 +2564,7 @@ do { \ /* When a prototype says `char' or `short', really pass an `int'. (The 386 can't easily push less than an int.) */ -#define PROMOTE_PROTOTYPES (!TARGET_64BIT) +#define PROMOTE_PROTOTYPES 1 /* A macro to update M and UNSIGNEDP when an object whose type is TYPE and which has the specified mode and signedness is to be @@ -2395,7 +2600,7 @@ do { \ precise value of the constant, which is available for examination in X, and the rtx code of the expression in which it is contained, found in OUTER_CODE. - + CODE is the expression code--redundant, since it can be obtained with `GET_CODE (X)'. */ @@ -2411,16 +2616,21 @@ do { \ return flag_pic && SYMBOLIC_CONST (RTX) ? 1 : 0; \ \ case CONST_DOUBLE: \ - { \ - int code; \ - if (GET_MODE (RTX) == VOIDmode) \ - return 0; \ - \ - code = standard_80387_constant_p (RTX); \ - return code == 1 ? 1 : \ - code == 2 ? 2 : \ - 3; \ - } + if (GET_MODE (RTX) == VOIDmode) \ + return 0; \ + switch (standard_80387_constant_p (RTX)) \ + { \ + case 1: /* 0.0 */ \ + return 1; \ + case 2: /* 1.0 */ \ + return 2; \ + default: \ + /* Start with (MEM (SYMBOL_REF)), since that's where \ + it'll probably end up. Add a penalty for size. */ \ + return (COSTS_N_INSNS (1) + (flag_pic != 0) \ + + (GET_MODE (RTX) == SFmode ? 0 \ + : GET_MODE (RTX) == DFmode ? 
1 : 2)); \ + } /* Delete the definition here when TOPLEVEL_COSTS_N_INSNS gets added to cse.c */ #define TOPLEVEL_COSTS_N_INSNS(N) \ @@ -2497,7 +2707,9 @@ do { \ break; \ \ case MULT: \ - if (GET_CODE (XEXP (X, 1)) == CONST_INT) \ + if (FLOAT_MODE_P (GET_MODE (X))) \ + TOPLEVEL_COSTS_N_INSNS (ix86_cost->fmul); \ + else if (GET_CODE (XEXP (X, 1)) == CONST_INT) \ { \ unsigned HOST_WIDE_INT value = INTVAL (XEXP (X, 1)); \ int nbits = 0; \ @@ -2519,10 +2731,16 @@ do { \ case UDIV: \ case MOD: \ case UMOD: \ - TOPLEVEL_COSTS_N_INSNS (ix86_cost->divide); \ + if (FLOAT_MODE_P (GET_MODE (X))) \ + TOPLEVEL_COSTS_N_INSNS (ix86_cost->fdiv); \ + else \ + TOPLEVEL_COSTS_N_INSNS (ix86_cost->divide); \ + break; \ \ case PLUS: \ - if (!TARGET_DECOMPOSE_LEA \ + if (FLOAT_MODE_P (GET_MODE (X))) \ + TOPLEVEL_COSTS_N_INSNS (ix86_cost->fadd); \ + else if (!TARGET_DECOMPOSE_LEA \ && INTEGRAL_MODE_P (GET_MODE (X)) \ && GET_MODE_BITSIZE (GET_MODE (X)) <= GET_MODE_BITSIZE (Pmode)) \ { \ @@ -2562,26 +2780,50 @@ do { \ + rtx_cost (XEXP (X, 1), (OUTER_CODE))); \ } \ } \ + /* fall through */ \ \ + case MINUS: \ + if (FLOAT_MODE_P (GET_MODE (X))) \ + TOPLEVEL_COSTS_N_INSNS (ix86_cost->fadd); \ /* fall through */ \ + \ case AND: \ case IOR: \ case XOR: \ - case MINUS: \ if (!TARGET_64BIT && GET_MODE (X) == DImode) \ return (COSTS_N_INSNS (ix86_cost->add) * 2 \ + (rtx_cost (XEXP (X, 0), (OUTER_CODE)) \ << (GET_MODE (XEXP (X, 0)) != DImode)) \ + (rtx_cost (XEXP (X, 1), (OUTER_CODE)) \ << (GET_MODE (XEXP (X, 1)) != DImode))); \ - \ /* fall through */ \ + \ case NEG: \ + if (FLOAT_MODE_P (GET_MODE (X))) \ + TOPLEVEL_COSTS_N_INSNS (ix86_cost->fchs); \ + /* fall through */ \ + \ case NOT: \ if (!TARGET_64BIT && GET_MODE (X) == DImode) \ TOPLEVEL_COSTS_N_INSNS (ix86_cost->add * 2); \ TOPLEVEL_COSTS_N_INSNS (ix86_cost->add); \ \ + case FLOAT_EXTEND: \ + if (!TARGET_SSE_MATH \ + || !VALID_SSE_REG_MODE (GET_MODE (X))) \ + TOPLEVEL_COSTS_N_INSNS (0); \ + break; \ + \ + case ABS: \ + if (FLOAT_MODE_P (GET_MODE (X))) \ + TOPLEVEL_COSTS_N_INSNS (ix86_cost->fabs); \ + break; \ + \ + case SQRT: \ + if (FLOAT_MODE_P (GET_MODE (X))) \ + TOPLEVEL_COSTS_N_INSNS (ix86_cost->fsqrt); \ + break; \ + \ egress_rtx_costs: \ break; @@ -2686,10 +2928,10 @@ do { \ cost many times greater than aligned accesses, for example if they are emulated in a trap handler. - When this macro is non-zero, the compiler will act as if - `STRICT_ALIGNMENT' were non-zero when generating code for block + When this macro is nonzero, the compiler will act as if + `STRICT_ALIGNMENT' were nonzero when generating code for block moves. This can cause significantly more instructions to be - produced. Therefore, do not set this macro non-zero if unaligned + produced. Therefore, do not set this macro nonzero if unaligned accesses only add a cycle or two to the time for a memory access. If the value of this macro is always zero, it need not be defined. */ @@ -2716,34 +2958,6 @@ do { \ #define NO_RECURSIVE_FUNCTION_CSE -/* Add any extra modes needed to represent the condition code. - - For the i386, we need separate modes when floating-point - equality comparisons are being done. - - Add CCNO to indicate comparisons against zero that requires - Overflow flag to be unset. Sign bit test is used instead and - thus can be used to form "a&b>0" type of tests. - - Add CCGC to indicate comparisons agains zero that allows - unspecified garbage in the Carry flag. This mode is used - by inc/dec instructions. 
- - Add CCGOC to indicate comparisons agains zero that allows - unspecified garbage in the Carry and Overflow flag. This - mode is used to simulate comparisons of (a-b) and (a+b) - against zero using sub/cmp/add operations. - - Add CCZ to indicate that only the Zero flag is valid. */ - -#define EXTRA_CC_MODES \ - CC (CCGCmode, "CCGC") \ - CC (CCGOCmode, "CCGOC") \ - CC (CCNOmode, "CCNO") \ - CC (CCZmode, "CCZ") \ - CC (CCFPmode, "CCFP") \ - CC (CCFPUmode, "CCFPU") - /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, return the mode to be used for the comparison. @@ -2755,7 +2969,7 @@ do { \ #define SELECT_CC_MODE(OP, X, Y) ix86_cc_mode ((OP), (X), (Y)) -/* Return non-zero if MODE implies a floating point inequality can be +/* Return nonzero if MODE implies a floating point inequality can be reversed. */ #define REVERSIBLE_CC_MODE(MODE) 1 @@ -2779,7 +2993,7 @@ do { \ For float regs, the stack top is sometimes referred to as "%st(0)" instead of just "%st". PRINT_REG handles this with the "y" code. */ -#undef HI_REGISTER_NAMES +#undef HI_REGISTER_NAMES #define HI_REGISTER_NAMES \ {"ax","dx","cx","bx","si","di","bp","sp", \ "st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)","", \ @@ -2829,7 +3043,7 @@ extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER]; /* Before the prologue, RA is at 0(%esp). */ #define INCOMING_RETURN_ADDR_RTX \ gen_rtx_MEM (VOIDmode, gen_rtx_REG (VOIDmode, STACK_POINTER_REGNUM)) - + /* After the prologue, RA is at -4(AP) in the current frame. */ #define RETURN_ADDR_RTX(COUNT, FRAME) \ ((COUNT) == 0 \ @@ -2860,12 +3074,6 @@ extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER]; ? ((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4\ : DW_EH_PE_absptr) -/* This is how to output the definition of a user-level label named NAME, - such as the label on a static function or variable NAME. */ - -#define ASM_OUTPUT_LABEL(FILE, NAME) \ - (assemble_name ((FILE), (NAME)), fputs (":\n", (FILE))) - /* Store in OUTPUT a string (made with alloca) containing an assembler-name for a local static variable named NAME. LABELNO is an integer which is different for each call. */ @@ -2914,7 +3122,7 @@ do { \ #define JUMP_TABLES_IN_TEXT_SECTION \ (!TARGET_64BIT && flag_pic && !HAVE_AS_GOTOFF_IN_DATA) -/* A C statement that outputs an address constant appropriate to +/* A C statement that outputs an address constant appropriate to for DWARF debugging. */ #define ASM_OUTPUT_DWARF_ADDR_CONST(FILE, X) \ @@ -2925,6 +3133,13 @@ do { \ #define ASM_SIMPLIFY_DWARF_ADDR(X) \ i386_simplify_dwarf_addr (X) +/* Emit a dtp-relative reference to a TLS variable. */ + +#ifdef HAVE_AS_TLS +#define ASM_OUTPUT_DWARF_DTPREL(FILE, SIZE, X) \ + i386_output_dwarf_dtprel (FILE, SIZE, X) +#endif + /* Switch to init or fini section via SECTION_OP, emit a call to FUNC, and switch back. For x86 we do this only to save a few bytes that would otherwise be unused in the text section. */ @@ -2939,7 +3154,7 @@ do { \ print_operand function. */ #define PRINT_OPERAND_PUNCT_VALID_P(CODE) \ - ((CODE) == '*' || (CODE) == '+') + ((CODE) == '*' || (CODE) == '+' || (CODE) == '&') /* Print the name of a register based on its machine mode and number. If CODE is 'w', pretend the mode is HImode. @@ -2958,6 +3173,12 @@ do { \ #define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ print_operand_address ((FILE), (ADDR)) +#define OUTPUT_ADDR_CONST_EXTRA(FILE, X, FAIL) \ +do { \ + if (! 
output_addr_const_extra (FILE, (X))) \ + goto FAIL; \ +} while (0); + /* Print the name of a register for based on its machine mode and number. This macro is used to print debugging output. This macro is different from PRINT_REG in that it may be used in @@ -3064,6 +3285,7 @@ do { \ {"general_no_elim_operand", {CONST_INT, CONST_DOUBLE, CONST, \ SYMBOL_REF, LABEL_REF, SUBREG, REG, MEM}}, \ {"nonmemory_no_elim_operand", {CONST_INT, REG, SUBREG}}, \ + {"index_register_operand", {SUBREG, REG}}, \ {"q_regs_operand", {SUBREG, REG}}, \ {"non_q_regs_operand", {SUBREG, REG}}, \ {"fcmov_comparison_operator", {EQ, NE, LTU, GTU, LEU, GEU, UNORDERED, \ @@ -3088,7 +3310,17 @@ do { \ {"memory_displacement_operand", {MEM}}, \ {"cmpsi_operand", {CONST_INT, CONST_DOUBLE, CONST, SYMBOL_REF, \ LABEL_REF, SUBREG, REG, MEM, AND}}, \ - {"long_memory_operand", {MEM}}, + {"long_memory_operand", {MEM}}, \ + {"tls_symbolic_operand", {SYMBOL_REF}}, \ + {"global_dynamic_symbolic_operand", {SYMBOL_REF}}, \ + {"local_dynamic_symbolic_operand", {SYMBOL_REF}}, \ + {"initial_exec_symbolic_operand", {SYMBOL_REF}}, \ + {"local_exec_symbolic_operand", {SYMBOL_REF}}, \ + {"any_fp_register_operand", {REG}}, \ + {"register_and_not_any_fp_reg_operand", {REG}}, \ + {"fp_register_operand", {REG}}, \ + {"register_and_not_fp_reg_operand", {REG}}, \ + {"vector_move_operand", {CONST_VECTOR, SUBREG, REG, MEM}}, \ /* A list of predicates that do special things with modes, and so should not elicit warnings for VOIDmode match_operand. */ @@ -3096,57 +3328,90 @@ do { \ #define SPECIAL_MODE_PREDICATES \ "ext_register_operand", -/* CM_32 is used by 32bit ABI - CM_SMALL is small model assuming that all code and data fits in the first - 31bits of address space. - CM_KERNEL is model assuming that all code and data fits in the negative - 31bits of address space. - CM_MEDIUM is model assuming that code fits in the first 31bits of address - space. Size of data is unlimited. - CM_LARGE is model making no assumptions about size of particular sections. - - CM_SMALL_PIC is model for PIC libraries assuming that code+data+got/plt - tables first in 31bits of address space. - */ +/* Which processor to schedule for. The cpu attribute defines a list that + mirrors this list, so changes to i386.md must be made at the same time. */ + +enum processor_type +{ + PROCESSOR_I386, /* 80386 */ + PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */ + PROCESSOR_PENTIUM, + PROCESSOR_PENTIUMPRO, + PROCESSOR_K6, + PROCESSOR_ATHLON, + PROCESSOR_PENTIUM4, + PROCESSOR_max +}; + +extern enum processor_type ix86_cpu; +extern const char *ix86_cpu_string; + +extern enum processor_type ix86_arch; +extern const char *ix86_arch_string; + +enum fpmath_unit +{ + FPMATH_387 = 1, + FPMATH_SSE = 2 +}; + +extern enum fpmath_unit ix86_fpmath; +extern const char *ix86_fpmath_string; + +enum tls_dialect +{ + TLS_DIALECT_GNU, + TLS_DIALECT_SUN +}; + +extern enum tls_dialect ix86_tls_dialect; +extern const char *ix86_tls_dialect_string; + enum cmodel { - CM_32, - CM_SMALL, - CM_KERNEL, - CM_MEDIUM, - CM_LARGE, - CM_SMALL_PIC + CM_32, /* The traditional 32-bit ABI. */ + CM_SMALL, /* Assumes all code and data fits in the low 31 bits. */ + CM_KERNEL, /* Assumes all code and data fits in the high 31 bits. */ + CM_MEDIUM, /* Assumes code fits in the low 31 bits; data unlimited. */ + CM_LARGE, /* No assumptions. */ + CM_SMALL_PIC /* Assumes code+data+got/plt fits in a 31 bit region. */ }; +extern enum cmodel ix86_cmodel; +extern const char *ix86_cmodel_string; + /* Size of the RED_ZONE area. 
*/ #define RED_ZONE_SIZE 128 /* Reserved area of the red zone for temporaries. */ #define RED_ZONE_RESERVE 8 -extern const char *ix86_debug_arg_string, *ix86_debug_addr_string; enum asm_dialect { ASM_ATT, ASM_INTEL }; + extern const char *ix86_asm_string; extern enum asm_dialect ix86_asm_dialect; -/* Value of -mcmodel specified by user. */ -extern const char *ix86_cmodel_string; -extern enum cmodel ix86_cmodel; - -/* Variables in i386.c */ -extern const char *ix86_cpu_string; /* for -mcpu=<xxx> */ -extern const char *ix86_arch_string; /* for -march=<xxx> */ -extern const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */ -extern const char *ix86_regparm_string; /* # registers to use to pass args */ -extern const char *ix86_align_loops_string; /* power of two alignment for loops */ -extern const char *ix86_align_jumps_string; /* power of two alignment for non-loop jumps */ -extern const char *ix86_align_funcs_string; /* power of two alignment for functions */ -extern const char *ix86_preferred_stack_boundary_string;/* power of two alignment for stack boundary */ -extern const char *ix86_branch_cost_string; /* values 1-5: see jump.c */ -extern int ix86_regparm; /* ix86_regparm_string as a number */ -extern int ix86_preferred_stack_boundary; /* preferred stack boundary alignment in bits */ -extern int ix86_branch_cost; /* values 1-5: see jump.c */ -extern enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER]; /* smalled class containing REGNO */ + +extern int ix86_regparm; +extern const char *ix86_regparm_string; + +extern int ix86_preferred_stack_boundary; +extern const char *ix86_preferred_stack_boundary_string; + +extern int ix86_branch_cost; +extern const char *ix86_branch_cost_string; + +extern const char *ix86_debug_arg_string; +extern const char *ix86_debug_addr_string; + +/* Obsoleted by -f options. Remove before 3.2 ships. */ +extern const char *ix86_align_loops_string; +extern const char *ix86_align_jumps_string; +extern const char *ix86_align_funcs_string; + +/* Smallest class containing REGNO. */ +extern enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER]; + extern rtx ix86_compare_op0; /* operand 0 for comparisons */ extern rtx ix86_compare_op1; /* operand 1 for comparisons */ @@ -3156,12 +3421,12 @@ extern rtx ix86_compare_op1; /* operand 1 for comparisons */ redundant computation of new control word by the mode switching pass. The fldcw instructions are still emitted redundantly, but this is probably not going to be noticeable problem, as most CPUs do have fast path for - the sequence. + the sequence. The machinery is to emit simple truncation instructions and split them before reload to instructions having USEs of two memory locations that are filled by this code to old and new control word. - + Post-reload pass may be later used to eliminate the redundant fildcw if needed. */ @@ -3222,6 +3487,10 @@ enum fp_cw_mode {FP_CW_STORED, FP_CW_UNINITIALIZED, FP_CW_ANY}; ((SRC) < FIRST_STACK_REG || (SRC) > LAST_STACK_REG) +#define MACHINE_DEPENDENT_REORG(X) x86_machine_dependent_reorg(X) + +#define DLL_IMPORT_EXPORT_PREFIX '@' + /* Local variables: version-control: t diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md index 36a0497..1fa2998 100644 --- a/contrib/gcc/config/i386/i386.md +++ b/contrib/gcc/config/i386/i386.md @@ -1,5 +1,6 @@ ;; GCC machine description for IA-32 and x86-64. 
-;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +;; 2001, 2002, 2003 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka @@ -49,55 +50,77 @@ ;; 'k' Likewise, print the SImode name of the register. ;; 'h' Print the QImode name for a "high" register, either ah, bh, ch or dh. ;; 'y' Print "st(0)" instead of "st" as a register. -;; + ;; UNSPEC usage: -;; 0 This is a `scas' operation. The mode of the UNSPEC is always SImode. -;; operand 0 is the memory address to scan. -;; operand 1 is a register containing the value to scan for. The mode -;; of the scas opcode will be the same as the mode of this operand. -;; operand 2 is the known alignment of operand 0. -;; 1 This is a `sin' operation. The mode of the UNSPEC is MODE_FLOAT. -;; operand 0 is the argument for `sin'. -;; 2 This is a `cos' operation. The mode of the UNSPEC is MODE_FLOAT. -;; operand 0 is the argument for `cos'. -;; 3 This is part of a `stack probe' operation. The mode of the UNSPEC is -;; always SImode. operand 0 is the size of the stack allocation. -;; 4 This is the source of a fake SET of the frame pointer which is used to -;; prevent insns referencing it being scheduled across the initial -;; decrement of the stack pointer. -;; 5 This is a `bsf' operation. -;; 6 This is the @GOT offset of a PIC address. -;; 7 This is the @GOTOFF offset of a PIC address. -;; 8 This is a reference to a symbol's @PLT address. -;; 9 This is an `fnstsw' operation. -;; 10 This is a `sahf' operation. -;; 11 This is a `fstcw' operation -;; 12 This is behaviour of add when setting carry flag. -;; 13 This is a `eh_return' placeholder. - -;; For SSE/MMX support: -;; 30 This is `fix', guaranteed to be truncating. -;; 31 This is a `emms' operation. -;; 32 This is a `maskmov' operation. -;; 33 This is a `movmsk' operation. -;; 34 This is a `non-temporal' move. -;; 36 This is used to distinguish COMISS from UCOMISS. -;; 37 This is a `ldmxcsr' operation. -;; 38 This is a forced `movaps' instruction (rather than whatever movti does) -;; 39 This is a forced `movups' instruction (rather than whatever movti does) -;; 40 This is a `stmxcsr' operation. -;; 41 This is a `shuffle' operation. -;; 42 This is a `rcp' operation. -;; 43 This is a `rsqsrt' operation. -;; 44 This is a `sfence' operation. -;; 45 This is a noop to prevent excessive combiner cleverness. -;; 46 This is a `femms' operation. -;; 49 This is a 'pavgusb' operation. -;; 50 This is a `pfrcp' operation. -;; 51 This is a `pfrcpit1' operation. -;; 52 This is a `pfrcpit2' operation. -;; 53 This is a `pfrsqrt' operation. -;; 54 This is a `pfrsqrit1' operation. 
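;; [Editorial aside -- not part of the patch.]  The hunk below replaces the
;; numeric unspec codes listed above with symbolic define_constants.  The
;; machine-description reader substitutes the names when the file is read, so
;; the generated RTL does not change; only the source becomes self-describing.
;; A minimal sketch of the difference as it would appear in a pattern (the
;; operand shown is hypothetical):
;;
;;   old style:  (unspec:SI [(match_dup 1)] 7)               ; 7 = @GOTOFF
;;   new style:  (unspec:SI [(match_dup 1)] UNSPEC_GOTOFF)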
+ +(define_constants + [; Relocation specifiers + (UNSPEC_GOT 0) + (UNSPEC_GOTOFF 1) + (UNSPEC_GOTPCREL 2) + (UNSPEC_GOTTPOFF 3) + (UNSPEC_TPOFF 4) + (UNSPEC_NTPOFF 5) + (UNSPEC_DTPOFF 6) + (UNSPEC_GOTNTPOFF 7) + (UNSPEC_INDNTPOFF 8) + + ; Prologue support + (UNSPEC_STACK_PROBE 10) + (UNSPEC_STACK_ALLOC 11) + (UNSPEC_SET_GOT 12) + (UNSPEC_SSE_PROLOGUE_SAVE 13) + + ; TLS support + (UNSPEC_TP 15) + (UNSPEC_TLS_GD 16) + (UNSPEC_TLS_LD_BASE 17) + + ; Other random patterns + (UNSPEC_SCAS 20) + (UNSPEC_SIN 21) + (UNSPEC_COS 22) + (UNSPEC_BSF 23) + (UNSPEC_FNSTSW 24) + (UNSPEC_SAHF 25) + (UNSPEC_FSTCW 26) + (UNSPEC_ADD_CARRY 27) + (UNSPEC_FLDCW 28) + + ; For SSE/MMX support: + (UNSPEC_FIX 30) + (UNSPEC_MASKMOV 32) + (UNSPEC_MOVMSK 33) + (UNSPEC_MOVNT 34) + (UNSPEC_MOVA 38) + (UNSPEC_MOVU 39) + (UNSPEC_SHUFFLE 41) + (UNSPEC_RCP 42) + (UNSPEC_RSQRT 43) + (UNSPEC_SFENCE 44) + (UNSPEC_NOP 45) ; prevents combiner cleverness + (UNSPEC_PAVGUSB 49) + (UNSPEC_PFRCP 50) + (UNSPEC_PFRCPIT1 51) + (UNSPEC_PFRCPIT2 52) + (UNSPEC_PFRSQRT 53) + (UNSPEC_PFRSQIT1 54) + (UNSPEC_PSHUFLW 55) + (UNSPEC_PSHUFHW 56) + (UNSPEC_MFENCE 59) + (UNSPEC_LFENCE 60) + (UNSPEC_PSADBW 61) + ]) + +(define_constants + [(UNSPECV_BLOCKAGE 0) + (UNSPECV_EH_RETURN 13) + (UNSPECV_EMMS 31) + (UNSPECV_LDMXCSR 37) + (UNSPECV_STMXCSR 40) + (UNSPECV_FEMMS 46) + (UNSPECV_CLFLUSH 57) + ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls ;; from i386.c. @@ -116,26 +139,44 @@ ;; A basic instruction type. Refinements due to arguments to be ;; provided in other attributes. (define_attr "type" - "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld,sse,mmx,fistp" + "other,multi, + alu,alu1,negnot,imov,imovx,lea, + incdec,ishift,ishift1,rotate,rotate1,imul,idiv, + icmp,test,ibr,setcc,icmov, + push,pop,call,callv, + str,cld, + fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp, + sselog,sseiadd,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv, + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) ;; Main data type used by the insn -(define_attr "mode" "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI" +(define_attr "mode" + "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI,V4SF,V2DF,V2SF" (const_string "unknown")) -;; Set for i387 operations. -(define_attr "i387" "" - (if_then_else (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp") - (const_int 1) - (const_int 0))) +;; The CPU unit operations uses. +(define_attr "unit" "integer,i387,sse,mmx,unknown" + (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp") + (const_string "i387") + (eq_attr "type" "sselog,sseiadd,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv") + (const_string "sse") + (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") + (const_string "mmx") + (eq_attr "type" "other") + (const_string "unknown")] + (const_string "integer"))) ;; The (bounding maximum) length of an instruction immediate. 
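;; [Editorial aside -- not part of the patch.]  The new "unit" attribute above
;; derives the execution unit from the finer-grained "type" classes, replacing
;; the old boolean "i387" attribute and the single coarse "sse"/"mmx" types.
;; A hedged sketch of the effect on individual patterns (the set_attr lines
;; are hypothetical, shown only for illustration):
;;
;;   (set_attr "type" "ssemov")   ; => unit "sse",  counted as 0F-prefixed below
;;   (set_attr "type" "mmxadd")   ; => unit "mmx",  counted as 0F-prefixed below
;;   (set_attr "type" "fmul")     ; => unit "i387", uses the i387 length rule
;;
;; so a pattern only states what kind of operation it is; the unit, prefix and
;; length estimates all follow from that one attribute.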
(define_attr "length_immediate" "" - (cond [(eq_attr "type" "incdec,setcc,icmov,ibr,str,cld,lea,other,multi,idiv,sse,mmx") + (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv") (const_int 0) - (eq_attr "i387" "1") + (eq_attr "unit" "i387,sse,mmx") (const_int 0) - (eq_attr "type" "alu1,negnot,alu,icmp,imovx,ishift,imul,push,pop") + (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1, + imul,icmp,push,pop") (symbol_ref "ix86_attr_length_immediate_default(insn,1)") (eq_attr "type" "imov,test") (symbol_ref "ix86_attr_length_immediate_default(insn,0)") @@ -147,22 +188,20 @@ (if_then_else (match_operand 1 "constant_call_address_operand" "") (const_int 4) (const_int 0)) + ;; We don't know the size before shorten_branches. Expect + ;; the instruction to fit for better scheduling. (eq_attr "type" "ibr") - (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) - (lt (minus (match_dup 0) (pc)) - (const_int 124))) - (const_int 1) - (const_int 4)) + (const_int 1) ] - (symbol_ref "/* Update immediate_length and other attributes! */ abort(),1"))) + (symbol_ref "/* Update immediate_length and other attributes! */ + abort(),1"))) ;; The (bounding maximum) length of an instruction address. (define_attr "length_address" "" (cond [(eq_attr "type" "str,cld,other,multi,fxch") (const_int 0) (and (eq_attr "type" "call") - (match_operand 1 "constant_call_address_operand" "")) + (match_operand 0 "constant_call_address_operand" "")) (const_int 0) (and (eq_attr "type" "callv") (match_operand 1 "constant_call_address_operand" "")) @@ -172,16 +211,25 @@ ;; Set when length prefix is used. (define_attr "prefix_data16" "" - (if_then_else (eq_attr "mode" "HI") + (if_then_else (ior (eq_attr "mode" "HI") + (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF"))) (const_int 1) (const_int 0))) ;; Set when string REP prefix is used. -(define_attr "prefix_rep" "" (const_int 0)) +(define_attr "prefix_rep" "" + (if_then_else (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF")) + (const_int 1) + (const_int 0))) ;; Set when 0f opcode prefix is used. 
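;; [Editorial aside -- not part of the patch.]  The prefix_data16 and
;; prefix_rep changes above reflect how SSE/SSE2 instructions are encoded:
;; scalar SSE operations carry an F3 byte (F2 for double precision) that the
;; decoder treats as a REP-style prefix, and packed-double operations carry a
;; 66 operand-size prefix.  A worked length estimate under this model, for a
;; register-to-register addss (example instruction, not taken from the patch):
;;
;;   addss %xmm1, %xmm0   =>   F3 0F 58 C1
;;   length = 1 (prefix_rep) + 1 (prefix_0f, defined just below)
;;          + 1 (opcode) + 1 (modrm) = 4 bytes
;;
;; which matches what the "length" attribute later in this file computes for
;; such an instruction with no immediate and no displacement.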
(define_attr "prefix_0f" "" - (if_then_else (eq_attr "type" "imovx,setcc,icmov,sse,mmx") + (if_then_else + (eq_attr "type" + "imovx,setcc,icmov, + sselog,sseiadd,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv, + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") (const_int 1) (const_int 0))) @@ -189,7 +237,7 @@ (define_attr "modrm" "" (cond [(eq_attr "type" "str,cld") (const_int 0) - (eq_attr "i387" "1") + (eq_attr "unit" "i387") (const_int 0) (and (eq_attr "type" "incdec") (ior (match_operand:SI 1 "register_operand" "") @@ -205,6 +253,12 @@ (and (match_operand 0 "register_operand" "") (match_operand 1 "immediate_operand" ""))) (const_int 0) + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_int 0) ] (const_int 1))) @@ -214,11 +268,15 @@ (define_attr "length" "" (cond [(eq_attr "type" "other,multi,fistp") (const_int 16) - ] + (eq_attr "type" "fcmp") + (const_int 4) + (eq_attr "unit" "i387") + (plus (const_int 2) + (plus (attr "prefix_data16") + (attr "length_address")))] (plus (plus (attr "modrm") (plus (attr "prefix_0f") - (plus (attr "i387") - (const_int 1)))) + (const_int 1))) (plus (attr "prefix_rep") (plus (attr "prefix_data16") (plus (attr "length_immediate") @@ -243,7 +301,7 @@ (if_then_else (match_operand 0 "memory_operand" "") (const_string "both") (const_string "load")) - (eq_attr "type" "icmp,test") + (eq_attr "type" "icmp,test,ssecmp,mmxcmp,fcmp") (if_then_else (ior (match_operand 0 "memory_operand" "") (match_operand 1 "memory_operand" "")) (const_string "load") @@ -270,7 +328,12 @@ (const_string "store") (match_operand 1 "memory_operand" "") (const_string "load") - (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp,sse,mmx") + (and (eq_attr "type" + "!alu1,negnot, + imov,imovx,icmp,test, + fmov,fcmp,fsgn, + sse,ssemov,ssecmp,ssecvt, + mmx,mmxmov,mmxcmp,mmxcvt") (match_operand 2 "memory_operand" "")) (const_string "load") (and (eq_attr "type" "icmov") @@ -284,11 +347,11 @@ (define_attr "imm_disp" "false,true,unknown" (cond [(eq_attr "type" "other,multi") (const_string "unknown") - (and (eq_attr "type" "icmp,test,imov") + (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1") (and (match_operand 0 "memory_displacement_operand" "") (match_operand 1 "immediate_operand" ""))) (const_string "true") - (and (eq_attr "type" "alu,ishift,imul,idiv") + (and (eq_attr "type" "alu,ishift,rotate,imul,idiv") (and (match_operand 0 "memory_displacement_operand" "") (match_operand 2 "immediate_operand" ""))) (const_string "true") @@ -305,710 +368,10 @@ [(set_attr "length" "128") (set_attr "type" "multi")]) -;; Pentium Scheduling -;; -;; The Pentium is an in-order core with two integer pipelines. - -;; True for insns that behave like prefixed insns on the Pentium. -(define_attr "pent_prefix" "false,true" - (if_then_else (ior (eq_attr "prefix_0f" "1") - (ior (eq_attr "prefix_data16" "1") - (eq_attr "prefix_rep" "1"))) - (const_string "true") - (const_string "false"))) - -;; Categorize how an instruction slots. - -;; The non-MMX Pentium slots an instruction with prefixes on U pipe only, -;; while MMX Pentium can slot it on either U or V. Model non-MMX Pentium -;; rules, because it results in noticeably better code on non-MMX Pentium -;; and doesn't hurt much on MMX. (Prefixed instructions are not very -;; common, so the scheduler usualy has a non-prefixed insn to pair). 
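;; [Editorial aside -- not part of the patch.]  A concrete reading of the
;; pent_prefix/pent_pair classification defined just below (the instructions
;; are illustrative examples, not taken from the patch):
;;
;;   addl %ebx, %eax   ; no prefix  -> pent_prefix "false" -> pent_pair "uv"
;;   addw %bx, %ax     ; 66 prefix  -> pent_prefix "true"  -> pent_pair "pu"
;;   jne .L1           ; branch (ibr)                      -> pent_pair "pv"
;;
;; "uv" issues to either pipe, "pu" only to the U pipe, and branches are "pv"
;; because the Pentium pairs a jump only as the second (V pipe) instruction.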
- -(define_attr "pent_pair" "uv,pu,pv,np" - (cond [(eq_attr "imm_disp" "true") - (const_string "np") - (ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec") - (and (eq_attr "type" "pop,push") - (eq_attr "memory" "!both"))) - (if_then_else (eq_attr "pent_prefix" "true") - (const_string "pu") - (const_string "uv")) - (eq_attr "type" "ibr") - (const_string "pv") - (and (eq_attr "type" "ishift") - (match_operand 2 "const_int_operand" "")) - (const_string "pu") - (and (eq_attr "type" "call") - (match_operand 0 "constant_call_address_operand" "")) - (const_string "pv") - (and (eq_attr "type" "callv") - (match_operand 1 "constant_call_address_operand" "")) - (const_string "pv") - ] - (const_string "np"))) - -;; Rough readiness numbers. Fine tuning happens in i386.c. -;; -;; u describes pipe U -;; v describes pipe V -;; uv describes either pipe U or V for those that can issue to either -;; np describes not paring -;; fpu describes fpu -;; fpm describes fp insns of different types are not pipelined. -;; -;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "imul")) - 11 11) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "imul")) - 11 11) - -;; Rep movs takes minimally 12 cycles. -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "str")) - 12 12) - -; ??? IDIV for SI takes 46 cycles, for HI 30, for QI 22 -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "idiv")) - 46 46) - -; Fp reg-reg moves takes 1 cycle. Loads takes 1 cycle for SF/DF mode, -; 3 cycles for XFmode. Stores takes 2 cycles for SF/DF and 3 for XF. -; fldz and fld1 takes 2 cycles. Only reg-reg moves are pairable. -; The integer <-> fp conversion is not modeled correctly. Fild behaves -; like normal fp operation and fist takes 6 cycles. - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load,store") - (eq_attr "mode" "XF")))) - 3 3) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load,store") - (eq_attr "mode" "XF")))) - 3 3) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (ior (match_operand 1 "immediate_operand" "") - (eq_attr "memory" "store")))) - 2 2) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (ior (match_operand 1 "immediate_operand" "") - (eq_attr "memory" "store")))) - 2 2) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "cld")) - 2 2) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (eq_attr "memory" "none,load"))) - 1 1) - -; Read/Modify/Write instructions usually take 3 cycles. 
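;; [Editorial aside -- not part of the patch.]  For readers unfamiliar with
;; the old-style function-unit scheduler used throughout this section: the
;; positional arguments of define_function_unit are the unit name, its
;; multiplicity, its simultaneity (0 means fully pipelined), the condition
;; selecting the insns that use it, the ready delay (result latency), and the
;; issue delay.  For example, the integer-divide unit above,
;;
;;   (define_function_unit "pent_np" 1 0
;;     (and (eq_attr "cpu" "pentium")
;;          (eq_attr "type" "idiv"))
;;     46 46)
;;
;; describes one non-pairing unit whose result is ready after 46 cycles and
;; which cannot accept another instruction for 46 cycles.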
-(define_function_unit "pent_u" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "pu") - (eq_attr "memory" "both")))) - 3 3) - -(define_function_unit "pent_uv" 2 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "both")))) - 3 3) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,negnot,ishift") - (and (eq_attr "pent_pair" "np") - (eq_attr "memory" "both")))) - 3 3) - -; Read/Modify or Modify/Write instructions usually take 2 cycles. -(define_function_unit "pent_u" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,ishift") - (and (eq_attr "pent_pair" "pu") - (eq_attr "memory" "load,store")))) - 2 2) - -(define_function_unit "pent_uv" 2 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "load,store")))) - 2 2) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,ishift") - (and (eq_attr "pent_pair" "np") - (eq_attr "memory" "load,store")))) - 2 2) - -; Insns w/o memory operands and move instructions usually take one cycle. -(define_function_unit "pent_u" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "pu")) - 1 1) - -(define_function_unit "pent_v" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "pv")) - 1 1) - -(define_function_unit "pent_uv" 2 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "!np")) - 1 1) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "np")) - 1 1) - -; Pairable insns only conflict with other non-pairable insns. -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "both")))) - 3 3 - [(eq_attr "pent_pair" "np")]) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "load,store")))) - 2 2 - [(eq_attr "pent_pair" "np")]) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "!np")) - 1 1 - [(eq_attr "pent_pair" "np")]) - -; Floating point instructions usually blocks cycle longer when combined with -; integer instructions, because of the inpaired fxch instruction. -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")) - 2 2 - [(eq_attr "type" "!fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")]) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fcmp,fxch,fsgn")) - 1 1) - -; Addition takes 3 cycles; assume other random cruft does as well. -; ??? Trivial fp operations such as fabs or fchs takes only one cycle. -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fop,fop1,fistp")) - 3 1) - -; Multiplication takes 3 cycles and is only half pipelined. -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fmul")) - 3 1) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fmul")) - 2 2) - -; ??? This is correct only for fdiv and sqrt -- sin/cos take 65-100 cycles. -; They can overlap with integer insns. Only the last two cycles can overlap -; with other fp insns. 
Only fsin/fcos can overlap with multiplies. -; Only last two cycles of fsin/fcos can overlap with other instructions. -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fdiv")) - 39 37) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fdiv")) - 39 39) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fpspc")) - 70 68) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fpspc")) - 70 70) - -;; Pentium Pro/PII Scheduling -;; -;; The PPro has an out-of-order core, but the instruction decoders are -;; naturally in-order and asymmetric. We get best performance by scheduling -;; for the decoders, for in doing so we give the oo execution unit the -;; most choices. - -;; Categorize how many uops an ia32 instruction evaluates to: -;; one -- an instruction with 1 uop can be decoded by any of the -;; three decoders. -;; few -- an instruction with 1 to 4 uops can be decoded only by -;; decoder 0. -;; many -- a complex instruction may take an unspecified number of -;; cycles to decode in decoder 0. - -(define_attr "ppro_uops" "one,few,many" - (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str") - (const_string "many") - (eq_attr "type" "icmov,fcmov,str,cld") - (const_string "few") - (eq_attr "type" "imov") - (if_then_else (eq_attr "memory" "store,both") - (const_string "few") - (const_string "one")) - (eq_attr "memory" "!none") - (const_string "few") - ] - (const_string "one"))) - -;; Rough readiness numbers. Fine tuning happens in i386.c. -;; -;; p0 describes port 0. -;; p01 describes ports 0 and 1 as a pair; alu insns can issue to either. -;; p2 describes port 2 for loads. -;; p34 describes ports 3 and 4 for stores. -;; fpu describes the fpu accessed via port 0. -;; ??? It is less than clear if there are separate fadd and fmul units -;; that could operate in parallel. -;; -;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "ishift,lea,ibr,cld")) - 1 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "imul")) - 4 1) - -;; ??? Does the divider lock out the pipe while it works, -;; or is there a disconnected unit? 
-(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "idiv")) - 17 17) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fop,fop1,fsgn,fistp")) - 3 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fcmov")) - 2 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fcmp")) - 1 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fmov")) - 1 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fmul")) - 5 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fdiv,fpspc")) - 56 1) - -(define_function_unit "ppro_p01" 2 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "!imov,fmov")) - 1 1) - -(define_function_unit "ppro_p01" 2 0 - (and (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "imov,fmov")) - (eq_attr "memory" "none")) - 1 1) - -(define_function_unit "ppro_p2" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (ior (eq_attr "type" "pop") - (eq_attr "memory" "load,both"))) - 3 1) - -(define_function_unit "ppro_p34" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (ior (eq_attr "type" "push") - (eq_attr "memory" "store,both"))) - 1 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fop,fop1,fsgn,fmov,fcmp,fcmov,fistp")) - 1 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fmul")) - 5 2) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fdiv,fpspc")) - 56 56) - -;; imul uses the fpu. ??? does it have the same throughput as fmul? -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "imul")) - 4 1) - -;; AMD K6/K6-2 Scheduling -;; -;; The K6 has similar architecture to PPro. Important difference is, that -;; there are only two decoders and they seems to be much slower than execution -;; units. So we have to pay much more attention to proper decoding for -;; schedulers. We share most of scheduler code for PPro in i386.c -;; -;; The fp unit is not pipelined and do one operation per two cycles including -;; the FXCH. -;; -;; alu describes both ALU units (ALU-X and ALU-Y). -;; alux describes X alu unit -;; fpu describes FPU unit -;; load describes load unit. -;; branch describes branch unit. -;; store decsribes store unit. This unit is not modelled completely and only -;; used to model lea operation. Otherwise it lie outside of the critical -;; path. -;; -;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. - -;; The decoder specification is in the PPro section above! - -;; Shift instructions and certain arithmetic are issued only to X pipe. -(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "ishift,alu1,negnot,cld")) - 1 1) - -;; The QI mode arithmetic is issued to X pipe only. 
-(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec") - (match_operand:QI 0 "general_operand" ""))) - 1 1) - -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "ishift,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea")) - 1 1) - -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "imov") - (eq_attr "memory" "none"))) - 1 1) - -(define_function_unit "k6_branch" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "call,callv,ibr")) - 1 1) - -;; Load unit have two cycle latency, but we take care for it in adjust_cost -(define_function_unit "k6_load" 1 0 - (and (eq_attr "cpu" "k6") - (ior (eq_attr "type" "pop") - (eq_attr "memory" "load,both"))) - 1 1) - -(define_function_unit "k6_load" 1 0 - (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "str") - (eq_attr "memory" "load,both"))) - 10 10) - -;; Lea have two instructions, so latency is probably 2 -(define_function_unit "k6_store" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "lea")) - 2 1) - -(define_function_unit "k6_store" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "str")) - 10 10) - -(define_function_unit "k6_store" 1 0 - (and (eq_attr "cpu" "k6") - (ior (eq_attr "type" "push") - (eq_attr "memory" "store,both"))) - 1 1) - -(define_function_unit "k6_fpu" 1 1 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "fop,fop1,fmov,fcmp,fistp")) - 2 2) - -(define_function_unit "k6_fpu" 1 1 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "fmul")) - 2 2) - -;; ??? Guess -(define_function_unit "k6_fpu" 1 1 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "fdiv,fpspc")) - 56 56) - -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "imul")) - 2 2) - -(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "imul")) - 2 2) - -;; ??? Guess -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "idiv")) - 17 17) - -(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "idiv")) - 17 17) - -;; AMD Athlon Scheduling -;; -;; The Athlon does contain three pipelined FP units, three integer units and -;; three address generation units. -;; -;; The predecode logic is determining boundaries of instructions in the 64 -;; byte cache line. So the cache line straddling problem of K6 might be issue -;; here as well, but it is not noted in the documentation. -;; -;; Three DirectPath instructions decoders and only one VectorPath decoder -;; is available. They can decode three DirectPath instructions or one VectorPath -;; instruction per cycle. -;; Decoded macro instructions are then passed to 72 entry instruction control -;; unit, that passes -;; it to the specialized integer (18 entry) and fp (36 entry) schedulers. -;; -;; The load/store queue unit is not attached to the schedulers but -;; communicates with all the execution units separately instead. 
- -(define_attr "athlon_decode" "direct,vector" - (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov") - (const_string "vector") - (and (eq_attr "type" "push") - (match_operand 1 "memory_operand" "")) - (const_string "vector") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load,store") - (eq_attr "mode" "XF"))) - (const_string "vector")] - (const_string "direct"))) - -(define_function_unit "athlon_vectordec" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "vector")) - 1 1) - -(define_function_unit "athlon_directdec" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "direct")) - 1 1) - -(define_function_unit "athlon_vectordec" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "direct")) - 1 1 [(eq_attr "athlon_decode" "vector")]) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ibr,call,callv,icmov,cld,pop,setcc,push,pop")) - 1 1) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "str")) - 15 15) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "imul")) - 5 0) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "idiv")) - 42 0) - -(define_function_unit "athlon_muldiv" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "imul")) - 5 0) - -(define_function_unit "athlon_muldiv" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "idiv")) - 42 42) - -(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any" - (cond [(eq_attr "type" "fop,fop1,fcmp,fistp") - (const_string "add") - (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov") - (const_string "mul") - (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both")) - (const_string "store") - (and (eq_attr "type" "fmov") (eq_attr "memory" "load")) - (const_string "any") - (and (eq_attr "type" "fmov") - (ior (match_operand:SI 1 "register_operand" "") - (match_operand 1 "immediate_operand" ""))) - (const_string "store") - (eq_attr "type" "fmov") - (const_string "muladd")] - (const_string "none"))) - -;; We use latencies 1 for definitions. This is OK to model colisions -;; in execution units. The real latencies are modeled in the "fp" pipeline. - -;; fsin, fcos: 96-192 -;; fsincos: 107-211 -;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode. -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fpspc")) - 100 1) - -;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode. -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fdiv")) - 24 1) - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fop,fop1,fmul,fistp")) - 4 1) - -;; XFmode loads are slow. -;; XFmode store is slow too (8 cycles), but we don't need to model it, because -;; there are no dependent instructions. - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load") - (eq_attr "mode" "XF")))) - 10 1) - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fmov,fsgn")) - 2 1) - -;; fcmp and ftst instructions -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fcmp") - (eq_attr "athlon_decode" "direct"))) - 3 1) - -;; fcmpi instructions. 
-(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fcmp") - (eq_attr "athlon_decode" "vector"))) - 3 1) - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fcmov")) - 7 1) - -(define_function_unit "athlon_fp_mul" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "mul")) - 1 1) - -(define_function_unit "athlon_fp_add" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "add")) - 1 1) - -(define_function_unit "athlon_fp_muladd" 2 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "muladd,mul,add")) - 1 1) - -(define_function_unit "athlon_fp_store" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "store")) - 1 1) - -;; We don't need to model the Address Generation Unit, since we don't model -;; the re-order buffer yet and thus we never schedule more than three operations -;; at time. Later we may want to experiment with MD_SCHED macros modeling the -;; decoders independently on the functional units. - -;(define_function_unit "athlon_agu" 3 0 -; (and (eq_attr "cpu" "athlon") -; (and (eq_attr "memory" "!none") -; (eq_attr "athlon_fpunits" "none"))) -; 1 1) - -;; Model load unit to avoid too long sequences of loads. We don't need to -;; model store queue, since it is hardly going to be bottleneck. - -(define_function_unit "athlon_load" 2 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "memory" "load,both")) - 1 1) - +(include "pentium.md") +(include "ppro.md") +(include "k6.md") +(include "athlon.md") ;; Compare instructions. @@ -1382,7 +745,8 @@ [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP (match_operand 1 "register_operand" "f") - (match_operand 2 "const0_operand" "X"))] 9))] + (match_operand 2 "const0_operand" "X"))] + UNSPEC_FNSTSW))] "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2])" @@ -1413,7 +777,8 @@ (unspec:HI [(compare:CCFP (match_operand:SF 1 "register_operand" "f") - (match_operand:SF 2 "nonimmediate_operand" "fm"))] 9))] + (match_operand:SF 2 "nonimmediate_operand" "fm"))] + UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, 2, 0);" [(set_attr "type" "fcmp") @@ -1434,7 +799,8 @@ (unspec:HI [(compare:CCFP (match_operand:DF 1 "register_operand" "f") - (match_operand:DF 2 "nonimmediate_operand" "fm"))] 9))] + (match_operand:DF 2 "nonimmediate_operand" "fm"))] + UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, 2, 0);" [(set_attr "type" "multi") @@ -1465,7 +831,8 @@ (unspec:HI [(compare:CCFP (match_operand:XF 1 "register_operand" "f") - (match_operand:XF 2 "register_operand" "f"))] 9))] + (match_operand:XF 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] "!TARGET_64BIT && TARGET_80387" "* return output_fp_compare (insn, operands, 2, 0);" [(set_attr "type" "multi") @@ -1476,7 +843,8 @@ (unspec:HI [(compare:CCFP (match_operand:TF 1 "register_operand" "f") - (match_operand:TF 2 "register_operand" "f"))] 9))] + (match_operand:TF 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, 2, 0);" [(set_attr "type" "multi") @@ -1499,7 +867,8 @@ (unspec:HI [(compare:CCFPU (match_operand 1 "register_operand" "f") - (match_operand 2 "register_operand" "f"))] 9))] + (match_operand 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2])" @@ -1547,12 +916,12 @@ (define_insn 
"x86_fnstsw_1" [(set (match_operand:HI 0 "register_operand" "=a") - (unspec:HI [(reg 18)] 9))] + (unspec:HI [(reg 18)] UNSPEC_FNSTSW))] "TARGET_80387" "fnstsw\t%0" [(set_attr "length" "2") (set_attr "mode" "SI") - (set_attr "i387" "1") + (set_attr "unit" "i387") (set_attr "ppro_uops" "few")]) ;; FP compares, step 3 @@ -1560,7 +929,7 @@ (define_insn "x86_sahf_1" [(set (reg:CC 17) - (unspec:CC [(match_operand:HI 0 "register_operand" "a")] 10))] + (unspec:CC [(match_operand:HI 0 "register_operand" "a")] UNSPEC_SAHF))] "!TARGET_64BIT" "sahf" [(set_attr "length" "1") @@ -1591,7 +960,7 @@ && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "fcmp,sse") + [(set_attr "type" "fcmp,ssecmp") (set_attr "mode" "unknownfp") (set_attr "athlon_decode" "vector")]) @@ -1602,7 +971,7 @@ "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "sse") + [(set_attr "type" "ssecmp") (set_attr "mode" "unknownfp") (set_attr "athlon_decode" "vector")]) @@ -1627,7 +996,7 @@ && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" - [(set_attr "type" "fcmp,sse") + [(set_attr "type" "fcmp,ssecmp") (set_attr "mode" "unknownfp") (set_attr "athlon_decode" "vector")]) @@ -1638,7 +1007,7 @@ "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" - [(set_attr "type" "sse") + [(set_attr "type" "ssecmp") (set_attr "mode" "unknownfp") (set_attr "athlon_decode" "vector")]) @@ -1733,25 +1102,20 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "1")]) -; The first alternative is used only to compute proper length of instruction. -; Reload's algorithm does not take into account the cost of spill instructions -; needed to free register in given class, so avoid it from choosing the first -; alternative when eax is not available. 
- (define_insn "*movsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y") - (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!*Y,!rm") + (match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,*Y,rm,*Y"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) { - case TYPE_SSE: - if (get_attr_mode (insn) == TImode) + case TYPE_SSEMOV: + if (get_attr_mode (insn) == MODE_TI) return "movdqa\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; - case TYPE_MMX: - if (get_attr_mode (insn) == DImode) + case TYPE_MMXMOV: + if (get_attr_mode (insn) == MODE_DI) return "movq\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; @@ -1759,46 +1123,44 @@ return "lea{l}\t{%1, %0|%0, %1}"; default: - if (flag_pic && SYMBOLIC_CONST (operands[1])) + if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) abort(); return "mov{l}\t{%1, %0|%0, %1}"; } } [(set (attr "type") - (cond [(eq_attr "alternative" "4,5,6") - (const_string "mmx") - (eq_attr "alternative" "7,8,9") - (const_string "sse") + (cond [(eq_attr "alternative" "2,3,4") + (const_string "mmxmov") + (eq_attr "alternative" "5,6,7") + (const_string "ssemov") (and (ne (symbol_ref "flag_pic") (const_int 0)) (match_operand:SI 1 "symbolic_operand" "")) (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*") - (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")]) + (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")]) ;; Stores and loads of ax to arbitary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabssi_1_rex64" - [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:SI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:SI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{l}\t{%1, %P0|%P0, %1} - mov{l}\t{%1, %a0|%a0, %1} - movabs{l}\t{%1, %a0|%a0, %1}" + mov{l}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "SI")]) (define_insn "*movabssi_2_rex64" [(set (match_operand:SI 0 "register_operand" "=a,r") (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{l}\t{%P1, %0|%0, %P1} mov{l}\t{%a1, %0|%0, %a1}" @@ -1848,14 +1210,9 @@ [(set_attr "type" "push") (set_attr "mode" "QI")]) -; The first alternative is used only to compute proper length of instruction. -; Reload's algorithm does not take into account the cost of spill instructions -; needed to free register in given class, so avoid it from choosing the first -; alternative when eax is not available. 
- (define_insn "*movhi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=*?a,r,r,*?a,r,m") - (match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) @@ -1872,59 +1229,57 @@ } } [(set (attr "type") - (cond [(and (eq_attr "alternative" "0,1") + (cond [(and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") (const_int 0)))) (const_string "imov") - (and (eq_attr "alternative" "2,3,4") + (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand" "")) (const_string "imov") (and (ne (symbol_ref "TARGET_MOVX") (const_int 0)) - (eq_attr "alternative" "0,1,3,4")) + (eq_attr "alternative" "0,2")) (const_string "imovx") ] (const_string "imov"))) (set (attr "mode") (cond [(eq_attr "type" "imovx") (const_string "SI") - (and (eq_attr "alternative" "2,3,4") + (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand" "")) (const_string "SI") - (and (eq_attr "alternative" "0,1") + (and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") (const_int 0)))) (const_string "SI") ] - (const_string "HI"))) - (set_attr "modrm" "0,*,*,0,*,*")]) + (const_string "HI")))]) ;; Stores and loads of ax to arbitary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabshi_1_rex64" - [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:HI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:HI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{w}\t{%1, %P0|%P0, %1} - mov{w}\t{%1, %a0|%a0, %1} - movabs{w}\t{%1, %a0|%a0, %1}" + mov{w}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "HI")]) (define_insn "*movabshi_2_rex64" [(set (match_operand:HI 0 "register_operand" "=a,r") (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{w}\t{%P1, %0|%0, %P1} mov{w}\t{%a1, %0|%0, %a1}" @@ -2122,7 +1477,7 @@ (define_expand "movstrictqi" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) (match_operand:QI 1 "general_operand" ""))] - "! TARGET_PARTIAL_REG_STALL" + "! TARGET_PARTIAL_REG_STALL || optimize_size" { /* Don't generate memory->memory moves, go through a register. */ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) @@ -2132,7 +1487,7 @@ (define_insn "*movstrictqi_1" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) (match_operand:QI 1 "general_operand" "*qn,m"))] - "! TARGET_PARTIAL_REG_STALL + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "mov{b}\t{%1, %0|%0, %1}" [(set_attr "type" "imov") @@ -2226,24 +1581,23 @@ ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabsqi_1_rex64" - [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:QI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:QI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{b}\t{%1, %P0|%P0, %1} - mov{b}\t{%1, %a0|%a0, %1} - movabs{b}\t{%1, %a0|%a0, %1}" + mov{b}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "QI")]) (define_insn "*movabsqi_2_rex64" [(set (match_operand:QI 0 "register_operand" "=a,r") (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{b}\t{%P1, %0|%0, %P1} mov{b}\t{%a1, %0|%0, %a1}" @@ -2484,7 +1838,7 @@ movq\t{%1, %0|%0, %1} movdqa\t{%1, %0|%0, %1} movq\t{%1, %0|%0, %1}" - [(set_attr "type" "*,*,mmx,mmx,sse,sse,sse") + [(set_attr "type" "*,*,mmx,mmx,ssemov,ssemov,ssemov") (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI")]) (define_split @@ -2513,19 +1867,19 @@ { switch (get_attr_type (insn)) { - case TYPE_SSE: + case TYPE_SSEMOV: if (register_operand (operands[0], DImode) && register_operand (operands[1], DImode)) return "movdqa\t{%1, %0|%0, %1}"; /* FALLTHRU */ - case TYPE_MMX: + case TYPE_MMXMOV: return "movq\t{%1, %0|%0, %1}"; case TYPE_MULTI: return "#"; case TYPE_LEA: return "lea{q}\t{%a1, %0|%0, %a1}"; default: - if (flag_pic && SYMBOLIC_CONST (operands[1])) + if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) abort (); if (get_attr_mode (insn) == MODE_SI) return "mov{l}\t{%k1, %k0|%k0, %k1}"; @@ -2537,9 +1891,9 @@ } [(set (attr "type") (cond [(eq_attr "alternative" "5,6") - (const_string "mmx") - (eq_attr "alternative" "7,8") - (const_string "sse") + (const_string "mmxmov") + (eq_attr "alternative" "7,8,9") + (const_string "ssemov") (eq_attr "alternative" "4") (const_string "multi") (and (ne (symbol_ref "flag_pic") (const_int 0)) @@ -2557,7 +1911,7 @@ (define_insn "*movabsdi_1_rex64" [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) (match_operand:DI 1 "nonmemory_operand" "a,er"))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{q}\t{%1, %P0|%P0, %1} mov{q}\t{%1, %a0|%a0, %1}" @@ -2571,7 +1925,7 @@ (define_insn "*movabsdi_2_rex64" [(set (match_operand:DI 0 "register_operand" "=a,r") (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{q}\t{%P1, %0|%0, %P1} mov{q}\t{%a1, %0|%0, %a1}" @@ -2711,21 +2065,21 @@ ;; %%% Kill this when call knows how to work this out. 
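;; A sketch of what the push splits below expand to (illustrative assembly,
;; not from the original sources): x86 cannot push directly from an FP or SSE
;; register, so the push is rewritten as an explicit stack-pointer adjustment
;; plus an ordinary store, e.g. for the 32-bit SFmode case:
;;   sub   $4, %esp      ; (reg:SI 7) is %esp
;;   fstps (%esp)        ; or movss %xmm0, (%esp) for an SSE source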
(define_split [(set (match_operand:SF 0 "push_operand" "") - (match_operand:SF 1 "register_operand" ""))] - "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:SF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) (set (mem:SF (reg:SI 7)) (match_dup 1))]) (define_split [(set (match_operand:SF 0 "push_operand" "") - (match_operand:SF 1 "register_operand" ""))] - "TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:SF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) (set (mem:SF (reg:DI 7)) (match_dup 1))]) (define_insn "*movsf_1" [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y") - (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,H,x,xm#rf,x#rf,rm,*y,*y"))] + (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2763,7 +2117,7 @@ case 4: return "mov{l}\t{%1, %0|%0, %1}"; case 5: - if (TARGET_SSE2) + if (TARGET_SSE2 && !TARGET_ATHLON) return "pxor\t%0, %0"; else return "xorps\t%0, %0"; @@ -2787,7 +2141,7 @@ abort(); } } - [(set_attr "type" "fmov,fmov,fmov,imov,imov,sse,sse,sse,sse,mmx,mmx,mmx") + [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")]) (define_insn "*swapsf" @@ -2883,16 +2237,16 @@ ;; %%% Kill this when call knows how to work this out. (define_split [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "register_operand" ""))] - "!TARGET_64BIT && reload_completed && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:DF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT && reload_completed" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) (set (mem:DF (reg:SI 7)) (match_dup 1))] "") (define_split [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "register_operand" ""))] - "TARGET_64BIT && reload_completed && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:DF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT && reload_completed" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) (set (mem:DF (reg:DI 7)) (match_dup 1))] "") @@ -2910,7 +2264,7 @@ (define_insn "*movdf_nointeger" [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m") - (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,H,Y#f,YHm#f,Y#f"))] + (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,C,Y#f,YHm#f,Y#f"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && (reload_in_progress || reload_completed @@ -2949,7 +2303,10 @@ case 4: return "#"; case 5: - return "pxor\t%0, %0"; + if (TARGET_ATHLON) + return "xorpd\t%0, %0"; + else + return "pxor\t%0, %0"; case 6: if (TARGET_PARTIAL_REG_DEPENDENCY) return "movapd\t{%1, %0|%0, %1}"; @@ -2963,12 +2320,12 @@ abort(); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,sse,sse,sse,sse") + [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) (define_insn "*movdf_integer" [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m") - (match_operand:DF 1 "general_operand" 
"fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,H,Y#rf,Ym#rf,Y#rf"))] + (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,C,Y#rf,Ym#rf,Y#rf"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && !optimize_size && TARGET_INTEGER_DFMODE_MOVES && (reload_in_progress || reload_completed @@ -3008,7 +2365,10 @@ return "#"; case 5: - return "pxor\t%0, %0"; + if (TARGET_ATHLON) + return "xorpd\t%0, %0"; + else + return "pxor\t%0, %0"; case 6: if (TARGET_PARTIAL_REG_DEPENDENCY) return "movapd\t{%1, %0|%0, %1}"; @@ -3022,7 +2382,7 @@ abort(); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,sse,sse,sse,sse") + [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) (define_split @@ -3196,28 +2556,28 @@ && (GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == DFmode) - && (!REG_P (operands[1]) || !ANY_FP_REGNO_P (REGNO (operands[1])))" + && !ANY_FP_REG_P (operands[1])" [(const_int 0)] "ix86_split_long_move (operands); DONE;") (define_split [(set (match_operand:XF 0 "push_operand" "") - (match_operand:XF 1 "register_operand" ""))] - "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:XF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) (set (mem:XF (reg:SI 7)) (match_dup 1))]) (define_split [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "register_operand" ""))] - "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:TF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:TF (reg:SI 7)) (match_dup 1))]) (define_split [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "register_operand" ""))] - "TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:TF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) (set (mem:TF (reg:DI 7)) (match_dup 1))]) @@ -3969,15 +3329,15 @@ (define_split [(set (match_operand:DF 0 "push_operand" "") - (float_extend:DF (match_operand:SF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) (set (mem:DF (reg:SI 7)) (float_extend:DF (match_dup 1)))]) (define_split [(set (match_operand:DF 0 "push_operand" "") - (float_extend:DF (match_operand:SF 1 "register_operand" "")))] - "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))] + "TARGET_64BIT" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) (set (mem:DF (reg:DI 7)) (float_extend:DF (match_dup 1)))]) @@ -3989,8 +3349,8 @@ (define_split [(set (match_operand:XF 0 "push_operand" "") - (float_extend:XF (match_operand:SF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) (set (mem:XF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) @@ -4002,15 +3362,15 @@ (define_split [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:SF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:TF (match_operand:SF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI 
(reg:SI 7) (const_int -16))) (set (mem:TF (reg:SI 7)) (float_extend:TF (match_dup 1)))]) (define_split [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:SF 1 "register_operand" "")))] - "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:TF (match_operand:SF 1 "fp_register_operand" "")))] + "TARGET_64BIT" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) (set (mem:DF (reg:DI 7)) (float_extend:TF (match_dup 1)))]) @@ -4022,8 +3382,8 @@ (define_split [(set (match_operand:XF 0 "push_operand" "") - (float_extend:XF (match_operand:DF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) (set (mem:DF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) @@ -4035,23 +3395,27 @@ (define_split [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:DF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:TF (match_operand:DF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:TF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) (define_split [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:DF 1 "register_operand" "")))] - "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:TF (match_operand:DF 1 "fp_register_operand" "")))] + "TARGET_64BIT" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) (set (mem:TF (reg:DI 7)) (float_extend:TF (match_dup 1)))]) (define_expand "extendsfdf2" [(set (match_operand:DF 0 "nonimmediate_operand" "") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "")))] + (float_extend:DF (match_operand:SF 1 "general_operand" "")))] "TARGET_80387 || TARGET_SSE2" { + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (SFmode, operands[1]); }) @@ -4086,7 +3450,7 @@ abort (); } } - [(set_attr "type" "fmov,fmov,sse") + [(set_attr "type" "fmov,fmov,ssecvt") (set_attr "mode" "SF,XF,DF")]) (define_insn "*extendsfdf2_1_sse_only" @@ -4095,14 +3459,18 @@ "!TARGET_80387 && TARGET_SSE2 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "cvtss2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) (define_expand "extendsfxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "")))] + (float_extend:XF (match_operand:SF 1 "general_operand" "")))] "!TARGET_64BIT && TARGET_80387" { + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (SFmode, operands[1]); }) @@ -4141,9 +3509,13 @@ (define_expand "extendsftf2" [(set (match_operand:TF 0 "nonimmediate_operand" "") - (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "")))] + (float_extend:TF (match_operand:SF 1 "general_operand" "")))] "TARGET_80387" { + /* ??? 
Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (SFmode, operands[1]); }) @@ -4182,9 +3554,13 @@ (define_expand "extenddfxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "")))] + (float_extend:XF (match_operand:DF 1 "general_operand" "")))] "!TARGET_64BIT && TARGET_80387" { + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (DFmode, operands[1]); }) @@ -4223,9 +3599,13 @@ (define_expand "extenddftf2" [(set (match_operand:TF 0 "nonimmediate_operand" "") - (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "")))] + (float_extend:TF (match_operand:DF 1 "general_operand" "")))] "TARGET_80387" { + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (DFmode, operands[1]); }) @@ -4325,7 +3705,7 @@ abort (); } } - [(set_attr "type" "fmov,multi,multi,multi,sse") + [(set_attr "type" "fmov,multi,multi,multi,ssecvt") (set_attr "mode" "SF,SF,SF,SF,DF")]) (define_insn "*truncdfsf2_2" @@ -4348,7 +3728,7 @@ abort (); } } - [(set_attr "type" "sse,fmov") + [(set_attr "type" "ssecvt,fmov") (set_attr "mode" "DF,SF")]) (define_insn "truncdfsf2_3" @@ -4371,7 +3751,7 @@ (match_operand:DF 1 "nonimmediate_operand" "mY")))] "!TARGET_80387 && TARGET_SSE2" "cvtsd2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) (define_split @@ -4396,10 +3776,9 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (float_truncate:SF - (match_operand:DF 1 "register_operand" ""))) + (match_operand:DF 1 "fp_register_operand" ""))) (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed - && FP_REG_P (operands[1])" + "TARGET_80387 && reload_completed" [(set (match_dup 2) (float_truncate:SF (match_dup 1))) (set (match_dup 0) (match_dup 2))] "") @@ -4788,14 +4167,14 @@ (fix:DI (match_operand:SF 1 "nonimmediate_operand" "xm")))] "TARGET_64BIT && TARGET_SSE" "cvttss2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt")]) (define_insn "fix_truncdfdi_sse" [(set (match_operand:DI 0 "register_operand" "=r") (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Ym")))] "TARGET_64BIT && TARGET_SSE2" "cvttsd2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt")]) ;; Signed conversion to SImode. 
@@ -4896,14 +4275,14 @@ (fix:SI (match_operand:SF 1 "nonimmediate_operand" "xm")))] "TARGET_SSE" "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt")]) (define_insn "fix_truncdfsi_sse" [(set (match_operand:SI 0 "register_operand" "=r") (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Ym")))] "TARGET_SSE2" "cvttsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt")]) (define_split [(set (match_operand:SI 0 "register_operand" "") @@ -5034,22 +4413,22 @@ ;; %% Not used yet. (define_insn "x86_fnstcw_1" [(set (match_operand:HI 0 "memory_operand" "=m") - (unspec:HI [(reg:HI 18)] 11))] + (unspec:HI [(reg:HI 18)] UNSPEC_FSTCW))] "TARGET_80387" "fnstcw\t%0" [(set_attr "length" "2") (set_attr "mode" "HI") - (set_attr "i387" "1") + (set_attr "unit" "i387") (set_attr "ppro_uops" "few")]) (define_insn "x86_fldcw_1" [(set (reg:HI 18) - (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] 12))] + (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))] "TARGET_80387" "fldcw\t%0" [(set_attr "length" "2") (set_attr "mode" "HI") - (set_attr "i387" "1") + (set_attr "unit" "i387") (set_attr "athlon_decode" "vector") (set_attr "ppro_uops" "few")]) @@ -5083,7 +4462,7 @@ fild%z1\t%1 # cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") + [(set_attr "type" "fmov,multi,ssecvt") (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) @@ -5092,7 +4471,7 @@ (float:SF (match_operand:SI 1 "nonimmediate_operand" "mr")))] "TARGET_SSE" "cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) @@ -5121,7 +4500,7 @@ fild%z1\t%1 # cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") + [(set_attr "type" "fmov,multi,ssecvt") (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) @@ -5130,7 +4509,7 @@ (float:SF (match_operand:DI 1 "nonimmediate_operand" "mr")))] "TARGET_64BIT && TARGET_SSE" "cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) @@ -5159,7 +4538,7 @@ fild%z1\t%1 # cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") + [(set_attr "type" "fmov,multi,ssecvt") (set_attr "mode" "DF") (set_attr "fp_int_src" "true")]) @@ -5168,7 +4547,7 @@ (float:DF (match_operand:SI 1 "nonimmediate_operand" "mr")))] "TARGET_SSE2" "cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "DF") (set_attr "fp_int_src" "true")]) @@ -5197,7 +4576,7 @@ fild%z1\t%1 # cvtsi2sd{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") + [(set_attr "type" "fmov,multi,ssecvt") (set_attr "mode" "DF") (set_attr "fp_int_src" "true")]) @@ -5206,7 +4585,7 @@ (float:DF (match_operand:DI 1 "nonimmediate_operand" "mr")))] "TARGET_SSE2" "cvtsi2sd{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "DF") (set_attr "fp_int_src" "true")]) @@ -5278,10 +4657,9 @@ ;; %%% Kill these when reload knows how to do it. 
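;; A sketch of the effect (illustrative, not from the original sources): fild
;; only accepts a memory source, so when the integer input is still live in a
;; register the split below passes it through a stack slot, roughly:
;;   mov   %eax, (%esp)   ; ix86_force_to_memory picks the actual slot
;;   filds (%esp)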
(define_split - [(set (match_operand 0 "register_operand" "") + [(set (match_operand 0 "fp_register_operand" "") (float (match_operand 1 "register_operand" "")))] - "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0])) - && FP_REG_P (operands[0])" + "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))" [(const_int 0)] { operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); @@ -5320,7 +4698,8 @@ (match_operand:DI 2 "general_operand" ""))) (clobber (reg:CC 17))] "!TARGET_64BIT && reload_completed" - [(parallel [(set (reg:CC 17) (unspec:CC [(match_dup 1) (match_dup 2)] 12)) + [(parallel [(set (reg:CC 17) (unspec:CC [(match_dup 1) (match_dup 2)] + UNSPEC_ADD_CARRY)) (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) (parallel [(set (match_dup 3) (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) @@ -5345,8 +4724,10 @@ (set_attr "ppro_uops" "few")]) (define_insn "*adddi3_cc_rex64" - [(set (reg:CC 17) (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0") - (match_operand:DI 2 "x86_64_general_operand" "re,rm")] 12)) + [(set (reg:CC 17) + (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")] + UNSPEC_ADD_CARRY)) (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") (plus:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" @@ -5382,8 +4763,10 @@ (set_attr "ppro_uops" "few")]) (define_insn "*addsi3_cc" - [(set (reg:CC 17) (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "ri,rm")] 12)) + [(set (reg:CC 17) + (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "ri,rm")] + UNSPEC_ADD_CARRY)) (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (plus:SI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, SImode, operands)" @@ -5392,8 +4775,10 @@ (set_attr "mode" "SI")]) (define_insn "addqi3_cc" - [(set (reg:CC 17) (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qi,qm")] 12)) + [(set (reg:CC 17) + (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qi,qm")] + UNSPEC_ADD_CARRY)) (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") (plus:QI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, QImode, operands)" @@ -5446,7 +4831,7 @@ (define_insn_and_split "*lea_general_1" [(set (match_operand 0 "register_operand" "=r") - (plus (plus (match_operand 1 "register_operand" "r") + (plus (plus (match_operand 1 "index_register_operand" "r") (match_operand 2 "register_operand" "r")) (match_operand 3 "immediate_operand" "i")))] "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode @@ -5478,7 +4863,7 @@ (define_insn_and_split "*lea_general_1_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "r") (match_operand:SI 2 "register_operand" "r")) (match_operand:SI 3 "immediate_operand" "i"))))] "TARGET_64BIT" @@ -5498,7 +4883,7 @@ (define_insn_and_split "*lea_general_2" [(set (match_operand 0 "register_operand" "=r") - (plus (mult (match_operand 1 "register_operand" "r") + (plus (mult (match_operand 1 "index_register_operand" "r") (match_operand 2 "const248_operand" "i")) (match_operand 3 "nonmemory_operand" "ri")))] "(GET_MODE (operands[0]) == QImode || 
GET_MODE (operands[0]) == HImode @@ -5528,7 +4913,7 @@ (define_insn_and_split "*lea_general_2_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "r") (match_operand:SI 2 "const248_operand" "n")) (match_operand:SI 3 "nonmemory_operand" "ri"))))] "TARGET_64BIT" @@ -5547,7 +4932,7 @@ (define_insn_and_split "*lea_general_3" [(set (match_operand 0 "register_operand" "=r") - (plus (plus (mult (match_operand 1 "register_operand" "r") + (plus (plus (mult (match_operand 1 "index_register_operand" "r") (match_operand 2 "const248_operand" "i")) (match_operand 3 "register_operand" "r")) (match_operand 4 "immediate_operand" "i")))] @@ -5581,7 +4966,7 @@ (define_insn_and_split "*lea_general_3_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "r") (match_operand:SI 2 "const248_operand" "n")) (match_operand:SI 3 "register_operand" "r")) (match_operand:SI 4 "immediate_operand" "i"))))] @@ -6318,9 +5703,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + else if (operands[2] == constm1_rtx) return "dec{w}\t%0"; abort(); @@ -6359,9 +5742,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + else if (operands[2] == constm1_rtx) return "dec{w}\t%0"; abort(); @@ -6401,9 +5782,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + else if (operands[2] == constm1_rtx) return "dec{w}\t%0"; abort(); @@ -6440,9 +5819,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + else if (operands[2] == constm1_rtx) return "dec{w}\t%0"; abort(); @@ -6478,9 +5855,7 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + if (operands[2] == constm1_rtx) return "inc{w}\t%0"; else if (operands[2] == const1_rtx) return "dec{w}\t%0"; @@ -6522,9 +5897,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + else if (operands[2] == constm1_rtx) return "dec{w}\t%0"; abort(); @@ -6573,9 +5946,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) + else if (operands[2] == constm1_rtx) return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; abort(); @@ -6621,9 +5992,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) + else if (operands[2] == constm1_rtx) return widen ? 
"dec{l}\t%k0" : "dec{b}\t%0"; abort(); @@ -6653,6 +6022,40 @@ (const_string "alu"))) (set_attr "mode" "QI,QI,SI")]) +(define_insn "*addqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (plus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qnm"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[1] == const1_rtx) + return "inc{b}\t%0"; + else if (operands[1] == constm1_rtx) + return "dec{b}\t%0"; + abort(); + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. */ + if (GET_CODE (operands[1]) == CONST_INT + && INTVAL (operands[1]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{b}\t{%1, %0|%0, %1}"; + } + return "add{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu1"))) + (set_attr "mode" "QI")]) + (define_insn "*addqi_2" [(set (reg 17) (compare @@ -7184,6 +6587,17 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "*subqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (minus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qmn"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "sub{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + (define_insn "*subqi_2" [(set (reg 17) (compare @@ -7822,7 +7236,7 @@ (use (match_dup 3)) (clobber (reg:CC 17))])] { - /* Avoid use of cltd in favour of a mov+shift. */ + /* Avoid use of cltd in favor of a mov+shift. */ if (!TARGET_USE_CLTD && !optimize_size) { if (true_regnum (operands[1])) @@ -7907,7 +7321,7 @@ (use (match_dup 3)) (clobber (reg:CC 17))])] { - /* Avoid use of cltd in favour of a mov+shift. */ + /* Avoid use of cltd in favor of a mov+shift. */ if (!TARGET_USE_CLTD && !optimize_size) { if (true_regnum (operands[1])) @@ -8180,8 +7594,7 @@ (const_int 8)) (match_operand 1 "const_int_operand" "n")) (const_int 0)))] - "(unsigned HOST_WIDE_INT) INTVAL (operands[1]) <= 0xff - && ix86_match_ccmode (insn, CCNOmode)" + "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t{%1, %h0|%h0, %1}" [(set_attr "type" "test") (set_attr "mode" "QI") @@ -8319,10 +7732,57 @@ mask = ((HOST_WIDE_INT)1 << (pos + len)) - 1; mask &= ~(((HOST_WIDE_INT)1 << pos) - 1); - operands[3] = gen_rtx_AND (mode, operands[0], - GEN_INT (trunc_int_for_mode (mask, mode))); + operands[3] = gen_rtx_AND (mode, operands[0], gen_int_mode (mask, mode)); }) +;; Convert HImode/SImode test instructions with immediate to QImode ones. +;; i386 does not allow to encode test with 8bit sign extended immediate, so +;; this is relatively important trick. +;; Do the converison only post-reload to avoid limiting of the register class +;; to QI regs. 
+(define_split + [(set (reg 17) + (compare + (and (match_operand 0 "register_operand" "") + (match_operand 1 "const_int_operand" "")) + (const_int 0)))] + "reload_completed + && QI_REG_P (operands[0]) + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[1]) & ~(255 << 8))) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[1]) & ~(127 << 8)))) + && GET_MODE (operands[0]) != QImode" + [(set (reg:CCNO 17) + (compare:CCNO + (and:SI (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (match_dup 1)) + (const_int 0)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_int_mode (INTVAL (operands[1]) >> 8, SImode);") + +(define_split + [(set (reg 17) + (compare + (and (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "const_int_operand" "")) + (const_int 0)))] + "reload_completed + && (!REG_P (operands[0]) || ANY_QI_REG_P (operands[0])) + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[1]) & ~255)) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[1]) & ~127))) + && GET_MODE (operands[0]) != QImode" + [(set (reg:CCNO 17) + (compare:CCNO + (and:QI (match_dup 0) + (match_dup 1)) + (const_int 0)))] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]);") + + ;; %%% This used to optimize known byte-wide and operations to memory, ;; and sometimes to QImode registers. If this is considered useful, ;; it should be done with splitters. @@ -8445,7 +7905,7 @@ (and (match_dup 0) (const_int -65536))) (clobber (reg:CC 17))] - "optimize_size" + "optimize_size || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)" [(set (strict_low_part (match_dup 1)) (const_int 0))] "operands[1] = gen_lowpart (HImode, operands[0]);") @@ -8591,7 +8051,8 @@ (and:QI (match_dup 0) (match_operand:QI 1 "general_operand" "qi,qmi"))) (clobber (reg:CC 17))] - "" + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "and{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) @@ -8627,7 +8088,9 @@ (const_int 0))) (set (strict_low_part (match_dup 0)) (and:QI (match_dup 0) (match_dup 1)))] - "ix86_match_ccmode (insn, CCNOmode)" + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "and{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) @@ -8647,7 +8110,7 @@ (const_int 8)) (match_operand 2 "const_int_operand" "n"))) (clobber (reg:CC 17))] - "(unsigned HOST_WIDE_INT)INTVAL (operands[2]) <= 0xff" + "" "and{b}\t{%2, %h0|%h0, %2}" [(set_attr "type" "alu") (set_attr "length_immediate" "1") @@ -8675,8 +8138,7 @@ (const_int 8) (const_int 8)) (match_dup 2)))] - "ix86_match_ccmode (insn, CCNOmode) - && (unsigned HOST_WIDE_INT)INTVAL (operands[2]) <= 0xff" + "ix86_match_ccmode (insn, CCNOmode)" "and{b}\t{%2, %h0|%h0, %2}" [(set_attr "type" "alu") (set_attr "length_immediate" "1") @@ -8737,6 +8199,51 @@ [(set_attr "type" "alu") (set_attr "length_immediate" "0") (set_attr "mode" "QI")]) + +;; Convert wide AND instructions with immediate operand to shorter QImode +;; equivalents when possible. +;; Don't do the splitting with memory operands, since it intoduces risc +;; of memory mismatch stalls. We may want to do the splitting for optimizing +;; for size, but that can (should?) be handled by generic code instead. 
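Several patterns in this hunk, the high-byte test split above and the andqi_ext, iorqi_ext and xorqi_ext forms below, operate on (zero_extract:SI reg (const_int 8) (const_int 8)), that is, the %ah-style high byte of a legacy register, with the original immediate shifted right by 8. A small self-contained model of that rewrite, using a helper name of my own:

#include <stdint.h>
#include <stdio.h>

/* Model of "andb $imm, %ah": combine into bits 8-15 only, which is what
   the (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
   destination in those patterns stands for.  */
static uint32_t and_into_high_byte (uint32_t reg, uint8_t imm)
{
  uint32_t high = ((reg >> 8) & 0xffu) & imm;
  return (reg & ~0xff00u) | (high << 8);
}

int main (void)
{
  uint32_t eax  = 0x12345678u;
  uint32_t mask = 0xffff40ffu;                      /* only clears bits 8-15   */
  uint8_t  imm  = (uint8_t) ((mask >> 8) & 0xff);   /* what the split passes on */

  printf ("wide and:   %08x\n", eax & mask);
  printf ("narrow and: %08x\n", and_into_high_byte (eax, imm));
  return 0;
}

Both lines print the same value, which is the equivalence the splits rely on; only the profitability condition differs between the AND, OR, XOR and TEST cases.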
+(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(~INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (and:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since AND can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is not set. +(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(~INTVAL (operands[2]) & ~255) + && !(INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (and:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") ;; Logical inclusive OR instructions @@ -8953,7 +8460,8 @@ (ior:QI (match_dup 0) (match_operand:QI 1 "general_operand" "qmi,qi"))) (clobber (reg:CC 17))] - "" + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "or{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) @@ -8978,7 +8486,9 @@ (const_int 0))) (set (strict_low_part (match_dup 0)) (ior:QI (match_dup 0) (match_dup 1)))] - "ix86_match_ccmode (insn, CCNOmode)" + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "or{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) @@ -8995,6 +8505,118 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "iorqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC 17))] + "(!TARGET_PARTIAL_REG_STALL || optimize_size)" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC 17))] + "!TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_size)" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC 17))] + "TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_size)" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC 17))] + "(!TARGET_PARTIAL_REG_STALL || optimize_size)" + "ior{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (ior (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (ior:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since OR can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is set. 
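This comment and its AND/XOR counterparts state the profitability rule for the byte-narrowing splits: the wide forms of these instructions already have a sign-extended imm8 encoding (opcode 0x83), so shrinking to a byte instruction only saves space when that encoding cannot be used. For OR and XOR the 8-bit immediate must have bit 7 set, since 0-127 already encodes as imm8; for AND, whose mask keeps all upper bits set, bit 7 must be clear, since a mask such as 0xffffff80 already fits in a negative imm8. A minimal standalone check of the same conditions, assuming 32-bit operands and invented function names:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors "!(~INTVAL (op2) & ~255) && !(INTVAL (op2) & 128)".  */
static bool and_narrow_wins (unsigned int mask)
{
  return (~mask & ~0xffu) == 0       /* only clears bits in the low byte  */
         && (mask & 0x80u) == 0;     /* bit 7 clear: imm8 would not do    */
}

/* Mirrors "!(INTVAL (op2) & ~255) && (INTVAL (op2) & 128)".  */
static bool or_xor_narrow_wins (unsigned int mask)
{
  return (mask & ~0xffu) == 0        /* immediate fits in one byte        */
         && (mask & 0x80u) != 0;     /* bit 7 set: imm8 would sign-extend */
}

int main (void)
{
  printf ("and 0xffffff7f -> %d\n", and_narrow_wins (0xffffff7fu));   /* 1 */
  printf ("and 0xffffff80 -> %d\n", and_narrow_wins (0xffffff80u));   /* 0 */
  printf ("or  0x00000040 -> %d\n", or_xor_narrow_wins (0x40u));      /* 0 */
  printf ("xor 0x000000c0 -> %d\n", or_xor_narrow_wins (0xc0u));      /* 1 */
  return 0;
}

The high-byte (%ah) variants of these splits need no bit-7 check at all, because a non-trivial mask confined to bits 8-15 can never be expressed as a sign-extended imm8 in the first place.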
+(define_split + [(set (match_operand 0 "register_operand" "") + (ior (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(INTVAL (operands[2]) & ~255) + && (INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (ior:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") ;; Logical XOR instructions @@ -9210,11 +8832,77 @@ [(set_attr "type" "alu") (set_attr "mode" "QI,QI,SI")]) +(define_insn "*xorqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (xor:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qi,qmi"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "xor{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "xorqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC 17))] + "(!TARGET_PARTIAL_REG_STALL || optimize_size)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + (define_insn "*xorqi_ext_1" [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC 17))] + "!TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_size)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC 17))] + "TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_size)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI (zero_extract:SI (match_operand 1 "ext_register_operand" "0") (const_int 8) (const_int 8)) @@ -9222,7 +8910,7 @@ (const_int 8) (const_int 8)))) (clobber (reg:CC 17))] - "" + "(!TARGET_PARTIAL_REG_STALL || optimize_size)" "xor{b}\t{%h2, %h0|%h0, %h2}" [(set_attr "type" "alu") (set_attr "length_immediate" "0") @@ -9242,6 +8930,20 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "*xorqi_2_slp" + [(set (reg 17) + (compare (xor:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "general_operand" "qim,qi")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (xor:QI (match_dup 0) (match_dup 1)))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "xor{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + (define_insn "*xorqi_cc_2" [(set (reg 17) (compare @@ -9316,6 +9018,46 @@ (match_dup 2)))])] "" "") + +(define_split + [(set (match_operand 0 "register_operand" "") + (xor (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (xor:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since XOR can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is set. +(define_split + [(set (match_operand 0 "register_operand" "") + (xor (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(INTVAL (operands[2]) & ~255) + && (INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (xor:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") ;; Negation instructions @@ -9519,8 +9261,7 @@ operands[0] = force_reg (SFmode, operands[0]); emit_move_insn (reg, gen_lowpart (SFmode, - GEN_INT (trunc_int_for_mode (0x80000000, - SImode)))); + gen_int_mode (0x80000000, SImode))); emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9599,22 +9340,22 @@ "#") (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "fp_register_operand" "") (neg:SF (match_operand:SF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (neg:SF (match_dup 1)))] "") (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "register_and_not_fp_reg_operand" "") (neg:SF (match_operand:SF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (trunc_int_for_mode (0x80000000, SImode)); + "operands[1] = gen_int_mode (0x80000000, SImode); operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));") (define_split @@ -9631,7 +9372,7 @@ if (size >= 12) size = 10; operands[0] = adjust_address (operands[0], QImode, size - 1); - operands[1] = GEN_INT (trunc_int_for_mode (0x80, QImode)); + operands[1] = gen_int_mode (0x80, QImode); }) (define_expand "negdf2" @@ -9651,8 +9392,7 @@ in register. 
*/ rtx reg = gen_reg_rtx (DFmode); #if HOST_BITS_PER_WIDE_INT >= 64 - rtx imm = GEN_INT (trunc_int_for_mode(((HOST_WIDE_INT)1) << 63, - DImode)); + rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); #else rtx imm = immed_double_const (0, 0x80000000, DImode); #endif @@ -9688,9 +9428,9 @@ "#") (define_insn "*negdf2_ifs_rex64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,fm#Yr,r#Yf") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) - (use (match_operand:DF 2 "general_operand" "Y,0,*g#Yr,*rm")) + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y") + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#f,0"))) + (use (match_operand:DF 2 "general_operand" "Y,0,*g#Y*r")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9775,23 +9515,22 @@ "#") (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "fp_register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (neg:DF (match_dup 1)))] "") (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "register_and_not_fp_reg_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 && reload_completed - && !FP_REGNO_P (REGNO (operands[0]))" + "!TARGET_64BIT && TARGET_80387 && reload_completed" [(parallel [(set (match_dup 3) (xor:SI (match_dup 3) (match_dup 4))) (clobber (reg:CC 17))])] - "operands[4] = GEN_INT (trunc_int_for_mode (0x80000000, SImode)); + "operands[4] = gen_int_mode (0x80000000, SImode); split_di (operands+0, 1, operands+2, operands+3);") (define_expand "negxf2" @@ -9820,19 +9559,19 @@ "#") (define_split - [(set (match_operand:XF 0 "register_operand" "") + [(set (match_operand:XF 0 "fp_register_operand" "") (neg:XF (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (neg:XF (match_dup 1)))] "") (define_split - [(set (match_operand:XF 0 "register_operand" "") + [(set (match_operand:XF 0 "register_and_not_fp_reg_operand" "") (neg:XF (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] "operands[1] = GEN_INT (0x8000); @@ -9850,19 +9589,19 @@ "#") (define_split - [(set (match_operand:TF 0 "register_operand" "") + [(set (match_operand:TF 0 "fp_register_operand" "") (neg:TF (match_operand:TF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (neg:TF (match_dup 1)))] "") (define_split - [(set (match_operand:TF 0 "register_operand" "") + [(set (match_operand:TF 0 "register_and_not_fp_reg_operand" "") (neg:TF (match_operand:TF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] "operands[1] = GEN_INT (0x8000); @@ -9982,8 +9721,7 @@ operands[0] = 
force_reg (SFmode, operands[0]); emit_move_insn (reg, gen_lowpart (SFmode, - GEN_INT (trunc_int_for_mode (0x80000000, - SImode)))); + gen_int_mode (0x80000000, SImode))); emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -10051,22 +9789,22 @@ "#") (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "fp_register_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387" [(set (match_dup 0) (abs:SF (match_dup 1)))] "") (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "register_and_not_fp_reg_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (trunc_int_for_mode (~0x80000000, SImode)); + "operands[1] = gen_int_mode (~0x80000000, SImode); operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));") (define_split @@ -10083,7 +9821,7 @@ if (size >= 12) size = 10; operands[0] = adjust_address (operands[0], QImode, size - 1); - operands[1] = GEN_INT (trunc_int_for_mode (~0x80, QImode)); + operands[1] = gen_int_mode (~0x80, QImode); }) (define_expand "absdf2" @@ -10103,8 +9841,7 @@ in register. */ rtx reg = gen_reg_rtx (DFmode); #if HOST_BITS_PER_WIDE_INT >= 64 - rtx imm = GEN_INT (trunc_int_for_mode(((HOST_WIDE_INT)1) << 63, - DImode)); + rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); #else rtx imm = immed_double_const (0, 0x80000000, DImode); #endif @@ -10205,23 +9942,22 @@ "#") (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "fp_register_operand" "") (abs:DF (match_operand:DF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (abs:DF (match_dup 1)))] "") (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "register_and_not_fp_reg_operand" "") (abs:DF (match_operand:DF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 && reload_completed && - !FP_REGNO_P (REGNO (operands[0]))" + "!TARGET_64BIT && TARGET_80387 && reload_completed" [(parallel [(set (match_dup 3) (and:SI (match_dup 3) (match_dup 4))) (clobber (reg:CC 17))])] - "operands[4] = GEN_INT (trunc_int_for_mode (~0x80000000, SImode)); + "operands[4] = gen_int_mode (~0x80000000, SImode); split_di (operands+0, 1, operands+2, operands+3);") (define_expand "absxf2" @@ -10250,19 +9986,19 @@ "#") (define_split - [(set (match_operand:XF 0 "register_operand" "") + [(set (match_operand:XF 0 "fp_register_operand" "") (abs:XF (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (abs:XF (match_dup 1)))] "") (define_split - [(set (match_operand:XF 0 "register_operand" "") + [(set (match_operand:XF 0 "register_and_not_fp_reg_operand" "") (abs:XF (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set 
(match_dup 0) (and:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] "operands[1] = GEN_INT (~0x8000); @@ -10277,19 +10013,19 @@ "#") (define_split - [(set (match_operand:TF 0 "register_operand" "") + [(set (match_operand:TF 0 "fp_register_operand" "") (abs:TF (match_operand:TF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (abs:TF (match_dup 1)))] "") (define_split - [(set (match_operand:TF 0 "register_operand" "") + [(set (match_operand:TF 0 "register_and_not_any_fp_reg_operand" "") (abs:TF (match_operand:TF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] "operands[1] = GEN_INT (~0x8000); @@ -10643,7 +10379,7 @@ return "sal{q}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{q}\t%0"; else return "sal{q}\t{%2, %0|%0, %2}"; @@ -10672,8 +10408,7 @@ [(set (match_dup 0) (mult:DI (match_dup 1) (match_dup 2)))] - "operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), - DImode));") + "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);") ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant @@ -10701,7 +10436,7 @@ return "sal{q}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{q}\t%0"; else return "sal{q}\t{%2, %0|%0, %2}"; @@ -10849,7 +10584,7 @@ return "sal{l}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{l}\t%0"; else return "sal{l}\t{%2, %0|%0, %2}"; @@ -10870,7 +10605,7 @@ ;; Convert lea to the lea pattern to avoid flags dependency. 
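The split following this comment (and its zero-extend sibling further down) rewrites a left shift by a small constant as an lea whose scale factor is 1 << n; lea can encode scales of 1, 2, 4 or 8 and does not modify the flags, so the CC clobber disappears and the result can be scheduled past flag users. The gen_int_mode calls used throughout this hunk simply build such constants in canonical sign-extended form, replacing the longer GEN_INT (trunc_int_for_mode (...)) spelling. A trivial sketch of the arithmetic being matched, with made-up names:

#include <stdint.h>
#include <stdio.h>

/* "leal 0(,%reg,8), %dst" computes dst = reg * 8 without touching the
   flags; the split builds the 2^n scale with gen_int_mode.  */
static uint32_t lea_shift (uint32_t reg, unsigned int n)
{
  uint32_t scale = (uint32_t) 1 << n;   /* gen_int_mode (1 << n, Pmode)  */
  return reg * scale;                   /* (mult reg (const_int scale))  */
}

int main (void)
{
  printf ("%u %u\n", lea_shift (10, 3), 10u << 3);   /* both print 80 */
  return 0;
}

The newly added split right after it covers the case its own comment describes, shifting a register that lea cannot take as an index (such as the stack pointer), by emitting a move followed by an in-place shift instead.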
(define_split [(set (match_operand 0 "register_operand" "") - (ashift (match_operand 1 "register_operand" "") + (ashift (match_operand 1 "index_register_operand" "") (match_operand:QI 2 "const_int_operand" ""))) (clobber (reg:CC 17))] "reload_completed @@ -10880,8 +10615,7 @@ rtx pat; operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (Pmode, operands[1]); - operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), - Pmode)); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); pat = gen_rtx_MULT (Pmode, operands[1], operands[2]); if (Pmode != SImode) pat = gen_rtx_SUBREG (SImode, pat, 0); @@ -10889,6 +10623,26 @@ DONE; }) +;; Rare case of shifting RSP is handled by generating move and shift +(define_split + [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(const_int 0)] +{ + rtx pat, clob; + emit_move_insn (operands[1], operands[0]); + pat = gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_ASHIFT (GET_MODE (operands[0]), + operands[0], operands[2])); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob))); + DONE; +}) + (define_insn "*ashlsi3_1_zext" [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,r") @@ -10911,7 +10665,7 @@ return "sal{l}\t{%b2, %k0|%k0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{l}\t%k0"; else return "sal{l}\t{%2, %k0|%k0, %2}"; @@ -10939,8 +10693,7 @@ [(set (match_dup 0) (zero_extend:DI (subreg:SI (mult:SI (match_dup 1) (match_dup 2)) 0)))] { operands[1] = gen_lowpart (Pmode, operands[1]); - operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), - Pmode)); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); }) ;; This pattern can't accept a variable shift count, since shifts by @@ -10969,7 +10722,7 @@ return "sal{l}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{l}\t%0"; else return "sal{l}\t{%2, %0|%0, %2}"; @@ -11008,7 +10761,7 @@ return "sal{l}\t{%b2, %k0|%k0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{l}\t%k0"; else return "sal{l}\t{%2, %k0|%k0, %2}"; @@ -11053,7 +10806,7 @@ return "sal{w}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{w}\t%0"; else return "sal{w}\t{%2, %0|%0, %2}"; @@ -11091,7 +10844,7 @@ return "sal{w}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{w}\t%0"; else return "sal{w}\t{%2, %0|%0, %2}"; @@ -11133,7 +10886,7 @@ return "sal{w}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{w}\t%0"; else return 
"sal{w}\t{%2, %0|%0, %2}"; @@ -11189,7 +10942,7 @@ } else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) { if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t%0"; @@ -11245,7 +10998,7 @@ } else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) { if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t%0"; @@ -11297,7 +11050,7 @@ return "sal{b}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{b}\t%0"; else return "sal{b}\t{%2, %0|%0, %2}"; @@ -11353,7 +11106,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "sar{q}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11385,7 +11138,7 @@ (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t%0" [(set_attr "type" "ishift") @@ -11542,7 +11295,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "sar{l}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11556,7 +11309,7 @@ (match_operand:QI 2 "const_int_1_operand" "")))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "sar{l}\t%k0" [(set_attr "type" "ishift") (set_attr "length" "2")]) @@ -11597,7 +11350,7 @@ (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%0" [(set_attr "type" "ishift") @@ -11615,7 +11368,7 @@ (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%k0" [(set_attr "type" "ishift") @@ -11666,7 +11419,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ASHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "sar{w}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11698,7 +11451,7 @@ (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t%0" [(set_attr "type" "ishift") @@ -11738,7 +11491,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - 
&& (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "sar{b}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11746,6 +11499,21 @@ (const_string "2") (const_string "*")))]) +(define_insn "*ashrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (ashiftrt:QI (match_dup 0) + (match_operand:QI 1 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) + && (! TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "sar{b}\t%0" + [(set_attr "type" "ishift1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + (define_insn "*ashrqi3_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") @@ -11758,6 +11526,19 @@ [(set_attr "type" "ishift") (set_attr "mode" "QI")]) +(define_insn "*ashrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (ashiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + sar{b}\t{%1, %0|%0, %1} + sar{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "ishift1") + (set_attr "mode" "QI")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. @@ -11770,7 +11551,7 @@ (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift") @@ -11822,7 +11603,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "shr{q}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11854,7 +11635,7 @@ (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t%0" [(set_attr "type" "ishift") @@ -11932,7 +11713,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "shr{l}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11946,7 +11727,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "shr{l}\t%k0" [(set_attr "type" "ishift") (set_attr "length" "2")]) @@ -11988,7 +11769,7 @@ (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%0" [(set_attr "type" "ishift") @@ 
-12006,7 +11787,7 @@ (set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%k0" [(set_attr "type" "ishift") @@ -12057,7 +11838,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "shr{w}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12089,7 +11870,7 @@ (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t%0" [(set_attr "type" "ishift") @@ -12129,7 +11910,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (LSHIFTRT, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "shr{b}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12137,6 +11918,20 @@ (const_string "2") (const_string "*")))]) +(define_insn "*lshrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (lshiftrt:QI (match_dup 0) + (match_operand:QI 1 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "shr{b}\t%0" + [(set_attr "type" "ishift1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + (define_insn "*lshrqi3_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") @@ -12149,6 +11944,19 @@ [(set_attr "type" "ishift") (set_attr "mode" "QI")]) +(define_insn "*lshrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (lshiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + shr{b}\t{%1, %0|%0, %1} + shr{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "ishift1") + (set_attr "mode" "QI")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
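The *_slp variants added throughout this hunk (addqi_1_slp, subqi_1_slp, xorqi_1_slp, lshrqi3_1_slp, the rotate forms, and so on) all write a strict_low_part destination: only the low byte of the register changes and the upper bits are architecturally preserved, which is exactly what the x86 byte instructions do. They are gated on !TARGET_PARTIAL_REG_STALL because writing part of a register and later reading the whole of it stalls on ppro-class cores. A rough C model of the strict_low_part semantics these patterns rely on, with a helper name invented here:

#include <stdint.h>
#include <stdio.h>

/* "shrb %al" under strict_low_part: bits 0-7 are shifted in place,
   bits 8-31 of the full register are left untouched.  */
static uint32_t shr_low_byte (uint32_t reg)
{
  uint32_t low = (reg & 0xffu) >> 1;
  return (reg & ~0xffu) | low;
}

int main (void)
{
  uint32_t eax = 0xdeadbe81u;
  printf ("%08x -> %08x\n", eax, shr_low_byte (eax));  /* deadbe81 -> deadbe40 */
  return 0;
}

Independently of that, the (TARGET_PENTIUM || TARGET_PENTIUMPRO) guards on the one-bit shift and rotate forms are replaced by (TARGET_SHIFT1 || optimize_size), so the choice between the implicit shift-by-one encoding and the explicit $1 immediate becomes a per-CPU tuning flag rather than a hard-coded CPU list.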
@@ -12161,7 +11969,7 @@ (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t%0" [(set_attr "type" "ishift") @@ -12203,9 +12011,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "rol{q}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand:DI 0 "register_operand" "") (const_string "2") @@ -12220,7 +12028,7 @@ "@ rol{q}\t{%2, %0|%0, %2} rol{q}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "DI")]) (define_expand "rotlsi3" @@ -12237,9 +12045,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATE, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "rol{l}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") @@ -12252,9 +12060,9 @@ (match_operand:QI 2 "const_int_1_operand" "")))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "rol{l}\t%k0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "length" "2")]) (define_insn "*rotlsi3_1" @@ -12266,7 +12074,7 @@ "@ rol{l}\t{%2, %0|%0, %2} rol{l}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_insn "*rotlsi3_1_zext" @@ -12279,7 +12087,7 @@ "@ rol{l}\t{%2, %k0|%k0, %2} rol{l}\t{%b2, %k0|%k0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_expand "rotlhi3" @@ -12296,9 +12104,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATE, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "rol{w}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand 0 "register_operand" "") (const_string "2") @@ -12313,7 +12121,7 @@ "@ rol{w}\t{%2, %0|%0, %2} rol{w}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "HI")]) (define_expand "rotlqi3" @@ -12324,20 +12132,47 @@ "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;") +(define_insn "*rotlqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (rotate:QI (match_dup 0) + (match_operand:QI 1 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "rol{b}\t%0" + [(set_attr "type" "rotate1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + (define_insn "*rotlqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATE, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "rol{b}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*rotlqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (rotate:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + rol{b}\t{%1, %0|%0, %1} + rol{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "rotate1") + (set_attr "mode" "QI")]) + (define_insn "*rotlqi3_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") @@ -12347,7 +12182,7 @@ "@ rol{b}\t{%2, %0|%0, %2} rol{b}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "QI")]) (define_expand "rotrdi3" @@ -12364,9 +12199,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "ror{q}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand:DI 0 "register_operand" "") (const_string "2") @@ -12381,7 +12216,7 @@ "@ ror{q}\t{%2, %0|%0, %2} ror{q}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "DI")]) (define_expand "rotrsi3" @@ -12398,9 +12233,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATERT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "ror{l}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") @@ -12413,9 +12248,9 @@ (match_operand:QI 2 "const_int_1_operand" "")))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "ror{l}\t%k0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") @@ -12430,7 +12265,7 @@ "@ ror{l}\t{%2, %0|%0, %2} ror{l}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_insn "*rotrsi3_1_zext" @@ -12443,7 +12278,7 @@ "@ ror{l}\t{%2, %k0|%k0, %2} ror{l}\t{%b2, %k0|%k0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_expand "rotrhi3" @@ -12460,9 +12295,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATERT, HImode, operands) - && 
(TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "ror{w}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand 0 "register_operand" "") (const_string "2") @@ -12477,7 +12312,7 @@ "@ ror{w}\t{%2, %0|%0, %2} ror{w}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "HI")]) (define_expand "rotrqi3" @@ -12494,9 +12329,23 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATERT, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "ror{b}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (rotatert:QI (match_dup 0) + (match_operand:QI 1 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "ror{b}\t%0" + [(set_attr "type" "rotate1") (set (attr "length") (if_then_else (match_operand 0 "register_operand" "") (const_string "2") @@ -12511,7 +12360,20 @@ "@ ror{b}\t{%2, %0|%0, %2} ror{b}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") + (set_attr "mode" "QI")]) + +(define_insn "*rotrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (rotatert:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + ror{b}\t{%1, %0|%0, %1} + ror{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "rotate1") (set_attr "mode" "QI")]) ;; Bit set / bit test instructions @@ -12789,7 +12651,7 @@ (match_operand:SF 3 "nonimmediate_operand" "xm")]))] "TARGET_SSE && reload_completed" "cmp%D1ss\t{%3, %0|%0, %3}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecmp") (set_attr "mode" "SF")]) (define_insn "*sse_setccdf" @@ -12799,7 +12661,7 @@ (match_operand:DF 3 "nonimmediate_operand" "Ym")]))] "TARGET_SSE2 && reload_completed" "cmp%D1sd\t{%3, %0|%0, %3}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecmp") (set_attr "mode" "DF")]) ;; Basic conditional jump instructions. @@ -12961,13 +12823,14 @@ "" "%+j%C1\t%l0" [(set_attr "type" "ibr") - (set (attr "prefix_0f") + (set_attr "modrm" "0") + (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) - (const_int 0) - (const_int 1)))]) + (const_int 128))) + (const_int 2) + (const_int 6)))]) (define_insn "*jcc_2" [(set (pc) @@ -12978,13 +12841,14 @@ "" "%+j%c1\t%l0" [(set_attr "type" "ibr") - (set (attr "prefix_0f") + (set_attr "modrm" "0") + (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) - (const_int 0) - (const_int 1)))]) + (const_int 128))) + (const_int 2) + (const_int 6)))]) ;; In general it is not safe to assume too much about CCmode registers, ;; so simplify-rtx stops when it sees a second one. 
Under certain @@ -13244,7 +13108,15 @@ (label_ref (match_operand 0 "" "")))] "" "jmp\t%l0" - [(set_attr "type" "ibr")]) + [(set_attr "type" "ibr") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -126)) + (lt (minus (match_dup 0) (pc)) + (const_int 128))) + (const_int 2) + (const_int 5))) + (set_attr "modrm" "0")]) (define_expand "indirect_jump" [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))] @@ -13270,29 +13142,34 @@ (use (label_ref (match_operand 1 "" "")))])] "" { - /* In PIC mode, the table entries are stored GOT-relative. Convert - the relative address to an absolute address. */ + /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit) + relative. Convert the relative address to an absolute address. */ if (flag_pic) { + rtx op0, op1; + enum rtx_code code; + if (TARGET_64BIT) - operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], - gen_rtx_LABEL_REF (Pmode, operands[1]), - NULL_RTX, 0, - OPTAB_DIRECT); - else if (HAVE_AS_GOTOFF_IN_DATA) { - operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], - pic_offset_table_rtx, NULL_RTX, - 1, OPTAB_DIRECT); - current_function_uses_pic_offset_table = 1; + code = PLUS; + op0 = operands[0]; + op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); + } + else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA) + { + code = PLUS; + op0 = operands[0]; + op1 = pic_offset_table_rtx; } else { - operands[0] = expand_simple_binop (Pmode, MINUS, pic_offset_table_rtx, - operands[0], NULL_RTX, 1, - OPTAB_DIRECT); - current_function_uses_pic_offset_table = 1; + code = MINUS; + op0 = pic_offset_table_rtx; + op1 = operands[0]; } + + operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0, + OPTAB_DIRECT); } }) @@ -13357,14 +13234,17 @@ return "dec{l}\t%1\;%+jne\t%l0"; } [(set_attr "ppro_uops" "many") - (set (attr "type") + (set (attr "length") (if_then_else (and (eq_attr "alternative" "0") (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124)))) - (const_string "ibr") - (const_string "multi")))]) + (const_int 128)))) + (const_int 2) + (const_int 16))) + ;; We don't know the type before shorten branches. Optimistically expect + ;; the loop instruction to match. + (set (attr "type") (const_string "ibr"))]) (define_split [(set (pc) @@ -13474,21 +13354,8 @@ (match_operand:SI 3 "" "")))])] "!TARGET_64BIT" { - if (operands[3] == const0_rtx) - { - emit_insn (gen_call (operands[0], operands[1], constm1_rtx)); - DONE; - } - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[0], 0))) - current_function_uses_pic_offset_table = 1; - if (! call_insn_operand (XEXP (operands[0], 0), Pmode)) - XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); - if (TARGET_64BIT) - abort(); + ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3]); + DONE; }) (define_insn "*call_pop_0" @@ -13530,37 +13397,12 @@ [(call (match_operand:QI 0 "" "") (match_operand 1 "" "")) (use (match_operand 2 "" ""))] - ;; Operand 1 not used on the i386. "" { - rtx insn; - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[0], 0))) - current_function_uses_pic_offset_table = 1; - - if (! 
call_insn_operand (XEXP (operands[0], 0), Pmode)) - XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); - if (TARGET_64BIT && INTVAL (operands[2]) >= 0) - { - rtx reg = gen_rtx_REG (QImode, 0); - emit_move_insn (reg, operands[2]); - insn = emit_call_insn (gen_call_exp (operands[0], operands[1])); - use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg); - DONE; - } - insn = emit_call_insn (gen_call_exp (operands[0], operands[1])); - DONE; + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL); + DONE; }) -(define_expand "call_exp" - [(call (match_operand:QI 0 "" "") - (match_operand 1 "" ""))] - "" - "") - (define_insn "*call_0" [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) (match_operand 1 "" ""))] @@ -13612,7 +13454,6 @@ [(set_attr "type" "call")]) ;; Call subroutine, returning value in operand 0 -;; (which must be a hard register). (define_expand "call_value_pop" [(parallel [(set (match_operand 0 "" "") @@ -13623,20 +13464,9 @@ (match_operand:SI 4 "" "")))])] "!TARGET_64BIT" { - if (operands[4] == const0_rtx) - { - emit_insn (gen_call_value (operands[0], operands[1], operands[2], - constm1_rtx)); - DONE; - } - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[1], 0))) - current_function_uses_pic_offset_table = 1; - if (! call_insn_operand (XEXP (operands[1], 0), Pmode)) - XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], operands[4]); + DONE; }) (define_expand "call_value" @@ -13647,36 +13477,10 @@ ;; Operand 2 not used on the i386. "" { - rtx insn; - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[1], 0))) - current_function_uses_pic_offset_table = 1; - if (! call_insn_operand (XEXP (operands[1], 0), Pmode)) - XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); - if (TARGET_64BIT && INTVAL (operands[3]) >= 0) - { - rtx reg = gen_rtx_REG (QImode, 0); - emit_move_insn (reg, operands[3]); - insn = emit_call_insn (gen_call_value_exp (operands[0], operands[1], - operands[2])); - use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg); - DONE; - } - insn = emit_call_insn (gen_call_value_exp (operands[0], operands[1], - operands[2])); + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL); DONE; }) -(define_expand "call_value_exp" - [(set (match_operand 0 "" "") - (call (match_operand:QI 1 "" "") - (match_operand:SI 2 "" "")))] - "" - "") - ;; Call subroutine returning any type. (define_expand "untyped_call" @@ -13693,12 +13497,10 @@ simply pretend the untyped call returns a complex long double value. */ - emit_call_insn (TARGET_FLOAT_RETURNS_IN_80387 - ? gen_call_value (gen_rtx_REG (XCmode, FIRST_FLOAT_REG), - operands[0], const0_rtx, - GEN_INT (SSE_REGPARM_MAX - 1)) - : gen_call (operands[0], const0_rtx, - GEN_INT (SSE_REGPARM_MAX - 1))); + ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387 + ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), + operands[0], const0_rtx, GEN_INT (SSE_REGPARM_MAX - 1), + NULL); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -13710,7 +13512,7 @@ registers we stored in the result block. We avoid problems by claiming that all hard registers are used and clobbered at this point. 
*/ - emit_insn (gen_blockage ()); + emit_insn (gen_blockage (const0_rtx)); DONE; }) @@ -13721,7 +13523,7 @@ ;; all of memory. This blocks insns from being moved across this point. (define_insn "blockage" - [(unspec_volatile [(const_int 0)] 0)] + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_BLOCKAGE)] "" "" [(set_attr "length" "0")]) @@ -13781,45 +13583,14 @@ "" "ix86_expand_prologue (); DONE;") -(define_insn "prologue_set_got" +(define_insn "set_got" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec_volatile:SI - [(plus:SI (match_dup 0) - (plus:SI (match_operand:SI 1 "symbolic_operand" "") - (minus:SI (pc) (match_operand 2 "" ""))))] 1)) + (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) (clobber (reg:CC 17))] "!TARGET_64BIT" -{ - if (GET_CODE (operands[2]) == LABEL_REF) - operands[2] = XEXP (operands[2], 0); - if (TARGET_DEEP_BRANCH_PREDICTION) - return "add{l}\t{%1, %0|%0, %1}"; - else - return "add{l}\t{%1+[.-%X2], %0|%0, %a1+(.-%X2)}"; -} - [(set_attr "type" "alu") - ; Since this insn may have two constant operands, we must set the - ; length manually. - (set_attr "length_immediate" "4") - (set_attr "mode" "SI")]) - -(define_insn "prologue_get_pc" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec_volatile:SI [(plus:SI (pc) (match_operand 1 "" ""))] 2))] - "!TARGET_64BIT" -{ - if (GET_CODE (operands[1]) == LABEL_REF) - operands[1] = XEXP (operands[1], 0); - output_asm_insn ("call\t%X1", operands); - if (! TARGET_DEEP_BRANCH_PREDICTION) - { - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (operands[1])); - return "pop{l}\t%0"; - } - RET; -} - [(set_attr "type" "multi")]) + { return output_set_got (operands[0]); } + [(set_attr "type" "multi") + (set_attr "length" "12")]) (define_expand "epilogue" [(const_int 1)] @@ -13832,11 +13603,10 @@ "ix86_expand_epilogue (0); DONE;") (define_expand "eh_return" - [(use (match_operand 0 "register_operand" "")) - (use (match_operand 1 "register_operand" ""))] + [(use (match_operand 0 "register_operand" ""))] "" { - rtx tmp, sa = operands[0], ra = operands[1]; + rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0]; /* Tricky bit: we write the address of the handler to which we will be returning into someone else's stack frame, one word below the @@ -13855,7 +13625,8 @@ }) (define_insn_and_split "eh_return_si" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")] 13)] + [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")] + UNSPECV_EH_RETURN)] "!TARGET_64BIT" "#" "reload_completed" @@ -13863,7 +13634,8 @@ "ix86_expand_epilogue (2); DONE;") (define_insn_and_split "eh_return_di" - [(unspec_volatile [(match_operand:DI 0 "register_operand" "c")] 13)] + [(unspec_volatile [(match_operand:DI 0 "register_operand" "c")] + UNSPECV_EH_RETURN)] "TARGET_64BIT" "#" "reload_completed" @@ -13879,7 +13651,6 @@ [(set_attr "length_immediate" "0") (set_attr "length" "1") (set_attr "modrm" "0") - (set_attr "modrm" "0") (set_attr "athlon_decode" "vector") (set_attr "ppro_uops" "few")]) @@ -13892,13 +13663,12 @@ [(set_attr "length_immediate" "0") (set_attr "length" "1") (set_attr "modrm" "0") - (set_attr "modrm" "0") (set_attr "athlon_decode" "vector") (set_attr "ppro_uops" "few")]) (define_expand "ffssi2" [(set (match_operand:SI 0 "nonimmediate_operand" "") - (ffs:SI (match_operand:SI 1 "general_operand" "")))] + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))] "" { rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode); @@ -13987,7 +13757,7 @@ (compare:CCZ (match_operand:SI 1 
"nonimmediate_operand" "rm") (const_int 0))) (set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_dup 1)] 5))] + (unspec:SI [(match_dup 1)] UNSPEC_BSF))] "" "bsf{l}\t{%1, %0|%0, %1}" [(set_attr "prefix_0f" "1") @@ -13996,6 +13766,173 @@ ;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger ;; and slower than the two-byte movzx insn needed to do the work in SImode. +;; Thread-local storage patterns for ELF. +;; +;; Note that these code sequences must appear exactly as shown +;; in order to allow linker relaxation. + +(define_insn "*tls_global_dynamic_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%a2@TLSGD(,%1,1), %0|%0, %a2@TLSGD[%1*1]}\;call\t%P3" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "*tls_global_dynamic_32_sun" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_SUN_TLS" + "lea{l}\t{%a2@DTLNDX(%1), %4|%4, %a2@DTLNDX[%1]} + push{l}\t%4\;call\t%a2@TLSPLT\;pop{l}\t%4\;nop" + [(set_attr "type" "multi") + (set_attr "length" "14")]) + +(define_expand "tls_global_dynamic_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI + [(match_dup 2) + (match_operand:SI 1 "tls_symbolic_operand" "") + (match_dup 3)] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "")) + (clobber (match_scratch:SI 5 "")) + (clobber (reg:CC 17))])] + "" +{ + if (flag_pic) + operands[2] = pic_offset_table_rtx; + else + { + operands[2] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[2])); + } + operands[3] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_global_dynamic_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call (mem:QI (match_operand:DI 2 "call_insn_operand" "")) + (match_operand:DI 3 "" ""))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)] + "TARGET_64BIT" + ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|%%rdi, %a1@TLSGD[%%rip]}\;.word\t0x6666\;rex64\;call\t%P2" + [(set_attr "type" "multi") + (set_attr "length" "16")]) + +(define_expand "tls_global_dynamic_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call (mem:QI (match_dup 2)) (const_int 0))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)])] + "" +{ + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%&@TLSLDM(%1), %0|%0, %&@TLSLDM[%1]}\;call\t%P2" + [(set_attr "type" "multi") + (set_attr "length" "11")]) + +(define_insn "*tls_local_dynamic_base_32_sun" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + 
(match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_SUN_TLS" + "lea{l}\t{%&@TMDNX(%1), %3|%3, %&@TMDNX[%1]} + push{l}\t%3\;call\t%&@TLSPLT\;pop{l}\t%3" + [(set_attr "type" "multi") + (set_attr "length" "13")]) + +(define_expand "tls_local_dynamic_base_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_dup 1) (match_dup 2)] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:CC 17))])] + "" +{ + if (flag_pic) + operands[1] = pic_offset_table_rtx; + else + { + operands[1] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[1])); + } + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "")) + (match_operand:DI 2 "" ""))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] + "TARGET_64BIT" + "lea{q}\t{%&@TLSLD(%%rip), %%rdi|%%rdi, %&@TLSLD[%%rip]}\;call\t%P1" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_expand "tls_local_dynamic_base_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call (mem:QI (match_dup 1)) (const_int 0))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])] + "" +{ + operands[1] = ix86_tls_get_addr (); +}) + +;; Local dynamic of a single variable is a lose. Show combine how +;; to convert that back to global dynamic. + +(define_insn_and_split "*tls_local_dynamic_32_once" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE) + (const:SI (unspec:SI + [(match_operand:SI 3 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC 17))] + "" + "#" + "" + [(parallel [(set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)] + UNSPEC_TLS_GD)) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (reg:CC 17))])] + "") + ;; These patterns match the binary 387 instructions for addM3, subM3, ;; mulM3 and divM3. There are three patterns for each of DFmode and ;; SFmode. 
The first is the normal insn, the second the same insn but @@ -14032,7 +13969,9 @@ "* return output_387_binary_op (insn, operands);" [(set (attr "type") (if_then_else (eq_attr "alternative" "1") - (const_string "sse") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) (if_then_else (match_operand:SF 3 "mult_operator" "") (const_string "fmul") (const_string "fop")))) @@ -14046,7 +13985,10 @@ "TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c' && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse") + [(set (attr "type") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) (set_attr "mode" "SF")]) (define_insn "*fop_df_comm_nosse" @@ -14075,7 +14017,9 @@ "* return output_387_binary_op (insn, operands);" [(set (attr "type") (if_then_else (eq_attr "alternative" "1") - (const_string "sse") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) (if_then_else (match_operand:SF 3 "mult_operator" "") (const_string "fmul") (const_string "fop")))) @@ -14090,7 +14034,10 @@ && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c' && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse") + [(set (attr "type") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) (set_attr "mode" "DF")]) (define_insn "*fop_xf_comm" @@ -14148,8 +14095,14 @@ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(eq_attr "alternative" "2") - (const_string "sse") + (cond [(and (eq_attr "alternative" "2") + (match_operand:SF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:SF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") (match_operand:SF 3 "mult_operator" "") (const_string "fmul") (match_operand:SF 3 "div_operator" "") @@ -14166,7 +14119,13 @@ "TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse") + [(set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:SF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd"))) (set_attr "mode" "SF")]) ;; ??? Add SSE splitters for these! 
@@ -14218,7 +14177,7 @@ [(set (attr "type") (cond [(match_operand:DF 3 "mult_operator" "") (const_string "fmul") - (match_operand:DF 3 "div_operator" "") + (match_operand:DF 3 "div_operator" "") (const_string "fdiv") ] (const_string "fop"))) @@ -14235,8 +14194,14 @@ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(eq_attr "alternative" "2") - (const_string "sse") + (cond [(and (eq_attr "alternative" "2") + (match_operand:SF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:SF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") (match_operand:DF 3 "mult_operator" "") (const_string "fmul") (match_operand:DF 3 "div_operator" "") @@ -14253,7 +14218,14 @@ "TARGET_SSE2 && TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse")]) + [(set_attr "mode" "DF") + (set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:SF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd")))]) ;; ??? Add SSE splitters for these! (define_insn "*fop_df_2" @@ -14760,7 +14732,7 @@ (define_insn "sindf2" [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 1))] + (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_SIN))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fsin" @@ -14769,7 +14741,7 @@ (define_insn "sinsf2" [(set (match_operand:SF 0 "register_operand" "=f") - (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 1))] + (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_SIN))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fsin" @@ -14779,7 +14751,8 @@ (define_insn "*sinextendsfdf2" [(set (match_operand:DF 0 "register_operand" "=f") (unspec:DF [(float_extend:DF - (match_operand:SF 1 "register_operand" "0"))] 1))] + (match_operand:SF 1 "register_operand" "0"))] + UNSPEC_SIN))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fsin" @@ -14788,7 +14761,7 @@ (define_insn "sinxf2" [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 1))] + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))] "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fsin" @@ -14797,7 +14770,7 @@ (define_insn "sintf2" [(set (match_operand:TF 0 "register_operand" "=f") - (unspec:TF [(match_operand:TF 1 "register_operand" "0")] 1))] + (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_SIN))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fsin" @@ -14806,7 +14779,7 @@ (define_insn "cosdf2" [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 2))] + (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_COS))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" @@ -14815,7 +14788,7 @@ (define_insn "cossf2" [(set (match_operand:SF 0 "register_operand" "=f") - (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 2))] + (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_COS))] "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" @@ -14825,7 +14798,8 @@ (define_insn "*cosextendsfdf2" [(set (match_operand:DF 0 "register_operand" "=f") (unspec:DF [(float_extend:DF - (match_operand:SF 1 "register_operand" "0"))] 2))] + (match_operand:SF 1 "register_operand" "0"))] + UNSPEC_COS))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" @@ -14834,8 +14808,8 @@ (define_insn "cosxf2" [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 2))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))] + "!TARGET_64BIT && ! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" [(set_attr "type" "fpspc") @@ -14843,7 +14817,7 @@ (define_insn "costf2" [(set (match_operand:TF 0 "register_operand" "=f") - (unspec:TF [(match_operand:TF 1 "register_operand" "0")] 2))] + (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_COS))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" @@ -15737,7 +15711,7 @@ [(set (match_operand:SI 0 "register_operand" "") (unspec:SI [(match_operand:BLK 1 "general_operand" "") (match_operand:QI 2 "immediate_operand" "") - (match_operand 3 "immediate_operand" "")] 0))] + (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] "" { if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) @@ -15750,7 +15724,7 @@ [(set (match_operand:DI 0 "register_operand" "") (unspec:DI [(match_operand:BLK 1 "general_operand" "") (match_operand:QI 2 "immediate_operand" "") - (match_operand 3 "immediate_operand" "")] 0))] + (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] "" { if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) @@ -15764,7 +15738,7 @@ (unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1")) (match_operand:QI 2 "register_operand" "a") (match_operand:SI 3 "immediate_operand" "i") - (match_operand:SI 4 "register_operand" "0")] 0)) + (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS)) (use (reg:SI 19)) (clobber (match_operand:SI 1 "register_operand" "=D")) (clobber (reg:CC 17))] @@ -15779,7 +15753,7 @@ (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1")) (match_operand:QI 2 "register_operand" "a") (match_operand:DI 3 "immediate_operand" "i") - (match_operand:DI 4 "register_operand" "0")] 0)) + (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS)) (use (reg:SI 19)) (clobber (match_operand:DI 1 "register_operand" "=D")) (clobber (reg:CC 17))] @@ -15895,6 +15869,7 @@ ; Since we don't have the proper number of operands for an alu insn, ; fill in all the blanks. [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") (set_attr "mode" "DI") @@ -15937,6 +15912,7 @@ ; Since we don't have the proper number of operands for an alu insn, ; fill in all the blanks. 
[(set_attr "type" "alu") + (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") (set_attr "mode" "SI") @@ -16043,12 +16019,12 @@ (set_attr "mode" "DF")]) (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" [(match_operand 4 "" "") (const_int 0)]) (match_operand:DF 2 "nonimmediate_operand" "") (match_operand:DF 3 "nonimmediate_operand" "")))] - "!TARGET_64BIT && !ANY_FP_REG_P (operands[0]) && reload_completed" + "!TARGET_64BIT && reload_completed" [(set (match_dup 2) (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) (match_dup 5) @@ -16156,13 +16132,13 @@ ;; We can't represent the LT test directly. Do this by swapping the operands. (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "fp_register_operand" "") (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "register_operand" "")) (match_operand:SF 3 "register_operand" "") (match_operand:SF 4 "register_operand" ""))) (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed + "reload_completed && ((operands_match_p (operands[1], operands[3]) && operands_match_p (operands[2], operands[4])) || (operands_match_p (operands[1], operands[4]) @@ -16238,13 +16214,13 @@ ;; We can't represent the LT test directly. Do this by swapping the operands. (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "fp_register_operand" "") (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "register_operand" "")) (match_operand:DF 3 "register_operand" "") (match_operand:DF 4 "register_operand" ""))) (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed + "reload_completed && ((operands_match_p (operands[1], operands[3]) && operands_match_p (operands[2], operands[4])) || (operands_match_p (operands[1], operands[4]) @@ -16319,13 +16295,13 @@ (match_dup 2)))]) (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "fp_register_operand" "") (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "register_operand" "")) (match_operand:SF 3 "register_operand" "") (match_operand:SF 4 "register_operand" ""))) (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed + "reload_completed && ((operands_match_p (operands[1], operands[3]) && operands_match_p (operands[2], operands[4])) || (operands_match_p (operands[1], operands[4]) @@ -16400,13 +16376,13 @@ (match_dup 2)))]) (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "fp_register_operand" "") (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "register_operand" "")) (match_operand:DF 3 "register_operand" "") (match_operand:DF 4 "register_operand" ""))) (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed + "reload_completed && ((operands_match_p (operands[1], operands[3]) && operands_match_p (operands[2], operands[4])) || (operands_match_p (operands[1], operands[4]) @@ -16628,7 +16604,7 @@ ;; cmpCC op0, op4 - set op0 to 0 or ffffffff depending on the comparison ;; and op2, op0 - zero op2 if comparison was false ;; nand op0, op3 - load op3 to op0 if comparison was false -;; or op2, op0 - get the non-zero one into the result. +;; or op2, op0 - get the nonzero one into the result. 
(define_split [(set (match_operand 0 "register_operand" "") (if_then_else (match_operator 1 "sse_comparison_operator" @@ -16744,7 +16720,7 @@ (define_split [(set (match_operand 0 "register_operand" "") (if_then_else (match_operator 1 "comparison_operator" - [(match_operand 4 "register_operand" "") + [(match_operand 4 "nonimmediate_operand" "") (match_operand 5 "nonimmediate_operand" "")]) (match_operand 2 "nonmemory_operand" "") (match_operand 3 "nonmemory_operand" "")))] @@ -16756,13 +16732,16 @@ (subreg:TI (match_dup 7) 0)))] { PUT_MODE (operands[1], GET_MODE (operands[0])); - if (!sse_comparison_operator (operands[1], VOIDmode)) + if (!sse_comparison_operator (operands[1], VOIDmode) + || !rtx_equal_p (operands[0], operands[4])) { rtx tmp = operands[5]; operands[5] = operands[4]; operands[4] = tmp; PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1]))); } + if (!rtx_equal_p (operands[0], operands[4])) + abort (); if (const0_operand (operands[2], GET_MODE (operands[0]))) { operands[7] = operands[3]; @@ -16788,7 +16767,7 @@ }) (define_insn "allocate_stack_worker_1" - [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3) + [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] UNSPEC_STACK_PROBE) (set (reg:SI 7) (minus:SI (reg:SI 7) (match_dup 0))) (clobber (match_dup 0)) (clobber (reg:CC 17))] @@ -16798,7 +16777,7 @@ (set_attr "length" "5")]) (define_insn "allocate_stack_worker_rex64" - [(unspec:DI [(match_operand:DI 0 "register_operand" "a")] 3) + [(unspec:DI [(match_operand:DI 0 "register_operand" "a")] UNSPEC_STACK_PROBE) (set (reg:DI 7) (minus:DI (reg:DI 7) (match_dup 0))) (clobber (match_dup 0)) (clobber (reg:CC 17))] @@ -16835,7 +16814,7 @@ [(label_ref (match_operand 0 "" ""))] "!TARGET_64BIT && flag_pic" { - load_pic_register (); + emit_insn (gen_set_got (pic_offset_table_rtx)); DONE; }) @@ -16849,7 +16828,8 @@ (clobber (reg:CC 17))] "! TARGET_PARTIAL_REG_STALL && reload_completed && ((GET_MODE (operands[0]) == HImode - && (!optimize_size || GET_CODE (operands[2]) != CONST_INT + && ((!optimize_size && !TARGET_FAST_PREFIX) + || GET_CODE (operands[2]) != CONST_INT || CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))) || (GET_MODE (operands[0]) == QImode && (TARGET_PROMOTE_QImode || optimize_size)))" @@ -16862,6 +16842,10 @@ operands[2] = gen_lowpart (SImode, operands[2]); PUT_MODE (operands[3], SImode);") +; Promote the QImode tests, as i386 has encoding of the AND +; instruction with 32-bit sign-extended immediate and thus the +; instruction size is unchanged, except in the %eax case for +; which it is increased by one byte, hence the ! optimize_size. (define_split [(set (reg 17) (compare (and (match_operand 1 "aligned_operand" "") @@ -16870,39 +16854,44 @@ (set (match_operand 0 "register_operand" "") (and (match_dup 1) (match_dup 2)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ix86_match_ccmode (insn, CCNOmode) - && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode) + && ! optimize_size + && ((GET_MODE (operands[0]) == HImode && ! 
TARGET_FAST_PREFIX) + || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))" [(parallel [(set (reg:CCNO 17) (compare:CCNO (and:SI (match_dup 1) (match_dup 2)) (const_int 0))) (set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))])] "operands[2] - = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]) - & GET_MODE_MASK (GET_MODE (operands[0])), - SImode)); + = gen_int_mode (INTVAL (operands[2]) + & GET_MODE_MASK (GET_MODE (operands[0])), + SImode); operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]);") +; Don't promote the QImode tests, as i386 doesn't have encoding of +; the TEST instruction with 32-bit sign-extended immediate and thus +; the instruction size would at least double, which is not what we +; want even with ! optimize_size. (define_split [(set (reg 17) - (compare (and (match_operand 0 "aligned_operand" "") - (match_operand 1 "const_int_operand" "")) + (compare (and (match_operand:HI 0 "aligned_operand" "") + (match_operand:HI 1 "const_int_operand" "")) (const_int 0)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ix86_match_ccmode (insn, CCNOmode) - && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode) + && ! TARGET_FAST_PREFIX + && ! optimize_size" [(set (reg:CCNO 17) (compare:CCNO (and:SI (match_dup 0) (match_dup 1)) (const_int 0)))] "operands[1] - = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]) - & GET_MODE_MASK (GET_MODE (operands[0])), - SImode)); + = gen_int_mode (INTVAL (operands[1]) + & GET_MODE_MASK (GET_MODE (operands[0])), + SImode); operands[0] = gen_lowpart (SImode, operands[0]);") (define_split @@ -17152,7 +17141,8 @@ (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode) && (true_regnum (operands[0]) != 0 - || CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'K')) + || (GET_CODE (operands[1]) == CONST_INT + && CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'K'))) && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" [(parallel [(set (reg:CCNO 17) @@ -17375,7 +17365,7 @@ [(set (match_operand:SI 0 "register_operand" "") (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "const_int_operand" "")) 0))] - "exact_log2 (INTVAL (operands[1])) >= 0 + "exact_log2 (INTVAL (operands[2])) >= 0 && REGNO (operands[0]) == REGNO (operands[1]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) @@ -17853,52 +17843,92 @@ ;; Moves for SSE/MMX regs. (define_insn "movv4sf_internal" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) (define_insn "movv4si_internal" - [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. 
- "movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + +(define_insn "movv2di_internal" + [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE" + ;; @@@ let's try to use movaps here. + "@ + pxor\t%0, %0 + movdqa\t{%1, %0|%0, %1} + movdqa\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) (define_insn "movv8qi_internal" - [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") - (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov") + (set_attr "mode" "DI")]) (define_insn "movv4hi_internal" - [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m") - (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V4HI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov") + (set_attr "mode" "DI")]) (define_insn "movv2si_internal" - [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V2SI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "movv2sf_internal" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))] - "TARGET_3DNOW" - "movq\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V2SF 1 "vector_move_operand" "C,ym,y"))] + "TARGET_3DNOW + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_expand "movti" - [(set (match_operand:TI 0 "general_operand" "") - (match_operand:TI 1 "general_operand" ""))] + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] "TARGET_SSE || TARGET_64BIT" { if (TARGET_64BIT) @@ -17908,9 +17938,72 @@ DONE; }) +(define_insn "movv2df_internal" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorpd\t%0, %0 + movapd\t{%1, %0|%0, %1} + movapd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2DF")]) + +(define_insn "movv8hi_internal" + [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V8HI 1 
"vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + +(define_insn "movv16qi_internal" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + +(define_expand "movv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "") + (match_operand:V2DF 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_vector_move (V2DFmode, operands); + DONE; +}) + +(define_expand "movv8hi" + [(set (match_operand:V8HI 0 "nonimmediate_operand" "") + (match_operand:V8HI 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_vector_move (V8HImode, operands); + DONE; +}) + +(define_expand "movv16qi" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "") + (match_operand:V16QI 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_vector_move (V16QImode, operands); + DONE; +}) + (define_expand "movv4sf" - [(set (match_operand:V4SF 0 "general_operand" "") - (match_operand:V4SF 1 "general_operand" ""))] + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (match_operand:V4SF 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V4SFmode, operands); @@ -17918,17 +18011,26 @@ }) (define_expand "movv4si" - [(set (match_operand:V4SI 0 "general_operand" "") - (match_operand:V4SI 1 "general_operand" ""))] - "TARGET_MMX" + [(set (match_operand:V4SI 0 "nonimmediate_operand" "") + (match_operand:V4SI 1 "nonimmediate_operand" ""))] + "TARGET_SSE" { ix86_expand_vector_move (V4SImode, operands); DONE; }) +(define_expand "movv2di" + [(set (match_operand:V2DI 0 "nonimmediate_operand" "") + (match_operand:V2DI 1 "nonimmediate_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move (V2DImode, operands); + DONE; +}) + (define_expand "movv2si" - [(set (match_operand:V2SI 0 "general_operand" "") - (match_operand:V2SI 1 "general_operand" ""))] + [(set (match_operand:V2SI 0 "nonimmediate_operand" "") + (match_operand:V2SI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V2SImode, operands); @@ -17936,8 +18038,8 @@ }) (define_expand "movv4hi" - [(set (match_operand:V4HI 0 "general_operand" "") - (match_operand:V4HI 1 "general_operand" ""))] + [(set (match_operand:V4HI 0 "nonimmediate_operand" "") + (match_operand:V4HI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V4HImode, operands); @@ -17945,8 +18047,8 @@ }) (define_expand "movv8qi" - [(set (match_operand:V8QI 0 "general_operand" "") - (match_operand:V8QI 1 "general_operand" ""))] + [(set (match_operand:V8QI 0 "nonimmediate_operand" "") + (match_operand:V8QI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V8QImode, operands); @@ -17954,14 +18056,97 @@ }) (define_expand "movv2sf" - [(set (match_operand:V2SF 0 "general_operand" "") - (match_operand:V2SF 1 "general_operand" ""))] + [(set (match_operand:V2SF 0 "nonimmediate_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" ""))] "TARGET_3DNOW" { ix86_expand_vector_move (V2SFmode, operands); DONE; }) +(define_insn "*pushv2df" + [(set (match_operand:V2DF 0 "push_operand" "=<") + (match_operand:V2DF 1 "register_operand" 
"x"))] + "TARGET_SSE" + "#") + +(define_insn "*pushv2di" + [(set (match_operand:V2DI 0 "push_operand" "=<") + (match_operand:V2DI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv8hi" + [(set (match_operand:V8HI 0 "push_operand" "=<") + (match_operand:V8HI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv16qi" + [(set (match_operand:V16QI 0 "push_operand" "=<") + (match_operand:V16QI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv4sf" + [(set (match_operand:V4SF 0 "push_operand" "=<") + (match_operand:V4SF 1 "register_operand" "x"))] + "TARGET_SSE" + "#") + +(define_insn "*pushv4si" + [(set (match_operand:V4SI 0 "push_operand" "=<") + (match_operand:V4SI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv2si" + [(set (match_operand:V2SI 0 "push_operand" "=<") + (match_operand:V2SI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv4hi" + [(set (match_operand:V4HI 0 "push_operand" "=<") + (match_operand:V4HI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv8qi" + [(set (match_operand:V8QI 0 "push_operand" "=<") + (match_operand:V8QI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv2sf" + [(set (match_operand:V2SF 0 "push_operand" "=<") + (match_operand:V2SF 1 "register_operand" "y"))] + "TARGET_3DNOW" + "#") + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "register_operand" ""))] + "!TARGET_64BIT && reload_completed + && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 3))) + (set (match_dup 2) (match_dup 1))] + "operands[2] = change_address (operands[0], GET_MODE (operands[0]), + stack_pointer_rtx); + operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "register_operand" ""))] + "TARGET_64BIT && reload_completed + && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 3))) + (set (match_dup 2) (match_dup 1))] + "operands[2] = change_address (operands[0], GET_MODE (operands[0]), + stack_pointer_rtx); + operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") + + (define_insn_and_split "*pushti" [(set (match_operand:TI 0 "push_operand" "=<") (match_operand:TI 1 "nonmemory_operand" "x"))] @@ -17971,7 +18156,51 @@ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:TI (reg:SI 7)) (match_dup 1))] "" - [(set_attr "type" "sse")]) + [(set_attr "type" "multi")]) + +(define_insn_and_split "*pushv2df" + [(set (match_operand:V2DF 0 "push_operand" "=<") + (match_operand:V2DF 1 "nonmemory_operand" "x"))] + "TARGET_SSE2" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V2DF (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "multi")]) + +(define_insn_and_split "*pushv2di" + [(set (match_operand:V2DI 0 "push_operand" "=<") + (match_operand:V2DI 1 "nonmemory_operand" "x"))] + "TARGET_SSE2" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V2DI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "multi")]) + +(define_insn_and_split "*pushv8hi" + [(set (match_operand:V8HI 0 "push_operand" "=<") + (match_operand:V8HI 1 "nonmemory_operand" "x"))] + "TARGET_SSE2" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V8HI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "multi")]) + 
+(define_insn_and_split "*pushv16qi" + [(set (match_operand:V16QI 0 "push_operand" "=<") + (match_operand:V16QI 1 "nonmemory_operand" "x"))] + "TARGET_SSE2" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V16QI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "multi")]) (define_insn_and_split "*pushv4sf" [(set (match_operand:V4SF 0 "push_operand" "=<") @@ -17982,7 +18211,7 @@ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:V4SF (reg:SI 7)) (match_dup 1))] "" - [(set_attr "type" "sse")]) + [(set_attr "type" "multi")]) (define_insn_and_split "*pushv4si" [(set (match_operand:V4SI 0 "push_operand" "=<") @@ -17993,7 +18222,7 @@ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:V4SI (reg:SI 7)) (match_dup 1))] "" - [(set_attr "type" "sse")]) + [(set_attr "type" "multi")]) (define_insn_and_split "*pushv2si" [(set (match_operand:V2SI 0 "push_operand" "=<") @@ -18041,17 +18270,19 @@ (define_insn "movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:TI 1 "general_operand" "O,xm,x"))] - "TARGET_SSE && !TARGET_64BIT" + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ xorps\t%0, %0 movaps\t{%1, %0|%0, %1} movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemov,ssemov,ssemov") + (set_attr "mode" "V4SF")]) (define_insn "*movti_rex64" [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") - (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))] + (match_operand:TI 1 "general_operand" "riFo,riF,C,x,m"))] "TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ @@ -18060,8 +18291,8 @@ xorps\t%0, %0 movaps\\t{%1, %0|%0, %1} movaps\\t{%1, %0|%0, %1}" - [(set_attr "type" "*,*,sse,sse,sse") - (set_attr "mode" "TI")]) + [(set_attr "type" "*,*,ssemov,ssemov,ssemov") + (set_attr "mode" "V4SF")]) (define_split [(set (match_operand:TI 0 "nonimmediate_operand" "") @@ -18073,74 +18304,116 @@ ;; These two patterns are useful for specifying exactly whether to use ;; movaps or movups -(define_insn "sse_movaps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))] +(define_expand "sse_movaps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] + UNSPEC_MOVA))] "TARGET_SSE" - "@ - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_movaps (tmp, operands[1])); + emit_move_insn (operands[0], tmp); + DONE; + } +}) -(define_insn "sse_movups" +(define_insn "*sse_movaps_1" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))] + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVA))] + "TARGET_SSE + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov,ssemov") + (set_attr "mode" "V4SF")]) + +(define_expand "sse_movups" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] + UNSPEC_MOVU))] "TARGET_SSE" - "@ - movups\t{%1, %0|%0, %1} - movups\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) +{ + 
if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_movups (tmp, operands[1])); + emit_move_insn (operands[0], tmp); + DONE; + } +}) +(define_insn "*sse_movups_1" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movups\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt,ssecvt") + (set_attr "mode" "V4SF")]) ;; SSE Strange Moves. (define_insn "sse_movmskps" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] 33))] + (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] + UNSPEC_MOVMSK))] "TARGET_SSE" "movmskps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "mmx_pmovmskb" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))] + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] + UNSPEC_MOVMSK))] "TARGET_SSE || TARGET_3DNOW_A" "pmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) + (define_insn "mmx_maskmovq" [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 "register_operand" "y")] 32))] + (match_operand:V8QI 2 "register_operand" "y")] + UNSPEC_MASKMOV))] "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT" ;; @@@ check ordering of operands in intel/nonintel syntax "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_maskmovq_rex" [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D")) (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 "register_operand" "y")] 32))] + (match_operand:V8QI 2 "register_operand" "y")] + UNSPEC_MASKMOV))] "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT" ;; @@@ check ordering of operands in intel/nonintel syntax "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "sse_movntv4sf" [(set (match_operand:V4SF 0 "memory_operand" "=m") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] 34))] + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] + UNSPEC_MOVNT))] "TARGET_SSE" "movntps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) (define_insn "sse_movntdi" [(set (match_operand:DI 0 "memory_operand" "=m") - (unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))] + (unspec:DI [(match_operand:DI 1 "register_operand" "y")] + UNSPEC_MOVNT))] "TARGET_SSE || TARGET_3DNOW_A" "movntq\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxmov") + (set_attr "mode" "DI")]) (define_insn "sse_movhlps" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18154,7 +18427,8 @@ (const_int 3)))] "TARGET_SSE" "movhlps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "sse_movlhps" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18168,7 +18442,8 @@ (const_int 12)))] "TARGET_SSE" "movlhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn 
"sse_movhps" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") @@ -18179,7 +18454,8 @@ "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" "movhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "sse_movlps" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") @@ -18190,17 +18466,29 @@ "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" "movlps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) -(define_insn "sse_loadss" +(define_expand "sse_loadss" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "memory_operand" "")] + "TARGET_SSE" +{ + emit_insn (gen_sse_loadss_1 (operands[0], operands[1], + CONST0_RTX (V4SFmode))); + DONE; +}) + +(define_insn "sse_loadss_1" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (match_operand:V4SF 1 "memory_operand" "m") - (vec_duplicate:V4SF (float:SF (const_int 0))) + (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m")) + (match_operand:V4SF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE" "movss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) (define_insn "sse_movss" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18210,7 +18498,8 @@ (const_int 1)))] "TARGET_SSE" "movss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) (define_insn "sse_storess" [(set (match_operand:SF 0 "memory_operand" "=m") @@ -18219,17 +18508,20 @@ (parallel [(const_int 0)])))] "TARGET_SSE" "movss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) (define_insn "sse_shufps" [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "immediate_operand" "i")] 41))] + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_SHUFFLE))] "TARGET_SSE" ;; @@@ check operand order for intel/nonintel syntax "shufps\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) ;; SSE arithmetic @@ -18240,7 +18532,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "addps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) (define_insn "vmaddv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18251,7 +18544,8 @@ (const_int 1)))] "TARGET_SSE" "addss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) (define_insn "subv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18259,7 +18553,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "subps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) (define_insn "vmsubv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18270,7 +18565,8 @@ (const_int 1)))] "TARGET_SSE" "subss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) (define_insn "mulv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18278,7 +18574,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "mulps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemul") + 
(set_attr "mode" "V4SF")]) (define_insn "vmmulv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18289,7 +18586,8 @@ (const_int 1)))] "TARGET_SSE" "mulss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemul") + (set_attr "mode" "SF")]) (define_insn "divv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18297,7 +18595,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "divps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssediv") + (set_attr "mode" "V4SF")]) (define_insn "vmdivv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18308,7 +18607,8 @@ (const_int 1)))] "TARGET_SSE" "divss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssediv") + (set_attr "mode" "SF")]) ;; SSE square root/reciprocal @@ -18316,45 +18616,52 @@ (define_insn "rcpv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))] + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] "TARGET_SSE" "rcpps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) (define_insn "vmrcpv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42) + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RCP) (match_operand:V4SF 2 "register_operand" "0") (const_int 1)))] "TARGET_SSE" "rcpss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) (define_insn "rsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))] + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] "TARGET_SSE" "rsqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) (define_insn "vmrsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43) + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RSQRT) (match_operand:V4SF 2 "register_operand" "0") (const_int 1)))] "TARGET_SSE" "rsqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) (define_insn "sqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] "TARGET_SSE" "sqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) (define_insn "vmsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18364,206 +18671,358 @@ (const_int 1)))] "TARGET_SSE" "sqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) ;; SSE logical operations. +;; SSE defines logical operations on floating point values. This brings +;; interesting challenge to RTL representation where logicals are only valid +;; on integral types. We deal with this by representing the floating point +;; logical as logical on arguments casted to TImode as this is what hardware +;; really does. Unfortunately hardware requires the type information to be +;; present and thus we must avoid subregs from being simplified and elliminated +;; in later compilation phases. 
+;; +;; We have following variants from each instruction: +;; sse_andsf3 - the operation taking V4SF vector operands +;; and doing TImode cast on them +;; *sse_andsf3_memory - the operation taking one memory operand casted to +;; TImode, since backend insist on elliminating casts +;; on memory operands +;; sse_andti3_sf_1 - the operation taking SF scalar operands. +;; We can not accept memory operand here as instruction reads +;; whole scalar. This is generated only post reload by GCC +;; scalar float operations that expands to logicals (fabs) +;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode +;; memory operand. Eventually combine can be able +;; to synthetize these using splitter. +;; sse2_anddf3, *sse2_anddf3_memory +;; +;; ;; These are not called andti3 etc. because we really really don't want ;; the compiler to widen DImode ands to TImode ands and then try to move ;; into DImode subregs of SSE registers, and them together, and move out ;; of DImode subregs again! +;; SSE1 single precision floating point logical operation +(define_expand "sse_andv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (and:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE" + "") -(define_insn "*sse_andti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_andti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +(define_insn "*sse_andv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_sf_1" +(define_insn "*sse_andsf3" [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_expand "sse_nandv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (and:TI (not:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE" + "") + +(define_insn "*sse_nandv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "andnps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_sf_2" +(define_insn "*sse_nandsf3" [(set (subreg:TI 
(match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" - "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "andnps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "sse_andti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +(define_expand "sse_iorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (ior:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE" + "") + +(define_insn "*sse_iorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 + "TARGET_SSE && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "orps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_sse2" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +(define_insn "*sse_iorsf3" + [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 + "TARGET_SSE && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "orps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "*sse_nandti3_df" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +(define_expand "sse_xorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (xor:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "") + +(define_insn "*sse_xorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xorps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "*sse_nandti3_sf" +(define_insn "*sse_xorsf3" [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (not:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "andnps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xorps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "sse_nandti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI 
(not:TI (match_operand:TI 1 "register_operand" "0")) +;; SSE2 double precision floating point logical operation + +(define_expand "sse2_andv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE2" + "") + +(define_insn "*sse2_andv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2" - "andnps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_nandti3_sse2" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) +(define_insn "*sse2_andv2df3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_expand "sse2_nandv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] "TARGET_SSE2" - "pnand\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "") -(define_insn "*sse_iorti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] +(define_insn "*sse2_nandv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "andnpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_df_2" +(define_insn "*sse_nandti3_df" [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) (match_operand:TI 2 "nonimmediate_operand" "Ym")))] "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_iorti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "andnpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +(define_expand "sse2_iorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0) + (subreg:TI 
(match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE2" + "") -(define_insn "sse_iorti3" - [(set (match_operand:TI 0 "register_operand" "=x") +(define_insn "*sse2_iorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 + "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "orpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_sse2" - [(set (match_operand:TI 0 "register_operand" "=x") +(define_insn "*sse2_iordf3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "orpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_xorti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] +(define_expand "sse2_xorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (xor:TI (subreg:TI (match_operand:V2DF 1 "nonimmediate_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] "TARGET_SSE2" + "") + +(define_insn "*sse2_xorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_xorti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" +(define_insn "*sse2_xordf3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_xorti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +;; SSE2 integral logicals. These patterns must always come after floating +;; point ones since we don't want compiler to use integer opcodes on floating +;; point SSE values to avoid matching of subregs in the match_operand. 
+(define_insn "*sse2_andti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "pand\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) -(define_insn "*sse_xorti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) +(define_insn "sse2_andv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "pand\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "*sse2_nandti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "TARGET_SSE2" + "pandn\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) -(define_insn "sse_xorti3" +(define_insn "sse2_nandv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0")) + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "pandn\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "*sse2_iorti3" [(set (match_operand:TI 0 "register_operand" "=x") - (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 + "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "por\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_iorv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "por\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) -(define_insn "*sse_xorti3_sse2" +(define_insn "*sse2_xorti3" [(set (match_operand:TI 0 "register_operand" "=x") (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_xorv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (xor:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "pxor\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) ;; Use xor, but don't show input operands so they aren't live before ;; this insn. 
(define_insn "sse_clrv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(const_int 0)] 45))] + (unspec:V4SF [(const_int 0)] UNSPEC_NOP))] "TARGET_SSE" "xorps\t{%0, %0|%0, %0}" - [(set_attr "type" "sse") - (set_attr "memory" "none")]) + [(set_attr "type" "sselog") + (set_attr "memory" "none") + (set_attr "mode" "V4SF")]) + +;; Use xor, but don't show input operands so they aren't live before +;; this insn. +(define_insn "sse_clrv2df" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(const_int 0)] UNSPEC_NOP))] + "TARGET_SSE2" + "xorpd\t{%0, %0|%0, %0}" + [(set_attr "type" "sselog") + (set_attr "memory" "none") + (set_attr "mode" "V4SF")]) ;; SSE mask-generating compares @@ -18574,7 +19033,8 @@ (match_operand:V4SF 2 "register_operand" "x")]))] "TARGET_SSE" "cmp%D3ps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "V4SF")]) (define_insn "maskncmpv4sf3" [(set (match_operand:V4SI 0 "register_operand" "=x") @@ -18589,7 +19049,8 @@ else return "cmpn%D3ps\t{%2, %0|%0, %2}"; } - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "V4SF")]) (define_insn "vmmaskcmpv4sf3" [(set (match_operand:V4SI 0 "register_operand" "=x") @@ -18597,11 +19058,12 @@ (match_operator:V4SI 3 "sse_comparison_operator" [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "register_operand" "x")]) - (match_dup 1) + (subreg:V4SI (match_dup 1) 0) (const_int 1)))] "TARGET_SSE" "cmp%D3ss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) (define_insn "vmmaskncmpv4sf3" [(set (match_operand:V4SI 0 "register_operand" "=x") @@ -18619,33 +19081,34 @@ else return "cmpn%D3ss\t{%2, %0|%0, %2}"; } - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) (define_insn "sse_comi" [(set (reg:CCFP 17) - (match_operator:CCFP 2 "sse_comparison_operator" - [(vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))]))] + (compare:CCFP (vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] "TARGET_SSE" "comiss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) (define_insn "sse_ucomi" [(set (reg:CCFPU 17) - (match_operator:CCFPU 2 "sse_comparison_operator" - [(vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))]))] + (compare:CCFPU (vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] "TARGET_SSE" "ucomiss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) ;; SSE unpack @@ -18666,7 +19129,8 @@ (const_int 5)))] "TARGET_SSE" "unpckhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "sse_unpcklps" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18684,7 +19148,8 @@ (const_int 5)))] "TARGET_SSE" "unpcklps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) ;; SSE min/max @@ 
-18695,7 +19160,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "maxps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) (define_insn "vmsmaxv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18706,7 +19172,8 @@ (const_int 1)))] "TARGET_SSE" "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) (define_insn "sminv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18714,7 +19181,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "minps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) (define_insn "vmsminv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18725,8 +19193,8 @@ (const_int 1)))] "TARGET_SSE" "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) ;; SSE <-> integer/MMX conversions @@ -18739,7 +19207,8 @@ (const_int 12)))] "TARGET_SSE" "cvtpi2ps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "cvtps2pi" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -18748,16 +19217,19 @@ (parallel [(const_int 0) (const_int 1)])))] "TARGET_SSE" "cvtps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "cvttps2pi" [(set (match_operand:V2SI 0 "register_operand" "=y") (vec_select:V2SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30) + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX) (parallel [(const_int 0) (const_int 1)])))] "TARGET_SSE" "cvttps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) (define_insn "cvtsi2ss" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18768,7 +19240,21 @@ (const_int 14)))] "TARGET_SSE" "cvtsi2ss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + +(define_insn "cvtsi2ssq" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0,0") + (vec_duplicate:V4SF + (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 14)))] + "TARGET_SSE && TARGET_64BIT" + "cvtsi2ssq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,vector") + (set_attr "mode" "SF")]) (define_insn "cvtss2si" [(set (match_operand:SI 0 "register_operand" "=r") @@ -18777,16 +19263,42 @@ (parallel [(const_int 0)])))] "TARGET_SSE" "cvtss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + +(define_insn "cvtss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) + (parallel [(const_int 0)])))] + "TARGET_SSE" + "cvtss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,vector") + (set_attr "mode" "SF")]) (define_insn "cvttss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30) + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX) (parallel [(const_int 0)])))] "TARGET_SSE" "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + 
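Editor's annotation, not part of the patch: the cvtsi2ss/cvtss2si/cvttss2si patterns above (their 64-bit "q" variants continue below) are what the usual scalar conversion intrinsics expand to. A minimal sketch, assuming GCC's <xmmintrin.h>:

#include <xmmintrin.h>

/* cvtss2si rounds according to MXCSR; cvttss2si truncates toward zero. */
int float_to_int (float f, int *truncated)
{
  __m128 v = _mm_set_ss (f);
  *truncated = _mm_cvttss_si32 (v);   /* cvttss2si */
  return _mm_cvtss_si32 (v);          /* cvtss2si */
}

/* cvtsi2ss converts an integer into the low lane, leaving the rest alone. */
float int_to_float (int i)
{
  float out;
  _mm_store_ss (&out, _mm_cvtsi32_ss (_mm_setzero_ps (), i));   /* cvtsi2ss */
  return out;
}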
+(define_insn "cvttss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] + UNSPEC_FIX) + (parallel [(const_int 0)])))] + "TARGET_SSE && TARGET_64BIT" + "cvttss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "vector,vector")]) ;; MMX insns @@ -18795,59 +19307,77 @@ (define_insn "addv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "addv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "addv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI (match_operand:V2SI 1 "register_operand" "0") + (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0") (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_adddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(plus:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "paddq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "ssaddv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "ssaddv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "usaddv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "usaddv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "subv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -18855,7 +19385,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] 
"TARGET_MMX" "psubb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "subv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18863,7 +19394,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "subv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -18871,7 +19403,19 @@ (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_subdi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(minus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "psubq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "sssubv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -18879,7 +19423,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "sssubv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18887,7 +19432,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "ussubv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -18895,7 +19441,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "ussubv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18903,7 +19450,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "mulv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18911,7 +19459,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pmullw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) (define_insn "smulv4hi3_highpart" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18924,7 +19473,8 @@ (const_int 16))))] "TARGET_MMX" "pmulhw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) (define_insn "umulv4hi3_highpart" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18937,7 +19487,8 @@ (const_int 16))))] "TARGET_SSE || TARGET_3DNOW_A" "pmulhuw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) (define_insn "mmx_pmaddwd" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -18958,7 +19509,8 @@ (const_int 3)]))))))] "TARGET_MMX" "pmaddwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) ;; MMX logical operations @@ -18968,49 +19520,58 @@ (define_insn "mmx_iordi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(ior:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + [(ior:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" 
"ym"))] + UNSPEC_NOP))] "TARGET_MMX" "por\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "mmx_xordi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(xor:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + [(xor:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] "TARGET_MMX" "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI") (set_attr "memory" "none")]) ;; Same as pxor, but don't show input operands so that we don't think ;; they are live. (define_insn "mmx_clrdi" [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI [(const_int 0)] 45))] + (unspec:DI [(const_int 0)] UNSPEC_NOP))] "TARGET_MMX" "pxor\t{%0, %0|%0, %0}" - [(set_attr "type" "mmx") + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI") (set_attr "memory" "none")]) (define_insn "mmx_anddi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(and:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + [(and:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] "TARGET_MMX" "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "mmx_nanddi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0")) - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] "TARGET_MMX" "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) ;; MMX unsigned averages/sum of absolute differences @@ -19032,7 +19593,8 @@ (const_int 1)))] "TARGET_SSE || TARGET_3DNOW_A" "pavgb\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "mmx_uavgv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19047,15 +19609,18 @@ (const_int 1)))] "TARGET_SSE || TARGET_3DNOW_A" "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "mmx_psadbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))] + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSADBW))] "TARGET_SSE || TARGET_3DNOW_A" "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) ;; MMX insert/extract/shuffle @@ -19068,7 +19633,8 @@ (match_operand:SI 3 "immediate_operand" "i")))] "TARGET_SSE || TARGET_3DNOW_A" "pinsrw\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_pextrw" [(set (match_operand:SI 0 "register_operand" "=r") @@ -19077,15 +19643,18 @@ [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SSE || TARGET_3DNOW_A" "pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_pshufw" [(set (match_operand:V4HI 0 "register_operand" "=y") 
(unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0") - (match_operand:SI 2 "immediate_operand" "i")] 41))] + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_SHUFFLE))] "TARGET_SSE || TARGET_3DNOW_A" "pshufw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) ;; MMX mask-generating comparisons @@ -19096,7 +19665,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpeqb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) (define_insn "eqv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19104,7 +19674,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpeqw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) (define_insn "eqv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19112,7 +19683,8 @@ (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpeqd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) (define_insn "gtv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -19120,7 +19692,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpgtb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) (define_insn "gtv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19128,7 +19701,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpgtw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) (define_insn "gtv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19136,7 +19710,8 @@ (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpgtd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) ;; MMX max/min insns @@ -19147,7 +19722,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_SSE || TARGET_3DNOW_A" "pmaxub\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "smaxv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19155,7 +19731,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_SSE || TARGET_3DNOW_A" "pmaxsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "uminv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -19163,7 +19740,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_SSE || TARGET_3DNOW_A" "pminub\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "sminv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19171,7 +19749,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_SSE || TARGET_3DNOW_A" "pminsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) ;; MMX shifts @@ -19182,7 +19761,8 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "psraw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "ashrv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19190,7 +19770,8 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "psrad\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr 
"type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "lshrv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19198,7 +19779,8 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "psrlw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "lshrv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19206,17 +19788,20 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "psrld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) ;; See logical MMX insns. (define_insn "mmx_lshrdi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] 45))] + (match_operand:DI 2 "nonmemory_operand" "yi"))] + UNSPEC_NOP))] "TARGET_MMX" "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "ashlv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19224,7 +19809,8 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "psllw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "ashlv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19232,17 +19818,20 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "pslld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) ;; See logical MMX insns. (define_insn "mmx_ashldi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI [(ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] 45))] + (match_operand:DI 2 "nonmemory_operand" "yi"))] + UNSPEC_NOP))] "TARGET_MMX" "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) ;; MMX pack/unpack insns. 
@@ -19254,7 +19843,8 @@ (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] "TARGET_MMX" "packsswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "mmx_packssdw" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19263,7 +19853,8 @@ (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] "TARGET_MMX" "packssdw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "mmx_packuswb" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -19272,7 +19863,8 @@ (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] "TARGET_MMX" "packuswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "mmx_punpckhbw" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -19298,7 +19890,8 @@ (const_int 85)))] "TARGET_MMX" "punpckhbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_punpckhwd" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19316,21 +19909,21 @@ (const_int 5)))] "TARGET_MMX" "punpckhwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_punpckhdq" [(set (match_operand:V2SI 0 "register_operand" "=y") (vec_merge:V2SI - (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 1)])) + (match_operand:V2SI 1 "register_operand" "0") (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1) (const_int 0)])) (const_int 1)))] "TARGET_MMX" "punpckhdq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_punpcklbw" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -19356,7 +19949,8 @@ (const_int 85)))] "TARGET_MMX" "punpcklbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_punpcklwd" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19374,7 +19968,8 @@ (const_int 5)))] "TARGET_MMX" "punpcklwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_punpckldq" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19382,19 +19977,18 @@ (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") (parallel [(const_int 1) (const_int 0)])) - (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 1)])) + (match_operand:V2SI 2 "register_operand" "y") (const_int 1)))] "TARGET_MMX" "punpckldq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) ;; Miscellaneous stuff (define_insn "emms" - [(unspec_volatile [(const_int 0)] 31) + [(unspec_volatile [(const_int 0)] UNSPECV_EMMS) (clobber (reg:XF 8)) (clobber (reg:XF 9)) (clobber (reg:XF 10)) @@ -19417,23 +20011,24 @@ (set_attr "memory" "unknown")]) (define_insn "ldmxcsr" - [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 37)] - "TARGET_MMX" + [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] + UNSPECV_LDMXCSR)] + "TARGET_SSE" "ldmxcsr\t%0" - [(set_attr "type" "mmx") + [(set_attr "type" "sse") (set_attr "memory" "load")]) (define_insn "stmxcsr" [(set (match_operand:SI 0 "memory_operand" "=m") - (unspec_volatile:SI [(const_int 0)] 40))] - "TARGET_MMX" + 
(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] + "TARGET_SSE" "stmxcsr\t%0" - [(set_attr "type" "mmx") + [(set_attr "type" "sse") (set_attr "memory" "store")]) (define_expand "sfence" [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] 44))] + (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] "TARGET_SSE || TARGET_3DNOW_A" { operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); @@ -19442,7 +20037,7 @@ (define_insn "*sfence_insn" [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] 44))] + (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] "TARGET_SSE || TARGET_3DNOW_A" "sfence" [(set_attr "type" "sse") @@ -19457,7 +20052,7 @@ (reg:DI 25) (reg:DI 26) (reg:DI 27) - (reg:DI 28)] 13)) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 "register_operand" "")) (use (match_operand:DI 2 "immediate_operand" "")) (use (label_ref:DI (match_operand 3 "" "")))])] @@ -19474,7 +20069,7 @@ (reg:DI 25) (reg:DI 26) (reg:DI 27) - (reg:DI 28)] 13)) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 "register_operand" "r")) (use (match_operand:DI 2 "const_int_operand" "i")) (use (label_ref:DI (match_operand 3 "" "X")))] @@ -19517,7 +20112,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfadd\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "subv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19525,7 +20121,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfsub\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "subrv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19533,7 +20130,8 @@ (match_operand:V2SF 1 "register_operand" "0")))] "TARGET_3DNOW" "pfsubr\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "gtv2sf3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19541,7 +20139,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfcmpgt\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) (define_insn "gev2sf3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19549,7 +20148,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfcmpge\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) (define_insn "eqv2sf3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19557,7 +20157,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfcmpeq\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) (define_insn "pfmaxv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19565,7 +20166,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfmax\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "pfminv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19573,7 +20175,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfmin\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "mulv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19581,10 +20184,11 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfmul\\t{%2, %0|%0, 
%2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "V2SF")]) (define_insn "femms" - [(unspec_volatile [(const_int 0)] 46) + [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS) (clobber (reg:XF 8)) (clobber (reg:XF 9)) (clobber (reg:XF 10)) @@ -19603,14 +20207,16 @@ (clobber (reg:DI 36))] "TARGET_3DNOW" "femms" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) (define_insn "pf2id" [(set (match_operand:V2SI 0 "register_operand" "=y") (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pf2id\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) (define_insn "pf2iw" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19619,7 +20225,8 @@ (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))] "TARGET_3DNOW_A" "pf2iw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) (define_insn "pfacc" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19636,7 +20243,8 @@ (parallel [(const_int 1)])))))] "TARGET_3DNOW" "pfacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "pfnacc" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19653,7 +20261,8 @@ (parallel [(const_int 1)])))))] "TARGET_3DNOW_A" "pfnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "pfpnacc" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19670,7 +20279,8 @@ (parallel [(const_int 1)])))))] "TARGET_3DNOW_A" "pfpnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "pi2fw" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19686,14 +20296,16 @@ (parallel [(const_int 1)])))))))] "TARGET_3DNOW_A" "pi2fw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) (define_insn "floatv2si2" [(set (match_operand:V2SF 0 "register_operand" "=y") (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pi2fd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) ;; This insn is identical to pavgb in operation, but the opcode is ;; different. To avoid accidentally matching pavgb, use an unspec. 
@@ -19702,50 +20314,62 @@ [(set (match_operand:V8QI 0 "register_operand" "=y") (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 49))] + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PAVGUSB))] "TARGET_3DNOW" "pavgusb\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "TI")]) ;; 3DNow reciprical and sqrt (define_insn "pfrcpv2sf2" [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 50))] + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] + UNSPEC_PFRCP))] "TARGET_3DNOW" "pfrcp\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "mode" "TI")]) (define_insn "pfrcpit1v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 51))] + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRCPIT1))] "TARGET_3DNOW" "pfrcpit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "mode" "TI")]) (define_insn "pfrcpit2v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 52))] + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRCPIT2))] "TARGET_3DNOW" "pfrcpit2\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "mode" "TI")]) (define_insn "pfrsqrtv2sf2" [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 53))] + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] + UNSPEC_PFRSQRT))] "TARGET_3DNOW" - "pfrsqrt\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + "pfrsqrt\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx") + (set_attr "mode" "TI")]) (define_insn "pfrsqit1v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 54))] + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRSQIT1))] "TARGET_3DNOW" "pfrsqit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "mode" "TI")]) (define_insn "pmulhrwv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19764,7 +20388,8 @@ (const_int 16))))] "TARGET_3DNOW" "pmulhrw\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "TI")]) (define_insn "pswapdv2si2" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19772,7 +20397,8 @@ (parallel [(const_int 1) (const_int 0)])))] "TARGET_3DNOW_A" "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "TI")]) (define_insn "pswapdv2sf2" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19780,7 +20406,8 @@ (parallel [(const_int 1) (const_int 0)])))] "TARGET_3DNOW_A" "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "TI")]) (define_expand "prefetch" [(prefetch (match_operand 0 "address_operand" "") @@ -19843,7 +20470,8 @@ return patterns[locality]; } - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "memory" "none")]) (define_insn "*prefetch_3dnow" [(prefetch (match_operand:SI 0 "address_operand" "p") @@ -19870,4 +20498,1577 @@ else 
return "prefetchw\t%a0"; } - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) + +;; SSE2 support + +(define_insn "addv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (plus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "addpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "vmaddv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (plus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "addsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "subv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "subpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "vmsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "subsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "mulv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (mult:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "mulpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemul") + (set_attr "mode" "V2DF")]) + +(define_insn "vmmulv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (mult:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "mulsd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemul") + (set_attr "mode" "DF")]) + +(define_insn "divv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (div:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "divpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssediv") + (set_attr "mode" "V2DF")]) + +(define_insn "vmdivv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (div:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "divsd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssediv") + (set_attr "mode" "DF")]) + +;; SSE min/max + +(define_insn "smaxv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "maxpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "vmsmaxv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "maxsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "sminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 
"nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "minpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "vmsminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "minsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) +;; SSE2 square root. There doesn't appear to be an extension for the +;; reciprocal/rsqrt instructions if the Intel manual is to be believed. + +(define_insn "sqrtv2df2" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")))] + "TARGET_SSE2" + "sqrtpd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V2DF")]) + +(define_insn "vmsqrtv2df2" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")) + (match_operand:V2DF 2 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE2" + "sqrtsd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +;; SSE mask-generating compares + +(define_insn "maskcmpv2df3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (match_operator:V2DI 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "x")]))] + "TARGET_SSE2" + "cmp%D3pd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "V2DF")]) + +(define_insn "maskncmpv2df3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (not:V2DI + (match_operator:V2DI 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "x")])))] + "TARGET_SSE2" +{ + if (GET_CODE (operands[3]) == UNORDERED) + return "cmpordps\t{%2, %0|%0, %2}"; + else + return "cmpn%D3pd\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "ssecmp") + (set_attr "mode" "V2DF")]) + +(define_insn "vmmaskcmpv2df3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_merge:V2DI + (match_operator:V2DI 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "x")]) + (subreg:V2DI (match_dup 1) 0) + (const_int 1)))] + "TARGET_SSE2" + "cmp%D3sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +(define_insn "vmmaskncmpv2df3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_merge:V2DI + (not:V2DI + (match_operator:V2DI 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "x")])) + (subreg:V2DI (match_dup 1) 0) + (const_int 1)))] + "TARGET_SSE2" +{ + if (GET_CODE (operands[3]) == UNORDERED) + return "cmpordsd\t{%2, %0|%0, %2}"; + else + return "cmpn%D3sd\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +(define_insn "sse2_comi" + [(set (reg:CCFP 17) + (compare:CCFP (vec_select:DF + (match_operand:V2DF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "comisd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +(define_insn "sse2_ucomi" + [(set (reg:CCFPU 17) + (compare:CCFPU (vec_select:DF + (match_operand:V2DF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_operand:V2DF 1 
"register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "ucomisd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +;; SSE Strange Moves. + +(define_insn "sse2_movmskpd" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "TARGET_SSE2" + "movmskpd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "TARGET_SSE2" + "pmovmskb\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_maskmovdqu" + [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D")) + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") + (match_operand:V16QI 2 "register_operand" "x")] + UNSPEC_MASKMOV))] + "TARGET_SSE2" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovdqu\t{%2, %1|%1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_maskmovdqu_rex64" + [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") + (match_operand:V16QI 2 "register_operand" "x")] + UNSPEC_MASKMOV))] + "TARGET_SSE2" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovdqu\t{%2, %1|%1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movntv2df" + [(set (match_operand:V2DF 0 "memory_operand" "=m") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movntpd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movntv2di" + [(set (match_operand:V2DI 0 "memory_operand" "=m") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movntdq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movntsi" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movnti\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +;; SSE <-> integer/MMX conversions + +;; Conversions between SI and SF + +(define_insn "cvtdq2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "cvtdq2ps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "cvtps2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "cvtps2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "cvttps2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX))] + "TARGET_SSE2" + "cvttps2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;; Conversions between SI and DF + +(define_insn "cvtdq2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (float:V2DF (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel + [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE2" + "cvtdq2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn 
"cvtpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_SSE2" + "cvtpd2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "cvttpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_SSE2" + "cvttpd2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "cvtpd2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "cvtpd2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "cvttpd2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX))] + "TARGET_SSE2" + "cvttpd2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "cvtpi2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] + "TARGET_SSE2" + "cvtpi2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;; Conversions between SI and DF + +(define_insn "cvtsd2si" + [(set (match_operand:SI 0 "register_operand" "=r") + (fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "cvtsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SI")]) + +(define_insn "cvtsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r") + (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SI")]) + +(define_insn "cvttsd2si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + (parallel [(const_int 0)]))] UNSPEC_FIX))] + "TARGET_SSE2" + "cvttsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SI")]) + +(define_insn "cvttsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm") + (parallel [(const_int 0)]))] UNSPEC_FIX))] + "TARGET_SSE2 && TARGET_64BIT" + "cvttsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector,vector")]) + +(define_insn "cvtsi2sd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") + (vec_duplicate:V2DF + (float:DF + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 2)))] + "TARGET_SSE2" + "cvtsi2sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "cvtsi2sdq" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") + (vec_duplicate:V2DF + (float:DF + (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 2)))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsi2sdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "vector,direct")]) + +;; Conversions between SF and DF + 
+(define_insn "cvtsd2ss" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0") + (vec_duplicate:V4SF + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand" "xm"))) + (const_int 14)))] + "TARGET_SSE2" + "cvtsd2ss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + +(define_insn "cvtss2sd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 2 "register_operand" "xm") + (parallel [(const_int 0) + (const_int 1)]))) + (const_int 2)))] + "TARGET_SSE2" + "cvtss2sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "cvtpd2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (subreg:V4SF + (vec_concat:V4SI + (subreg:V2SI (float_truncate:V2SF + (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 0) + (const_vector:V2SI [(const_int 0) (const_int 0)])) 0))] + "TARGET_SSE2" + "cvtpd2ps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) + +(define_insn "cvtps2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (float_extend:V2DF + (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE2" + "cvtps2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +;; SSE2 variants of MMX insns + +;; MMX arithmetic + +(define_insn "addv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "addv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "addv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "addv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssaddv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddsb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssaddv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "usaddv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + 
"TARGET_SSE2" + "paddusb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "usaddv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddusw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "subv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (minus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "subv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (minus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "subv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (minus:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "subv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (minus:V2DI (match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sssubv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (ss_minus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubsb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sssubv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ss_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ussubv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (us_minus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubusb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ussubv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubusw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "mulv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (mult:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pmullw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "smulv8hi3_highpart" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (truncate:V8HI + (lshiftrt:V8SI + (mult:V8SI (sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "0")) + (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (const_int 16))))] + "TARGET_SSE2" + "pmulhw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn 
"umulv8hi3_highpart" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (truncate:V8HI + (lshiftrt:V8SI + (mult:V8SI (zero_extend:V8SI (match_operand:V8HI 1 "register_operand" "0")) + (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (const_int 16))))] + "TARGET_SSE2" + "pmulhuw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "sse2_umulsidi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (mult:DI (zero_extend:DI (vec_select:SI + (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 0)]))) + (zero_extend:DI (vec_select:SI + (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])))))] + "TARGET_SSE2" + "pmuludq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "sse2_umulv2siv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (mult:V2DI (zero_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 2)]))) + (zero_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 2)])))))] + "TARGET_SSE2" + "pmuludq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pmaddwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4SI + (sign_extend:V4SI (vec_select:V4HI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI (vec_select:V4HI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))))))] + "TARGET_SSE2" + "pmaddwd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +;; Same as pxor, but don't show input operands so that we don't think +;; they are live. 
+(define_insn "sse2_clrti" + [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))] + "TARGET_SSE2" + "pxor\t{%0, %0|%0, %0}" + [(set_attr "type" "sseiadd") + (set_attr "memory" "none") + (set_attr "mode" "TI")]) + +;; MMX unsigned averages/sum of absolute differences + +(define_insn "sse2_uavgv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (ashiftrt:V16QI + (plus:V16QI (plus:V16QI + (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")) + (const_vector:V16QI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1)))] + "TARGET_SSE2" + "pavgb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sse2_uavgv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ashiftrt:V8HI + (plus:V8HI (plus:V8HI + (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (const_vector:V8HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1)))] + "TARGET_SSE2" + "pavgw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +;; @@@ this isn't the right representation. +(define_insn "sse2_psadbw" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")] + UNSPEC_PSADBW))] + "TARGET_SSE2" + "psadbw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + + +;; MMX insert/extract/shuffle + +(define_insn "sse2_pinsrw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0") + (vec_duplicate:V8HI + (truncate:HI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_SSE2" + "pinsrw\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pextrw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (vec_select:HI (match_operand:V8HI 1 "register_operand" "x") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_SSE2" + "pextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pshufd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_SHUFFLE))] + "TARGET_SSE2" + "pshufd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pshuflw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_PSHUFLW))] + "TARGET_SSE2" + "pshuflw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pshufhw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_PSHUFHW))] + "TARGET_SSE2" + "pshufhw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;; MMX mask-generating comparisons + 
+(define_insn "eqv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (eq:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpeqb\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +(define_insn "eqv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (eq:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpeqw\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +(define_insn "eqv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (eq:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpeqd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +(define_insn "gtv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (gt:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpgtb\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +(define_insn "gtv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (gt:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpgtw\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +(define_insn "gtv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (gt:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpgtd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + + +;; MMX max/min insns + +(define_insn "umaxv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (umax:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pmaxub\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "smaxv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (smax:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pmaxsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "uminv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (umin:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pminub\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sminv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (smin:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pminsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + + +;; MMX shifts + +(define_insn "ashrv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psraw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashrv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + 
"psrad\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "lshrv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psrlw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "lshrv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psrld\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "lshrv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psrlq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashlv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psllw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashlv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "pslld\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashlv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psllq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashrv8hi3_ti" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") + (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + "TARGET_SSE2" + "psraw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashrv4si3_ti" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") + (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + "TARGET_SSE2" + "psrad\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "lshrv8hi3_ti" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") + (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + "TARGET_SSE2" + "psrlw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "lshrv4si3_ti" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") + (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + "TARGET_SSE2" + "psrld\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "lshrv2di3_ti" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") + (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + "TARGET_SSE2" + "psrlq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashlv8hi3_ti" + [(set (match_operand:V8HI 0 "register_operand" "=x") + 
(ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") + (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + "TARGET_SSE2" + "psllw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashlv4si3_ti" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") + (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + "TARGET_SSE2" + "pslld\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashlv2di3_ti" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") + (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + "TARGET_SSE2" + "psllq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +;; See logical MMX insns for the reason for the unspec. Strictly speaking +;; we wouldn't need here it since we never generate TImode arithmetic. + +;; There has to be some kind of prize for the weirdest new instruction... +(define_insn "sse2_ashlti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (unspec:TI + [(ashift:TI (match_operand:TI 1 "register_operand" "0") + (mult:SI (match_operand:SI 2 "immediate_operand" "i") + (const_int 8)))] UNSPEC_NOP))] + "TARGET_SSE2" + "pslldq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "sse2_lshrti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (unspec:TI + [(lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (mult:SI (match_operand:SI 2 "immediate_operand" "i") + (const_int 8)))] UNSPEC_NOP))] + "TARGET_SSE2" + "psrldq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +;; SSE unpack + +(define_insn "sse2_unpckhpd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_concat:V2DF + (vec_select:V2DF (match_operand:V2DF 1 "register_operand" "0") + (parallel [(const_int 1)])) + (vec_select:V2DF (match_operand:V2DF 2 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "unpckhpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_unpcklpd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_concat:V2DF + (vec_select:V2DF (match_operand:V2DF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:V2DF (match_operand:V2DF 2 "register_operand" "x") + (parallel [(const_int 1)]))))] + "TARGET_SSE2" + "unpcklpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;; MMX pack/unpack insns. 
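The pack/unpack patterns that follow implement the usual narrowing/widening idiom: packsswb/packssdw/packuswb squeeze two source vectors into one with saturation, while punpck{l,h}* interleave the low or high halves of their operands. A rough illustration via the standard <emmintrin.h> intrinsics (assumed names, not defined by this patch); the result is the same as a single paddusb, the detour is only to exercise the pack/unpack insns:

#include <emmintrin.h>          /* compile with -msse2 */

/* Add 16 unsigned bytes with headroom: widen to 16 bits, add, then narrow
   back with unsigned saturation (punpcklbw/punpckhbw + paddw + packuswb). */
__m128i add_bytes_saturating_u8 (__m128i a, __m128i b)
{
  __m128i zero = _mm_setzero_si128 ();
  __m128i alo  = _mm_unpacklo_epi8 (a, zero);   /* punpcklbw */
  __m128i ahi  = _mm_unpackhi_epi8 (a, zero);   /* punpckhbw */
  __m128i blo  = _mm_unpacklo_epi8 (b, zero);
  __m128i bhi  = _mm_unpackhi_epi8 (b, zero);
  __m128i slo  = _mm_add_epi16 (alo, blo);      /* paddw     */
  __m128i shi  = _mm_add_epi16 (ahi, bhi);
  return _mm_packus_epi16 (slo, shi);           /* packuswb  */
}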
+ +(define_insn "sse2_packsswb" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_concat:V16QI + (ss_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0")) + (ss_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))] + "TARGET_SSE2" + "packsswb\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_packssdw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (ss_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0")) + (ss_truncate:V4HI (match_operand:V4SI 2 "register_operand" "x"))))] + "TARGET_SSE2" + "packssdw\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_packuswb" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_concat:V16QI + (us_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0")) + (us_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))] + "TARGET_SSE2" + "packuswb\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhbw" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_merge:V16QI + (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0") + (parallel [(const_int 8) (const_int 0) + (const_int 9) (const_int 1) + (const_int 10) (const_int 2) + (const_int 11) (const_int 3) + (const_int 12) (const_int 4) + (const_int 13) (const_int 5) + (const_int 14) (const_int 6) + (const_int 15) (const_int 7)])) + (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x") + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11) + (const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])) + (const_int 21845)))] + "TARGET_SSE2" + "punpckhbw\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhwd" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_merge:V8HI + (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 4) (const_int 0) + (const_int 5) (const_int 1) + (const_int 6) (const_int 2) + (const_int 7) (const_int 3)])) + (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x") + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5) + (const_int 2) (const_int 6) + (const_int 3) (const_int 7)])) + (const_int 85)))] + "TARGET_SSE2" + "punpckhwd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhdq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 2) (const_int 0) + (const_int 3) (const_int 1)])) + (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x") + (parallel [(const_int 0) (const_int 2) + (const_int 1) (const_int 3)])) + (const_int 5)))] + "TARGET_SSE2" + "punpckhdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklbw" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_merge:V16QI + (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11) + (const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])) + (vec_select:V16QI (match_operand:V16QI 2 
"register_operand" "x") + (parallel [(const_int 8) (const_int 0) + (const_int 9) (const_int 1) + (const_int 10) (const_int 2) + (const_int 11) (const_int 3) + (const_int 12) (const_int 4) + (const_int 13) (const_int 5) + (const_int 14) (const_int 6) + (const_int 15) (const_int 7)])) + (const_int 21845)))] + "TARGET_SSE2" + "punpcklbw\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklwd" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_merge:V8HI + (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5) + (const_int 2) (const_int 6) + (const_int 3) (const_int 7)])) + (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x") + (parallel [(const_int 4) (const_int 0) + (const_int 5) (const_int 1) + (const_int 6) (const_int 2) + (const_int 7) (const_int 3)])) + (const_int 85)))] + "TARGET_SSE2" + "punpcklwd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckldq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 2) + (const_int 1) (const_int 3)])) + (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x") + (parallel [(const_int 2) (const_int 0) + (const_int 3) (const_int 1)])) + (const_int 5)))] + "TARGET_SSE2" + "punpckldq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklqdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_merge:V2DI + (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x") + (parallel [(const_int 1) + (const_int 0)])) + (match_operand:V2DI 1 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE2" + "punpcklqdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhqdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_merge:V2DI + (match_operand:V2DI 1 "register_operand" "0") + (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x") + (parallel [(const_int 1) + (const_int 0)])) + (const_int 1)))] + "TARGET_SSE2" + "punpckhqdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;; SSE2 moves + +(define_insn "sse2_movapd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVA))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movapd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movupd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movupd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movdqa" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVA))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movdqa\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movdqu" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" 
"xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movdqu\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movdq2q" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y") + (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && !TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movdq2q_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r") + (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movq2dq" + [(set (match_operand:V2DI 0 "register_operand" "=x,?x") + (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y") + (const_int 0)))] + "TARGET_SSE2 && !TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt,ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movq2dq_rex64" + [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x") + (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r") + (const_int 0)))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt,ssemov,ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_concat:V2DI (vec_select:DI + (match_operand:V2DI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (const_int 0)))] + "TARGET_SSE2" + "movq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse2_loadd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr")) + (const_vector:V4SI [(const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "TARGET_SSE2" + "movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse2_stored" + [(set (match_operand:SI 0 "nonimmediate_operand" "=mr") + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "x") + (parallel [(const_int 0)])))] + "TARGET_SSE2" + "movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movhpd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + (vec_merge:V2DF + (match_operand:V2DF 1 "nonimmediate_operand" "0,0") + (match_operand:V2DF 2 "nonimmediate_operand" "m,x") + (const_int 2)))] + "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" + "movhpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movlpd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + (vec_merge:V2DF + (match_operand:V2DF 1 "nonimmediate_operand" "0,0") + (match_operand:V2DF 2 "nonimmediate_operand" "m,x") + (const_int 1)))] + "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" + "movlpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_expand "sse2_loadsd" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")] + "TARGET_SSE2" +{ + emit_insn 
(gen_sse2_loadsd_1 (operands[0], operands[1], + CONST0_RTX (V2DFmode))); + DONE; +}) + +(define_insn "sse2_loadsd_1" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")) + (match_operand:V2DF 2 "const0_operand" "X") + (const_int 1)))] + "TARGET_SSE2" + "movsd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "sse2_movsd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "register_operand" "x") + (const_int 1)))] + "TARGET_SSE2" + "movsd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "sse2_storesd" + [(set (match_operand:DF 0 "memory_operand" "=m") + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)])))] + "TARGET_SSE2" + "movsd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "sse2_shufpd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_SHUFFLE))] + "TARGET_SSE2" + ;; @@@ check operand order for intel/nonintel syntax + "shufpd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_clflush" + [(unspec_volatile [(match_operand 0 "address_operand" "p")] + UNSPECV_CLFLUSH)] + "TARGET_SSE2" + "clflush %0" + [(set_attr "type" "sse") + (set_attr "memory" "unknown")]) + +(define_expand "sse2_mfence" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] + "TARGET_SSE2" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*mfence_insn" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] + "TARGET_SSE2" + "mfence" + [(set_attr "type" "sse") + (set_attr "memory" "unknown")]) + +(define_expand "sse2_lfence" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] + "TARGET_SSE2" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*lfence_insn" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] + "TARGET_SSE2" + "lfence" + [(set_attr "type" "sse") + (set_attr "memory" "unknown")]) diff --git a/contrib/gcc/config/i386/i386elf.h b/contrib/gcc/config/i386/i386elf.h index ddf19b6..cd01db2 100644 --- a/contrib/gcc/config/i386/i386elf.h +++ b/contrib/gcc/config/i386/i386elf.h @@ -1,5 +1,6 @@ /* Target definitions for GNU compiler for Intel 80386 using ELF - Copyright (C) 1988, 1991, 1995, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1988, 1991, 1995, 2000, 2001, 2002 + Free Software Foundation, Inc. Derived from sysv4.h written by Ron Guilmette (rfg@netcom.com). @@ -24,7 +25,6 @@ Boston, MA 02111-1307, USA. */ #undef PREFERRED_DEBUGGING_TYPE #define PREFERRED_DEBUGGING_TYPE DBX_DEBUG -#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (i386 bare ELF target)"); /* By default, target has a 80387, uses IEEE compatible arithmetic, @@ -40,12 +40,8 @@ Boston, MA 02111-1307, USA. 
*/ (TYPE_MODE (TYPE) == BLKmode \ || (VECTOR_MODE_P (TYPE_MODE (TYPE)) && int_size_in_bytes (TYPE) == 8)) -/* This used to define X86, but james@bigtex.cactus.org says that - is supposed to be defined optionally by user programs--not by default. */ -#define CPP_PREDEFINES "" - #undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu)" +#define CPP_SPEC "" #define ENDFILE_SPEC "crtend.o%s" diff --git a/contrib/gcc/config/i386/k6.md b/contrib/gcc/config/i386/k6.md new file mode 100644 index 0000000..af128bf --- /dev/null +++ b/contrib/gcc/config/i386/k6.md @@ -0,0 +1,136 @@ +;; AMD K6/K6-2 Scheduling +;; Copyright (C) 2002 ;; Free Software Foundation, Inc. +;; +;; This file is part of GNU CC. +;; +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. */ +;; +;; The K6 has similar architecture to PPro. Important difference is, that +;; there are only two decoders and they seems to be much slower than execution +;; units. So we have to pay much more attention to proper decoding for +;; schedulers. We share most of scheduler code for PPro in i386.c +;; +;; The fp unit is not pipelined and do one operation per two cycles including +;; the FXCH. +;; +;; alu describes both ALU units (ALU-X and ALU-Y). +;; alux describes X alu unit +;; fpu describes FPU unit +;; load describes load unit. +;; branch describes branch unit. +;; store decsribes store unit. This unit is not modelled completely and only +;; used to model lea operation. Otherwise it lie outside of the critical +;; path. +;; +;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. + +;; The decoder specification is in the PPro section above! + +;; Shift instructions and certain arithmetic are issued only to X pipe. +(define_function_unit "k6_alux" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")) + 1 1) + +;; The QI mode arithmetic is issued to X pipe only. 
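For orientation only: the restriction described above concerns byte-width (QImode) ALU instructions, which the k6_alux reservation that follows ties to the single ALU-X pipe, whereas word/int ALU ops may issue to either pipe. A trivial C illustration; whether the char operation really remains a QImode insn after the usual integer promotions depends on the code GCC generates, so this is indicative rather than definitive.

/* Compile for K6, e.g. gcc -march=k6 -O2.  */
unsigned char add_u8 (unsigned char a, unsigned char b)
{
  return (unsigned char) (a + b);   /* byte-wide add: modelled as ALU-X only */
}

int add_int (int a, int b)
{
  return a + b;                     /* word-wide add: either ALU pipe */
}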
+(define_function_unit "k6_alux" 1 0 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec") + (match_operand:QI 0 "general_operand" ""))) + 1 1) + +(define_function_unit "k6_alu" 2 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea")) + 1 1) + +(define_function_unit "k6_alu" 2 0 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "imov") + (eq_attr "memory" "none"))) + 1 1) + +(define_function_unit "k6_branch" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "call,callv,ibr")) + 1 1) + +;; Load unit have two cycle latency, but we take care for it in adjust_cost +(define_function_unit "k6_load" 1 0 + (and (eq_attr "cpu" "k6") + (ior (eq_attr "type" "pop") + (eq_attr "memory" "load,both"))) + 1 1) + +(define_function_unit "k6_load" 1 0 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "str") + (eq_attr "memory" "load,both"))) + 10 10) + +;; Lea have two instructions, so latency is probably 2 +(define_function_unit "k6_store" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "lea")) + 2 1) + +(define_function_unit "k6_store" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "str")) + 10 10) + +(define_function_unit "k6_store" 1 0 + (and (eq_attr "cpu" "k6") + (ior (eq_attr "type" "push") + (eq_attr "memory" "store,both"))) + 1 1) + +(define_function_unit "k6_fpu" 1 1 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "fop,fmov,fcmp,fistp")) + 2 2) + +(define_function_unit "k6_fpu" 1 1 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "fmul")) + 2 2) + +;; ??? Guess +(define_function_unit "k6_fpu" 1 1 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "fdiv,fpspc")) + 56 56) + +(define_function_unit "k6_alu" 2 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "imul")) + 2 2) + +(define_function_unit "k6_alux" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "imul")) + 2 2) + +;; ??? Guess +(define_function_unit "k6_alu" 2 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "idiv")) + 17 17) + +(define_function_unit "k6_alux" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "idiv")) + 17 17) diff --git a/contrib/gcc/config/i386/linux-aout.h b/contrib/gcc/config/i386/linux-aout.h index 4b9cd5c..783b8a9 100644 --- a/contrib/gcc/config/i386/linux-aout.h +++ b/contrib/gcc/config/i386/linux-aout.h @@ -1,5 +1,6 @@ /* Definitions for Intel 386 running Linux-based GNU systems using a.out. - Copyright (C) 1992, 1994, 1995, 1997, 1998, 2002 Free Software Foundation, Inc. + Copyright (C) 1992, 1994, 1995, 1997, 1998, 2002 + Free Software Foundation, Inc. Contributed by H.J. Lu (hjl@nynexst.com) This file is part of GNU CC. @@ -19,22 +20,26 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* This is tested by i386/gas.h. */ -#define YES_UNDERSCORES - -#include <i386/gstabs.h> -#include <linux-aout.h> /* some common stuff */ - #undef ASM_COMMENT_START #define ASM_COMMENT_START "#" -/* Specify predefined symbols in preprocessor. 
*/ - -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-Dunix -D__gnu_linux__ -Dlinux -Asystem=posix" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_define ("__gnu_linux__"); \ + builtin_assert ("system=posix"); \ + if (flag_pic) \ + { \ + builtin_define ("__PIC__"); \ + builtin_define ("__pic__"); \ + } \ + } \ + while (0) #undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) %{fPIC:-D__PIC__ -D__pic__} %{fpic:-D__PIC__ -D__pic__} %{posix:-D_POSIX_SOURCE}" +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" #undef SIZE_TYPE #define SIZE_TYPE "unsigned int" diff --git a/contrib/gcc/config/i386/linux.h b/contrib/gcc/config/i386/linux.h index b07bd03..6b553cd 100644 --- a/contrib/gcc/config/i386/linux.h +++ b/contrib/gcc/config/i386/linux.h @@ -33,7 +33,6 @@ Boston, MA 02111-1307, USA. */ fputs ("\t.intel_syntax\n", FILE); \ } while (0) -#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (i386 Linux/ELF)"); /* The svr4 ABI for the i386 says that records and unions are returned @@ -54,23 +53,14 @@ Boston, MA 02111-1307, USA. */ #define NO_PROFILE_COUNTERS -#undef FUNCTION_PROFILER -#define FUNCTION_PROFILER(FILE, LABELNO) \ -{ \ - if (flag_pic) \ - fprintf (FILE, "\tcall\t*mcount@GOT(%%ebx)\n"); \ - else \ - fprintf (FILE, "\tcall\tmcount\n"); \ -} +#undef MCOUNT_NAME +#define MCOUNT_NAME "mcount" -/* True if it is possible to profile code that does not have a frame - pointer. - - The GLIBC version of mcount for the x86 assumes that there is a +/* The GLIBC version of mcount for the x86 assumes that there is a frame, so we cannot allow profiling without a frame pointer. */ -#undef TARGET_ALLOWS_PROFILING_WITHOUT_FRAME_POINTER -#define TARGET_ALLOWS_PROFILING_WITHOUT_FRAME_POINTER false +#undef SUBTARGET_FRAME_POINTER_REQUIRED +#define SUBTARGET_FRAME_POINTER_REQUIRED current_function_profile #undef SIZE_TYPE #define SIZE_TYPE "unsigned int" @@ -84,14 +74,27 @@ Boston, MA 02111-1307, USA. */ #undef WCHAR_TYPE_SIZE #define WCHAR_TYPE_SIZE BITS_PER_WORD -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D__ELF__ -Dunix -D__gnu_linux__ -Dlinux -Asystem=posix" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_define ("__ELF__"); \ + builtin_define ("__gnu_linux__"); \ + builtin_assert ("system=posix"); \ + if (flag_pic) \ + { \ + builtin_define ("__PIC__"); \ + builtin_define ("__pic__"); \ + } \ + } \ + while (0) #undef CPP_SPEC #ifdef USE_GNULIBC_1 -#define CPP_SPEC "%(cpp_cpu) %{fPIC:-D__PIC__ -D__pic__} %{fpic:-D__PIC__ -D__pic__} %{posix:-D_POSIX_SOURCE}" +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" #else -#define CPP_SPEC "%(cpp_cpu) %{fPIC:-D__PIC__ -D__pic__} %{fpic:-D__PIC__ -D__pic__} %{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" #endif #undef CC1_SPEC @@ -221,9 +224,13 @@ Boston, MA 02111-1307, USA. */ state data appropriately. See unwind-dw2.c for the structs. */ #ifdef IN_LIBGCC2 +/* There's no sys/ucontext.h for some (all?) libc1, so no + signal-turned-exceptions for them. There's also no configure-run for + the target, so we can't check on (e.g.) HAVE_SYS_UCONTEXT_H. Using the + target libc1 macro should be enough. */ +#ifndef USE_GNULIBC_1 #include <signal.h> #include <sys/ucontext.h> -#endif #define MD_FALLBACK_FRAME_STATE_FOR(CONTEXT, FS, SUCCESS) \ do { \ @@ -278,3 +285,5 @@ Boston, MA 02111-1307, USA. 
*/ (FS)->retaddr_column = 8; \ goto SUCCESS; \ } while (0) +#endif /* not USE_GNULIBC_1 */ +#endif /* IN_LIBGCC2 */ diff --git a/contrib/gcc/config/i386/linux64.h b/contrib/gcc/config/i386/linux64.h index 34c6d3c..7a9e0ba 100644 --- a/contrib/gcc/config/i386/linux64.h +++ b/contrib/gcc/config/i386/linux64.h @@ -1,5 +1,5 @@ /* Definitions for AMD x86-64 running Linux-based GNU systems with ELF format. - Copyright (C) 2001, 2002 Free Software Foundation, Inc. + Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. Contributed by Jan Hubicka <jh@suse.cz>, based on linux.h. This file is part of GNU CC. @@ -21,14 +21,31 @@ Boston, MA 02111-1307, USA. */ #define LINUX_DEFAULT_ELF -#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (x86-64 Linux/ELF)"); -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D__ELF__ -Dunix -D__gnu_linux__ -Dlinux -Asystem(posix)" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_define ("__gnu_linux__"); \ + builtin_define ("__ELF__"); \ + builtin_assert ("system=posix"); \ + if (flag_pic) \ + { \ + builtin_define ("__PIC__"); \ + builtin_define ("__pic__"); \ + } \ + if (TARGET_64BIT) \ + { \ + builtin_define ("__LP64__"); \ + builtin_define ("_LP64"); \ + } \ + } \ + while (0) #undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) %{fPIC:-D__PIC__ -D__pic__} %{fpic:-D__PIC__ -D__pic__} %{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT} %{!m32:-D__LONG_MAX__=9223372036854775807L}" +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" /* The svr4 ABI for the i386 says that records and unions are returned in memory. In the 64bit compilation we will turn this flag off in @@ -45,7 +62,7 @@ Boston, MA 02111-1307, USA. */ done. */ #undef LINK_SPEC -#define LINK_SPEC "%{!m32:-m elf_x86_64 -Y P,/usr/lib64} %{m32:-m elf_i386} \ +#define LINK_SPEC "%{!m32:-m elf_x86_64} %{m32:-m elf_i386} \ %{shared:-shared} \ %{!shared: \ %{!static: \ @@ -104,17 +121,17 @@ Boston, MA 02111-1307, USA. */ (FS)->regs.reg[0].how = REG_SAVED_OFFSET; \ (FS)->regs.reg[0].loc.offset = (long)&sc_->rax - new_cfa_; \ (FS)->regs.reg[1].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[1].loc.offset = (long)&sc_->rbx - new_cfa_; \ + (FS)->regs.reg[1].loc.offset = (long)&sc_->rdx - new_cfa_; \ (FS)->regs.reg[2].how = REG_SAVED_OFFSET; \ (FS)->regs.reg[2].loc.offset = (long)&sc_->rcx - new_cfa_; \ (FS)->regs.reg[3].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[3].loc.offset = (long)&sc_->rdx - new_cfa_; \ + (FS)->regs.reg[3].loc.offset = (long)&sc_->rbx - new_cfa_; \ (FS)->regs.reg[4].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[4].loc.offset = (long)&sc_->rbp - new_cfa_; \ + (FS)->regs.reg[4].loc.offset = (long)&sc_->rsi - new_cfa_; \ (FS)->regs.reg[5].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[5].loc.offset = (long)&sc_->rsi - new_cfa_; \ + (FS)->regs.reg[5].loc.offset = (long)&sc_->rdi - new_cfa_; \ (FS)->regs.reg[6].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[6].loc.offset = (long)&sc_->rdi - new_cfa_; \ + (FS)->regs.reg[6].loc.offset = (long)&sc_->rbp - new_cfa_; \ (FS)->regs.reg[8].how = REG_SAVED_OFFSET; \ (FS)->regs.reg[8].loc.offset = (long)&sc_->r8 - new_cfa_; \ (FS)->regs.reg[9].how = REG_SAVED_OFFSET; \ @@ -131,6 +148,8 @@ Boston, MA 02111-1307, USA. 
*/ (FS)->regs.reg[14].loc.offset = (long)&sc_->r14 - new_cfa_; \ (FS)->regs.reg[15].how = REG_SAVED_OFFSET; \ (FS)->regs.reg[15].loc.offset = (long)&sc_->r15 - new_cfa_; \ + (FS)->regs.reg[16].how = REG_SAVED_OFFSET; \ + (FS)->regs.reg[16].loc.offset = (long)&sc_->rip - new_cfa_; \ (FS)->retaddr_column = 16; \ goto SUCCESS; \ } while (0) diff --git a/contrib/gcc/config/i386/lynx-ng.h b/contrib/gcc/config/i386/lynx-ng.h index 49150a3..08fa60f 100644 --- a/contrib/gcc/config/i386/lynx-ng.h +++ b/contrib/gcc/config/i386/lynx-ng.h @@ -1,5 +1,5 @@ /* Definitions for Intel 386 running LynxOS, using Lynx's old as and ld. - Copyright (C) 1993, 1995 Free Software Foundation, Inc. + Copyright (C) 1993, 1995, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -18,12 +18,19 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <i386/gstabs.h> -#include <lynx-ng.h> +#define TARGET_VERSION fprintf (stderr, " (80386, LYNX BSD syntax)"); -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-Dunix -DI386 -DLynx -DIBITS32 \ - -Asystem=unix -Asystem=lynx" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("unix"); \ + builtin_define_std ("I386"); \ + builtin_define_std ("Lynx"); \ + builtin_define_std ("IBITS32"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=lynx"); \ + } \ + while (0) /* Provide required defaults for linker switches. */ @@ -36,3 +43,35 @@ Boston, MA 02111-1307, USA. */ #define CALL_USED_REGISTERS \ /*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg*/ \ { 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + +/* Prefix for internally generated assembler labels. If we aren't using + underscores, we are using prefix `.'s to identify labels that should + be ignored, as in `i386/gas.h' --karl@cs.umb.edu */ + +#undef LPREFIX +#define LPREFIX ".L" + +/* The prefix to add to user-visible assembler symbols. */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" + +/* If user-symbols don't have underscores, + then it must take more than `L' to identify + a label that should be ignored. */ + +/* This is how to store into the string BUF + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \ + sprintf ((BUF), ".%s%ld", (PREFIX), (long)(NUMBER)) + +/* This is how to output an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. */ + +#undef ASM_OUTPUT_INTERNAL_LABEL +#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ + fprintf (FILE, ".%s%d:\n", PREFIX, NUM) diff --git a/contrib/gcc/config/i386/lynx.h b/contrib/gcc/config/i386/lynx.h index 91ed31e..7835f27 100644 --- a/contrib/gcc/config/i386/lynx.h +++ b/contrib/gcc/config/i386/lynx.h @@ -1,5 +1,5 @@ /* Definitions for Intel 386 running LynxOS. - Copyright (C) 1993, 1995, 1996 Free Software Foundation, Inc. + Copyright (C) 1993, 1995, 1996, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -18,11 +18,18 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ -#include <i386/gstabs.h> -#include <lynx.h> +#define TARGET_VERSION fprintf (stderr, " (80386, LYNX BSD syntax)"); -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-DI386 -DLynx -DIBITS32 -Asystem=unix -Asystem=lynx" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("I386"); \ + builtin_define_std ("Lynx"); \ + builtin_define_std ("IBITS32"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=lynx"); \ + } \ + while (0) /* The prefix to add to user-visible assembler symbols. */ @@ -37,3 +44,35 @@ Boston, MA 02111-1307, USA. */ #define CALL_USED_REGISTERS \ /*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg*/ \ { 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + +/* Prefix for internally generated assembler labels. If we aren't using + underscores, we are using prefix `.'s to identify labels that should + be ignored, as in `i386/gas.h' --karl@cs.umb.edu */ + +#undef LPREFIX +#define LPREFIX ".L" + +/* The prefix to add to user-visible assembler symbols. */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" + +/* If user-symbols don't have underscores, + then it must take more than `L' to identify + a label that should be ignored. */ + +/* This is how to store into the string BUF + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \ + sprintf ((BUF), ".%s%ld", (PREFIX), (long)(NUMBER)) + +/* This is how to output an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. */ + +#undef ASM_OUTPUT_INTERNAL_LABEL +#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ + fprintf (FILE, ".%s%d:\n", PREFIX, NUM) diff --git a/contrib/gcc/config/i386/mach.h b/contrib/gcc/config/i386/mach.h index 7e2b1cc..0aa3846 100644 --- a/contrib/gcc/config/i386/mach.h +++ b/contrib/gcc/config/i386/mach.h @@ -1,13 +1,16 @@ /* Configuration for an i386 running Mach as the target machine. */ -/* We do want to add an underscore to the front of each user symbol. - i386/gas.h checks this. */ -#define YES_UNDERSCORES +#define TARGET_VERSION fprintf (stderr, " (80386, Mach)"); -#include "i386/gstabs.h" - -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-Dunix -DMACH -Asystem=unix -Asystem=mach" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("unix"); \ + builtin_define_std ("MACH"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=mach"); \ + } \ + while (0) /* Specify extra dir to search for include files. */ #define SYSTEM_INCLUDE_DIR "/usr/mach/include" diff --git a/contrib/gcc/config/i386/mingw32.h b/contrib/gcc/config/i386/mingw32.h index 629328e..7f62fbd 100644 --- a/contrib/gcc/config/i386/mingw32.h +++ b/contrib/gcc/config/i386/mingw32.h @@ -1,6 +1,7 @@ /* Operating system specific defines to be used when targeting GCC for hosting on Windows32, using GNU tools and the Windows32 API Library. - Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003 + Free Software Foundation, Inc. This file is part of GNU CC. @@ -32,13 +33,35 @@ Boston, MA 02111-1307, USA. */ #define TARGET_EXECUTABLE_SUFFIX ".exe" -/* Please keep changes to CPP_PREDEFINES in sync with i386/crtdll. 
The - only difference between the two should be __MSVCRT__ needed to - distinguish MSVC from CRTDLL runtime in mingw headers. */ -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D_WIN32 -D__WIN32 -D__WIN32__ -DWIN32 \ - -D__MINGW32__ -D__MSVCRT__ -DWINNT -D_X86_=1 \ - -Asystem=winnt" +/* See i386/crtdll.h for an altervative definition. */ +#define EXTRA_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__MSVCRT__"); \ + builtin_define ("__MINGW32__"); \ + } \ + while (0) + +#undef TARGET_OS_CPP_BUILTINS /* From cygwin.h. */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("_WIN32"); \ + builtin_define_std ("WIN32"); \ + builtin_define_std ("WINNT"); \ + builtin_define ("_X86_=1"); \ + builtin_define ("__stdcall=__attribute__((__stdcall__))"); \ + builtin_define ("__cdecl=__attribute__((__cdecl__))"); \ + builtin_define ("__declspec(x)=__attribute__((x))"); \ + if (!flag_iso) \ + { \ + builtin_define ("_stdcall=__attribute__((__stdcall__))"); \ + builtin_define ("_cdecl=__attribute__((__cdecl__))"); \ + } \ + EXTRA_OS_CPP_BUILTINS (); \ + builtin_assert ("system=winnt"); \ + } \ + while (0) /* Specific a different directory for the standard include files. */ #undef STANDARD_INCLUDE_DIR @@ -47,14 +70,7 @@ Boston, MA 02111-1307, USA. */ #define STANDARD_INCLUDE_COMPONENT "MINGW" #undef CPP_SPEC -#define CPP_SPEC \ - "-remap %(cpp_cpu) %{posix:-D_POSIX_SOURCE} %{mthreads:-D_MT} \ - -D__stdcall=__attribute__((__stdcall__)) \ - -D__cdecl=__attribute__((__cdecl__)) \ - %{!ansi:-D_stdcall=__attribute__((__stdcall__)) \ - -D_cdecl=__attribute__((__cdecl__))} \ - -D__declspec(x)=__attribute__((x))" - +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{mthreads:-D_MT}" /* For Windows applications, include more libraries, but always include kernel32. */ @@ -74,7 +90,7 @@ Boston, MA 02111-1307, USA. */ /* Include in the mingw32 libraries with libgcc */ #undef LIBGCC_SPEC #define LIBGCC_SPEC \ - "%{mthreads:-lmingwthrd} -lmingw32 -lgcc -lmoldname -lmsvcrt" + "%{mthreads:-lmingwthrd} -lmingw32 -lgcc -lmoldname -lmingwex -lmsvcrt" #undef STARTFILE_SPEC #define STARTFILE_SPEC "%{shared|mdll:dllcrt2%O%s} \ @@ -85,24 +101,33 @@ Boston, MA 02111-1307, USA. */ #define MATH_LIBRARY "" /* Output STRING, a string representing a filename, to FILE. - We canonicalize it to be in MS-DOS format. */ + We canonicalize it to be in Unix format (backslashe are replaced + forward slashes. */ #undef OUTPUT_QUOTED_STRING -#define OUTPUT_QUOTED_STRING(FILE, STRING) \ -do { \ - char c; \ - \ - putc ('\"', asm_file); \ - \ - while ((c = *string++) != 0) \ - { \ - if (c == '\\') \ - c = '/'; \ - \ - if (c == '\"') \ - putc ('\\', asm_file); \ - putc (c, asm_file); \ - } \ - \ - putc ('\"', asm_file); \ +#define OUTPUT_QUOTED_STRING(FILE, STRING) \ +do { \ + char c; \ + \ + putc ('\"', asm_file); \ + \ + while ((c = *string++) != 0) \ + { \ + if (c == '\\') \ + c = '/'; \ + \ + if (ISPRINT (c)) \ + { \ + if (c == '\"') \ + putc ('\\', asm_file); \ + putc (c, asm_file); \ + } \ + else \ + fprintf (asm_file, "\\%03o", (unsigned char) c); \ + } \ + \ + putc ('\"', asm_file); \ } while (0) +/* Define as short unsigned for compatability with MS runtime. 
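The new OUTPUT_QUOTED_STRING body above rewrites backslashes to forward slashes, escapes embedded quotes, and now prints non-printable bytes as octal escapes. A stand-alone sketch of the same loop follows; the function name is made up, and the printability test is assumed to be the C locale's.

/* Write STRING to FP as a quoted assembler string: '\' becomes '/',
   '"' is escaped, and non-printable bytes are emitted as \ooo.
   Mirrors the logic of the macro above; names are illustrative. */
#include <stdio.h>
#include <ctype.h>

static void output_quoted_string (FILE *fp, const char *string)
{
  char c;

  putc ('"', fp);
  while ((c = *string++) != 0)
    {
      if (c == '\\')
        c = '/';
      if (isprint ((unsigned char) c))
        {
          if (c == '"')
            putc ('\\', fp);
          putc (c, fp);
        }
      else
        fprintf (fp, "\\%03o", (unsigned char) c);
    }
  putc ('"', fp);
}

int main (void)
{
  output_quoted_string (stdout, "C:\\src\\file\t1.c");
  putc ('\n', stdout);
  return 0;
}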
*/ +#undef WINT_TYPE +#define WINT_TYPE "short unsigned int" diff --git a/contrib/gcc/config/i386/mmintrin.h b/contrib/gcc/config/i386/mmintrin.h index bbfdd30..7b4aa01 100644 --- a/contrib/gcc/config/i386/mmintrin.h +++ b/contrib/gcc/config/i386/mmintrin.h @@ -30,6 +30,9 @@ #ifndef _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED +#ifndef __MMX__ +# error "MMX instruction set not enabled" +#else /* The data type intended for user use. */ typedef int __m64 __attribute__ ((__mode__ (__V2SI__))); @@ -53,6 +56,22 @@ _mm_cvtsi32_si64 (int __i) return (__m64) __tmp; } +#ifdef __x86_64__ +/* Convert I to a __m64 object. */ +static __inline __m64 +_mm_cvtsi64x_si64 (long long __i) +{ + return (__m64) __i; +} + +/* Convert I to a __m64 object. */ +static __inline __m64 +_mm_set_pi64x (long long __i) +{ + return (__m64) __i; +} +#endif + /* Convert the lower 32 bits of the __m64 object into an integer. */ static __inline int _mm_cvtsi64_si32 (__m64 __i) @@ -61,6 +80,15 @@ _mm_cvtsi64_si32 (__m64 __i) return __tmp; } +#ifdef __x86_64__ +/* Convert the lower 32 bits of the __m64 object into an integer. */ +static __inline long long +_mm_cvtsi64_si64x (__m64 __i) +{ + return (long long)__i; +} +#endif + /* Pack the four 16-bit values from M1 into the lower four 8-bit values of the result, and the four 16-bit values from M2 into the upper four 8-bit values of the result, all with signed saturation. */ @@ -157,6 +185,13 @@ _mm_add_pi32 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); } +/* Add the 64-bit values in M1 to the 64-bit values in M2. */ +static __inline __m64 +_mm_add_si64 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); +} + /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed saturated arithmetic. */ static __inline __m64 @@ -210,6 +245,13 @@ _mm_sub_pi32 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); } +/* Add the 64-bit values in M1 to the 64-bit values in M2. */ +static __inline __m64 +_mm_sub_si64 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); +} + /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed saturating arithmetic. */ static __inline __m64 @@ -541,4 +583,5 @@ _mm_set1_pi8 (char __b) return _mm_set1_pi32 (__i); } +#endif /* __MMX__ */ #endif /* _MMINTRIN_H_INCLUDED */ diff --git a/contrib/gcc/config/i386/moss.h b/contrib/gcc/config/i386/moss.h index 200cae0..642c625 100644 --- a/contrib/gcc/config/i386/moss.h +++ b/contrib/gcc/config/i386/moss.h @@ -19,8 +19,20 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D__ELF__ -Dmoss -Asystem=posix" +#undef TARGET_OS_CPP_BUILTINS /* config.gcc includes i386/linux.h. */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("moss"); \ + builtin_define ("__ELF__"); \ + builtin_assert ("system=posix"); \ + if (flag_pic) \ + { \ + builtin_define ("__PIC__"); \ + builtin_define ("__pic__"); \ + } \ + } \ + while (0) #undef STARTFILE_SPEC #define STARTFILE_SPEC "crt0.o%s" diff --git a/contrib/gcc/config/i386/netbsd-elf.h b/contrib/gcc/config/i386/netbsd-elf.h index 30267df..4f49bd3 100644 --- a/contrib/gcc/config/i386/netbsd-elf.h +++ b/contrib/gcc/config/i386/netbsd-elf.h @@ -20,28 +20,35 @@ along with GNU CC; see the file COPYING. 
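The mmintrin.h hunks earlier in this chunk add the #error guard for builds without -mmmx, the x86-64 __m64 conversions, and the _mm_add_si64/_mm_sub_si64 wrappers. A minimal usage sketch of the pre-existing 32-bit intrinsics is shown below, assuming it is compiled with MMX enabled (e.g. -mmmx); note the _mm_empty call needed before returning to x87 code.

/* Minimal MMX usage example; compile with e.g. -mmmx on ia32/x86-64. */
#include <stdio.h>
#include <mmintrin.h>

int main (void)
{
  __m64 a = _mm_set_pi32 (1, 2);        /* packs two 32-bit ints   */
  __m64 b = _mm_set_pi32 (3, 4);
  __m64 sum = _mm_add_pi32 (a, b);      /* element-wise 32-bit add */

  int low = _mm_cvtsi64_si32 (sum);     /* lower 32 bits: 2 + 4    */
  _mm_empty ();                         /* leave MMX state (emms)  */

  printf ("%d\n", low);
  return low == 6 ? 0 : 1;
}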
If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* Provide a LINK_SPEC appropriate for a NetBSD/i386 ELF target. - This is a copy of LINK_SPEC from <netbsd-elf.h> tweaked for - the i386 target. */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + } \ + while (0) + + +/* Extra specs needed for NetBSD/i386 ELF. */ + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, + + +/* Provide a LINK_SPEC appropriate for a NetBSD/i386 ELF target. */ #undef LINK_SPEC -#define LINK_SPEC \ - "%{assert*} %{R*} \ - %{shared:-shared} \ - %{!shared: \ - -dc -dp \ - %{!nostdlib: \ - %{!r*: \ - %{!e*:-e __start}}} \ - %{!static: \ - %{rdynamic:-export-dynamic} \ - %{!dynamic-linker:-dynamic-linker /usr/libexec/ld.elf_so}} \ - %{static:-static}}" - -/* Names to predefine in the preprocessor for this target machine. */ - -#define CPP_PREDEFINES \ - "-D__NetBSD__ -D__ELF__ -Asystem=unix -Asystem=NetBSD" +#define LINK_SPEC NETBSD_LINK_SPEC_ELF + +#define NETBSD_ENTRY_POINT "__start" + + +/* Provide a CPP_SPEC appropriate for NetBSD. */ + +#undef CPP_SPEC +#define CPP_SPEC "%(netbsd_cpp_spec)" + /* Make gcc agree with <machine/ansi.h> */ @@ -114,5 +121,7 @@ Boston, MA 02111-1307, USA. */ we don't care about compatibility with older gcc versions. */ #define DEFAULT_PCC_STRUCT_RETURN 1 -#undef TARGET_VERSION +/* Attempt to enable execute permissions on the stack. */ +#define TRANSFER_FROM_TRAMPOLINE NETBSD_ENABLE_EXECUTE_STACK + #define TARGET_VERSION fprintf (stderr, " (NetBSD/i386 ELF)"); diff --git a/contrib/gcc/config/i386/netbsd.h b/contrib/gcc/config/i386/netbsd.h index 1d95de2..45ae893 100644 --- a/contrib/gcc/config/i386/netbsd.h +++ b/contrib/gcc/config/i386/netbsd.h @@ -1,20 +1,24 @@ -/* This is tested by i386gas.h. */ -#define YES_UNDERSCORES +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_AOUT(); \ + } \ + while (0) -#include <i386/gstabs.h> - -/* Get generic NetBSD definitions. */ -#include <netbsd.h> -#include <netbsd-aout.h> +#define TARGET_VERSION fprintf (stderr, " (NetBSD/i386 a.out)"); /* This goes away when the math-emulator is fixed */ #undef TARGET_SUBTARGET_DEFAULT #define TARGET_SUBTARGET_DEFAULT \ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_NO_FANCY_MATH_387) -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-Dunix -D__NetBSD__ \ - -Asystem=unix -Asystem=bsd -Asystem=NetBSD" +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, + +#undef CPP_SPEC +#define CPP_SPEC "%(netbsd_cpp_spec)" + #undef SIZE_TYPE #define SIZE_TYPE "unsigned int" @@ -58,3 +62,11 @@ /* Until they use ELF or something that handles dwarf2 unwinds and initialization stuff better. */ #define DWARF2_UNWIND_INFO 0 + +/* Redefine this so that it becomes "_GLOBAL_OFFSET_TABLE_" when the label + prefix is added. */ +#undef GOT_SYMBOL_NAME +#define GOT_SYMBOL_NAME "GLOBAL_OFFSET_TABLE_" + +/* Attempt to enable execute permissions on the stack. */ +#define TRANSFER_FROM_TRAMPOLINE NETBSD_ENABLE_EXECUTE_STACK diff --git a/contrib/gcc/config/i386/netbsd64.h b/contrib/gcc/config/i386/netbsd64.h index 051f33b..341b6d1 100644 --- a/contrib/gcc/config/i386/netbsd64.h +++ b/contrib/gcc/config/i386/netbsd64.h @@ -20,50 +20,40 @@ along with GNU CC; see the file COPYING. 
If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + if (TARGET_64BIT) \ + NETBSD_OS_CPP_BUILTINS_LP64(); \ + } \ + while (0) -/* Provide a LINK_SPEC appropriate for a NetBSD/x86-64 ELF target. - This is a copy of LINK_SPEC from <netbsd-elf.h> tweaked for - the x86-64 target. */ -#undef LINK_SPEC -#define LINK_SPEC \ - "%{!m32:-m elf_x86_64} \ - %{m32:-m elf_i386} \ - %{assert*} %{R*} \ - %{shared:-shared} \ - %{!shared: \ - -dc -dp \ - %{!nostdlib: \ - %{!r*: \ - %{!e*:-e _start}}} \ - %{!static: \ - %{rdynamic:-export-dynamic} \ - %{!dynamic-linker:-dynamic-linker /usr/libexec/ld.elf_so}} \ - %{static:-static}}" - - -/* Names to predefine in the preprocessor for this target machine. */ +/* Extra specs needed for NetBSD/x86-64 ELF. */ -#define CPP_PREDEFINES \ - "-D__NetBSD__ -D__ELF__ -Asystem=unix -Asystem=NetBSD" +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \ + { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, -/* Provide some extra CPP specs needed by NetBSD/x86_64. */ -#define CPP_LP64_SPEC "%{!m32:-D_LP64}" +/* Provide a LINK_SPEC appropriate for a NetBSD/x86-64 ELF target. */ -#define CPP_SUBTARGET_SPEC "%(cpp_lp64)" +#undef LINK_SPEC +#define LINK_SPEC \ + "%{m32:-m elf_i386} \ + %{m64:-m elf_x86_64} \ + %(netbsd_link_spec)" -#undef SUBTARGET_EXTRA_SPECS -#define SUBTARGET_EXTRA_SPECS \ - { "cpp_lp64", CPP_LP64_SPEC }, \ - { "cpp_subtarget", CPP_SUBTARGET_SPEC }, +#define NETBSD_ENTRY_POINT "_start" -/* Provide a CPP_SPEC appropriate for NetBSD. Currently we deal with - our subtarget specs and the GCC option `-posix'. */ +/* Provide a CPP_SPEC appropriate for NetBSD. */ #undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) %(cpp_subtarget) %{posix:-D_POSIX_SOURCE}" +#define CPP_SPEC "%(netbsd_cpp_spec)" /* Output assembler code to FILE to call the profiler. */ @@ -79,6 +69,7 @@ Boston, MA 02111-1307, USA. */ fprintf (FILE, "\tcall __mcount\n"); \ } +/* Attempt to enable execute permissions on the stack. */ +#define TRANSFER_FROM_TRAMPOLINE NETBSD_ENABLE_EXECUTE_STACK -#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (NetBSD/x86_64 ELF)"); diff --git a/contrib/gcc/config/i386/openbsd.h b/contrib/gcc/config/i386/openbsd.h index 5015b9d..60f1657 100644 --- a/contrib/gcc/config/i386/openbsd.h +++ b/contrib/gcc/config/i386/openbsd.h @@ -1,5 +1,5 @@ /* Configuration for an OpenBSD i386 target. - Copyright (C) 1999, 2000 Free Software Foundation, Inc. + Copyright (C) 1999, 2000, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -18,23 +18,24 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* This is tested by i386gas.h. */ -#define YES_UNDERSCORES -#include <i386/gstabs.h> - -/* Get generic OpenBSD definitions. 
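The rewritten netbsd64.h LINK_SPEC above reduces to picking the linker emulation from -m32/-m64 and delegating everything else to the shared netbsd_link_spec. As a rough illustration of what those %{...} spec fragments select, here is a hypothetical helper showing the observable effect; it is not GCC's spec machinery.

/* Toy expansion of "%{m32:-m elf_i386} %{m64:-m elf_x86_64}":
   picks the ld emulation from the -m32/-m64 driver option. */
#include <stdio.h>

static const char *link_emulation (int m32)
{
  return m32 ? "-m elf_i386" : "-m elf_x86_64";
}

int main (void)
{
  printf ("cc -m32 -> ld %s\n", link_emulation (1));
  printf ("cc -m64 -> ld %s\n", link_emulation (0));
  return 0;
}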
*/ -#define OBSD_OLD_GAS -#include <openbsd.h> +#define TARGET_VERSION fprintf (stderr, " (OpenBSD/i386)"); /* This goes away when the math-emulator is fixed */ #undef TARGET_SUBTARGET_DEFAULT #define TARGET_SUBTARGET_DEFAULT \ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_NO_FANCY_MATH_387) -/* Run-time target specifications */ -#define CPP_PREDEFINES "-D__unix__ -D__OpenBSD__ \ - -Asystem=unix -Asystem=bsd -Asystem=OpenBSD" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__unix__"); \ + builtin_define ("__OpenBSD__"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=bsd"); \ + builtin_assert ("system=OpenBSD"); \ + } \ + while (0) /* Layout of source language data types. */ @@ -95,9 +96,6 @@ Boston, MA 02111-1307, USA. */ #undef ASM_PREFERRED_EH_DATA_FORMAT - -/* Note that we pick up ASM_OUTPUT_MI_THUNK from unix.h. */ - #undef ASM_COMMENT_START #define ASM_COMMENT_START ";#" diff --git a/contrib/gcc/config/i386/pentium.md b/contrib/gcc/config/i386/pentium.md new file mode 100644 index 0000000..b4c5ece --- /dev/null +++ b/contrib/gcc/config/i386/pentium.md @@ -0,0 +1,312 @@ +;; Pentium Scheduling +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; +;; This file is part of GNU CC. +;; +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. */ +;; +;; The Pentium is an in-order core with two integer pipelines. + +;; True for insns that behave like prefixed insns on the Pentium. +(define_attr "pent_prefix" "false,true" + (if_then_else (ior (eq_attr "prefix_0f" "1") + (ior (eq_attr "prefix_data16" "1") + (eq_attr "prefix_rep" "1"))) + (const_string "true") + (const_string "false"))) + +;; Categorize how an instruction slots. + +;; The non-MMX Pentium slots an instruction with prefixes on U pipe only, +;; while MMX Pentium can slot it on either U or V. Model non-MMX Pentium +;; rules, because it results in noticeably better code on non-MMX Pentium +;; and doesn't hurt much on MMX. (Prefixed instructions are not very +;; common, so the scheduler usualy has a non-prefixed insn to pair). 
+ +(define_attr "pent_pair" "uv,pu,pv,np" + (cond [(eq_attr "imm_disp" "true") + (const_string "np") + (ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec") + (and (eq_attr "type" "pop,push") + (eq_attr "memory" "!both"))) + (if_then_else (eq_attr "pent_prefix" "true") + (const_string "pu") + (const_string "uv")) + (eq_attr "type" "ibr") + (const_string "pv") + (and (eq_attr "type" "ishift") + (match_operand 2 "const_int_operand" "")) + (const_string "pu") + (and (eq_attr "type" "rotate") + (match_operand 2 "const_int_1_operand" "")) + (const_string "pu") + (and (eq_attr "type" "ishift1") + (match_operand 1 "const_int_operand" "")) + (const_string "pu") + (and (eq_attr "type" "rotate1") + (match_operand 1 "const_int_1_operand" "")) + (const_string "pu") + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_string "pv") + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_string "pv") + ] + (const_string "np"))) + +(define_automaton "pentium,pentium_fpu") + +;; Pentium do have U and V pipes. Instruction to both pipes +;; are alwyas issued together, much like on VLIW. +;; +;; predecode +;; / \ +;; decodeu decodev +;; / | | +;; fpu executeu executev +;; | | | +;; fpu retire retire +;; | +;; fpu +;; We add dummy "port" pipes allocated only first cycle of +;; instruction to specify this behavior. + +(define_cpu_unit "pentium-portu,pentium-portv" "pentium") +(define_cpu_unit "pentium-u,pentium-v" "pentium") +(absence_set "pentium-portu" "pentium-u,pentium-v") +(presence_set "pentium-portv" "pentium-portu") + +;; Floating point instructions can overlap with new issue of integer +;; instructions. We model only first cycle of FP pipeline, as it is +;; fully pipelined. +(define_cpu_unit "pentium-fp" "pentium_fpu") + +;; There is non-pipelined multiplier unit used for complex operations. +(define_cpu_unit "pentium-fmul" "pentium_fpu") + +;; Pentium preserves memory ordering, so when load-execute-store +;; instruction is executed together with other instruction loading +;; data, the execution of the other instruction is delayed to very +;; last cycle of first instruction, when data are bypassed. +;; We model this by allocating "memory" unit when store is pending +;; and using conflicting load units together. 
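The pent_pair attribute above encodes the classic Pentium U/V pairing classes: "uv" insns can issue in either pipe, "pu" and "pv" only in the U or V pipe respectively, and "np" insns issue alone. A compact model of the resulting dual-issue check is sketched below, under the simplifying assumption that register dependences and prefixes are ignored.

/* Toy model of Pentium U/V pairing: a pair issues together only if
   the first insn can occupy the U pipe and the second the V pipe.
   Dependences, prefixes and "np" special cases are ignored. */
#include <stdio.h>

enum pent_pair { UV, PU, PV, NP };

static int pairs (enum pent_pair u_insn, enum pent_pair v_insn)
{
  int u_ok = (u_insn == UV || u_insn == PU);   /* can go in U pipe */
  int v_ok = (v_insn == UV || v_insn == PV);   /* can go in V pipe */
  return u_ok && v_ok;
}

int main (void)
{
  printf ("alu + alu      : %s\n", pairs (UV, UV) ? "pair" : "single");
  printf ("alu + branch   : %s\n", pairs (UV, PV) ? "pair" : "single");
  printf ("branch + alu   : %s\n", pairs (PV, UV) ? "pair" : "single");
  printf ("alu + prefixed : %s\n", pairs (UV, PU) ? "pair" : "single");
  return 0;
}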
+ +(define_cpu_unit "pentium-memory" "pentium") +(define_cpu_unit "pentium-load0" "pentium") +(define_cpu_unit "pentium-load1" "pentium") +(absence_set "pentium-load0,pentium-load1" "pentium-memory") + +(define_reservation "pentium-load" "(pentium-load0 | pentium-load1)") +(define_reservation "pentium-np" "(pentium-u + pentium-v)") +(define_reservation "pentium-uv" "(pentium-u | pentium-v)") +(define_reservation "pentium-portuv" "(pentium-portu | pentium-portv)") +(define_reservation "pentium-firstu" "(pentium-u + pentium-portu)") +(define_reservation "pentium-firstv" "(pentium-v + pentium-portuv)") +(define_reservation "pentium-firstuv" "(pentium-uv + pentium-portuv)") +(define_reservation "pentium-firstuload" "(pentium-load + pentium-firstu)") +(define_reservation "pentium-firstvload" "(pentium-load + pentium-firstv)") +(define_reservation "pentium-firstuvload" "(pentium-load + pentium-firstuv) + | (pentium-firstv,pentium-v, + (pentium-load+pentium-firstv))") +(define_reservation "pentium-firstuboth" "(pentium-load + pentium-firstu + + pentium-memory)") +(define_reservation "pentium-firstvboth" "(pentium-load + pentium-firstv + + pentium-memory)") +(define_reservation "pentium-firstuvboth" "(pentium-load + pentium-firstuv + + pentium-memory) + | (pentium-firstv,pentium-v, + (pentium-load+pentium-firstv))") + +;; Few common long latency instructions +(define_insn_reservation "pent_mul" 11 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "imul")) + "pentium-np*11") + +(define_insn_reservation "pent_str" 12 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "str")) + "pentium-np*12") + +;; Integer division and some other long latency instruction block all +;; units, including the FP pipe. There is no value in modeling the +;; latency of these instructions and not modeling the latency +;; decreases the size of the DFA. +(define_insn_reservation "pent_block" 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "idiv")) + "pentium-np+pentium-fp") + +(define_insn_reservation "pent_cld" 2 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "cld")) + "pentium-np*2") + +;; Moves usually have one cycle penalty, but there are exceptions. +(define_insn_reservation "pent_fmov" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none,load"))) + "(pentium-fp+pentium-np)") + +(define_insn_reservation "pent_fpmovxf" 3 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load,store") + (eq_attr "mode" "XF")))) + "(pentium-fp+pentium-np)*3") + +(define_insn_reservation "pent_fpstore" 2 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (ior (match_operand 1 "immediate_operand" "") + (eq_attr "memory" "store")))) + "(pentium-fp+pentium-np)*2") + +(define_insn_reservation "pent_imov" 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "imov")) + "pentium-firstuv") + +;; Push and pop instructions have 1 cycle latency and special +;; hardware bypass allows them to be paired with other push,pop +;; and call instructions. +(define_bypass 0 "pent_push,pent_pop" "pent_push,pent_pop,pent_call") +(define_insn_reservation "pent_push" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "push") + (eq_attr "memory" "store"))) + "pentium-firstuv") + +(define_insn_reservation "pent_pop" 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "pop")) + "pentium-firstuv") + +;; Call and branch instruction can execute in either pipe, but +;; they are only pairable when in the v pipe. 
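For readers unfamiliar with define_insn_reservation, each form above ties an insn class to a latency and a pipeline reservation (imul occupies both pipes for 11 cycles, the string insns for 12, cld for 2, FP stores for 2, and so on). The hypothetical table below only restates a few of those numbers for orientation; the real semantics live in the DFA generated from the reservations themselves.

/* Rough tabular view of a few Pentium reservations above.
   "blocks_both_pipes" corresponds to reservations using pentium-np. */
#include <stdio.h>

struct reservation { const char *insn_class; int latency; int blocks_both_pipes; };

static const struct reservation pent_table[] = {
  { "imul",              11, 1 },
  { "str",               12, 1 },
  { "cld",                2, 1 },
  { "fmov (reg or load)", 1, 0 },
  { "fmov (store)",       2, 0 },
  { "imov",               1, 0 },
};

int main (void)
{
  for (unsigned i = 0; i < sizeof pent_table / sizeof pent_table[0]; i++)
    printf ("%-20s latency %2d %s\n", pent_table[i].insn_class,
            pent_table[i].latency,
            pent_table[i].blocks_both_pipes ? "(ties up both pipes)" : "");
  return 0;
}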
+(define_insn_reservation "pent_call" 10 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "call,callv")) + "pentium-firstv,pentium-v*9") + +(define_insn_reservation "pent_branch" 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "ibr")) + "pentium-firstv") + +;; Floating point instruction dispatch in U pipe, but continue +;; in FP pipeline allowing other isntructions to be executed. +(define_insn_reservation "pent_fp" 3 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fop,fistp")) + "(pentium-firstu+pentium-fp),nothing,nothing") + +;; First two cycles of fmul are not pipelined. +(define_insn_reservation "pent_fmul" 3 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fmul")) + "(pentium-firstuv+pentium-fp+pentium-fmul),pentium-fmul,nothing") + +;; Long latency FP instructions overlap with integer instructions, +;; but only last 2 cycles with FP ones. +(define_insn_reservation "pent_fdiv" 39 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fdiv")) + "(pentium-np+pentium-fp+pentium-fmul), + (pentium-fp+pentium-fmul)*36,pentium-fmul*2") + +(define_insn_reservation "pent_fpspc" 70 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fpspc")) + "(pentium-np+pentium-fp+pentium-fmul), + (pentium-fp+pentium-fmul)*67,pentium-fmul*2") + +;; Integer instructions. Load/execute/store takes 3 cycles, +;; load/execute 2 cycles and execute only one cycle. +(define_insn_reservation "pent_uv_both" 3 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "uv") + (eq_attr "memory" "both"))) + "pentium-firstuvboth,pentium-uv+pentium-memory,pentium-uv") + +(define_insn_reservation "pent_u_both" 3 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pu") + (eq_attr "memory" "both"))) + "pentium-firstuboth,pentium-u+pentium-memory,pentium-u") + +(define_insn_reservation "pent_v_both" 3 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pv") + (eq_attr "memory" "both"))) + "pentium-firstvboth,pentium-v+pentium-memory,pentium-v") + +(define_insn_reservation "pent_np_both" 3 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "np") + (eq_attr "memory" "both"))) + "pentium-np,pentium-np,pentium-np") + +(define_insn_reservation "pent_uv_load" 2 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "uv") + (eq_attr "memory" "load"))) + "pentium-firstuvload,pentium-uv") + +(define_insn_reservation "pent_u_load" 2 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pu") + (eq_attr "memory" "load"))) + "pentium-firstuload,pentium-u") + +(define_insn_reservation "pent_v_load" 2 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pv") + (eq_attr "memory" "load"))) + "pentium-firstvload,pentium-v") + +(define_insn_reservation "pent_np_load" 2 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "np") + (eq_attr "memory" "load"))) + "pentium-np,pentium-np") + +(define_insn_reservation "pent_uv" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "uv") + (eq_attr "memory" "none"))) + "pentium-firstuv") + +(define_insn_reservation "pent_u" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pu") + (eq_attr "memory" "none"))) + "pentium-firstu") + +(define_insn_reservation "pent_v" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pv") + (eq_attr "memory" "none"))) + "pentium-firstv") + +(define_insn_reservation "pent_np" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "np") + (eq_attr "memory" "none"))) + "pentium-np") + diff --git a/contrib/gcc/config/i386/ppro.md 
b/contrib/gcc/config/i386/ppro.md new file mode 100644 index 0000000..8690685 --- /dev/null +++ b/contrib/gcc/config/i386/ppro.md @@ -0,0 +1,150 @@ +;; Pentium Pro/PII Scheduling +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; +;; This file is part of GNU CC. +;; +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. */ + +;; Categorize how many uops an ia32 instruction evaluates to: +;; one -- an instruction with 1 uop can be decoded by any of the +;; three decoders. +;; few -- an instruction with 1 to 4 uops can be decoded only by +;; decoder 0. +;; many -- a complex instruction may take an unspecified number of +;; cycles to decode in decoder 0. + +(define_attr "ppro_uops" "one,few,many" + (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str") + (const_string "many") + (eq_attr "type" "icmov,fcmov,str,cld") + (const_string "few") + (eq_attr "type" "imov") + (if_then_else (eq_attr "memory" "store,both") + (const_string "few") + (const_string "one")) + (eq_attr "memory" "!none") + (const_string "few") + ] + (const_string "one"))) + +;; +;; The PPro has an out-of-order core, but the instruction decoders are +;; naturally in-order and asymmetric. We get best performance by scheduling +;; for the decoders, for in doing so we give the oo execution unit the +;; most choices. +;; +;; Rough readiness numbers. Fine tuning happens in i386.c. +;; +;; p0 describes port 0. +;; p01 describes ports 0 and 1 as a pair; alu insns can issue to either. +;; p2 describes port 2 for loads. +;; p34 describes ports 3 and 4 for stores. +;; fpu describes the fpu accessed via port 0. +;; ??? It is less than clear if there are separate fadd and fmul units +;; that could operate in parallel. +;; +;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "ishift,rotate,ishift1,rotate1,lea,ibr,cld")) + 1 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "imul")) + 4 1) + +;; ??? Does the divider lock out the pipe while it works, +;; or is there a disconnected unit? 
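The ppro_uops attribute above classifies insns by decoder demand: "one" fits any of the three decoders, "few" only decoder 0, and "many" is microcoded. The sketch below models the resulting decode grouping (the familiar 4-1-1 style rule), conservatively giving a microcoded insn a cycle to itself; it is an illustration of the attribute's intent, not of the scheduler.

/* Toy model of PPro decode grouping implied by ppro_uops:
   per cycle, decoder 0 accepts "one", "few" or "many" insns,
   decoders 1 and 2 accept only "one"-uop insns, and a "many"
   (microcoded) insn conservatively takes a cycle by itself. */
#include <stdio.h>

enum uops { ONE, FEW, MANY };

static int decode_cycles (const enum uops *insn, int n)
{
  int cycles = 0, i = 0;
  while (i < n)
    {
      cycles++;
      if (insn[i] == MANY)        /* microcoded: whole cycle */
        { i++; continue; }
      i++;                        /* decoder 0 takes one/few  */
      /* decoders 1 and 2 take at most one single-uop insn each */
      for (int slot = 0; slot < 2 && i < n && insn[i] == ONE; slot++)
        i++;
    }
  return cycles;
}

int main (void)
{
  enum uops stream[] = { FEW, ONE, ONE, MANY, ONE, FEW, ONE };
  printf ("decode cycles: %d\n", decode_cycles (stream, 7));
  return 0;
}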
+(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "idiv")) + 17 17) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fop,fsgn,fistp")) + 3 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fcmov")) + 2 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fcmp")) + 1 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fmov")) + 1 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fmul")) + 5 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fdiv,fpspc")) + 56 1) + +(define_function_unit "ppro_p01" 2 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "!imov,fmov")) + 1 1) + +(define_function_unit "ppro_p01" 2 0 + (and (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "imov,fmov")) + (eq_attr "memory" "none")) + 1 1) + +(define_function_unit "ppro_p2" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (ior (eq_attr "type" "pop") + (eq_attr "memory" "load,both"))) + 3 1) + +(define_function_unit "ppro_p34" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (ior (eq_attr "type" "push") + (eq_attr "memory" "store,both"))) + 1 1) + +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fop,fsgn,fmov,fcmp,fcmov,fistp")) + 1 1) + +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fmul")) + 5 2) + +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fdiv,fpspc")) + 56 56) + +;; imul uses the fpu. ??? does it have the same throughput as fmul? +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "imul")) + 4 1) diff --git a/contrib/gcc/config/i386/ptx4-i.h b/contrib/gcc/config/i386/ptx4-i.h index 0c51703..a7d5710 100644 --- a/contrib/gcc/config/i386/ptx4-i.h +++ b/contrib/gcc/config/i386/ptx4-i.h @@ -1,5 +1,5 @@ /* Target definitions for GNU compiler for Intel 80386 running Dynix/ptx v4 - Copyright (C) 1996 Free Software Foundation, Inc. + Copyright (C) 1996, 2002 Free Software Foundation, Inc. Modified from sysv4.h Originally written by Ron Guilmette (rfg@netcom.com). @@ -23,7 +23,6 @@ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (i386 Sequent Dynix/ptx Version 4)"); /* The svr4 ABI for the i386 says that records and unions are returned @@ -34,11 +33,15 @@ Boston, MA 02111-1307, USA. */ (TYPE_MODE (TYPE) == BLKmode \ || (VECTOR_MODE_P (TYPE_MODE (TYPE)) && int_size_in_bytes (TYPE) == 8)) -/* Define which macros to predefine. _SEQUENT_ is our extension. */ -/* This used to define X86, but james@bigtex.cactus.org says that - is supposed to be defined optionally by user programs--not by default. 
*/ -#define CPP_PREDEFINES \ - "-Dunix -D_SEQUENT_ -Asystem=unix -Asystem=ptx4" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("unix"); \ + builtin_define ("_SEQUENT_"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=ptx4"); \ + } \ + while (0) #undef DBX_REGISTER_NUMBER #define DBX_REGISTER_NUMBER(n) svr4_dbx_register_map[n] diff --git a/contrib/gcc/config/i386/rtemself.h b/contrib/gcc/config/i386/rtemself.h index 7e831b9..0967178 100644 --- a/contrib/gcc/config/i386/rtemself.h +++ b/contrib/gcc/config/i386/rtemself.h @@ -1,4 +1,4 @@ -/* Definitions for rtems targeting a ix86 using ELF. +/* Definitions for rtems targeting an ix86 using ELF. Copyright (C) 1996, 1997, 2000, 2001, 2002 Free Software Foundation, Inc. Contributed by Joel Sherrill (joel@OARcorp.com). @@ -21,11 +21,14 @@ Boston, MA 02111-1307, USA. */ /* Specify predefined symbols in preprocessor. */ -#include <i386/i386elf.h> - -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D__rtems__ -Asystem=rtems \ - -D__ELF__ -D__i386__ -D__USE_INIT_FINI__" - -#undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) %{msoft-float:-D_SOFT_FLOAT}" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__rtems__"); \ + builtin_define ("__ELF__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ + if (!TARGET_80387) \ + builtin_define ("_SOFT_FLOAT"); \ + } \ + while (0) diff --git a/contrib/gcc/config/i386/sco5.h b/contrib/gcc/config/i386/sco5.h index 36a04d5..815e457 100644 --- a/contrib/gcc/config/i386/sco5.h +++ b/contrib/gcc/config/i386/sco5.h @@ -1,5 +1,5 @@ /* Definitions for Intel 386 running SCO Unix System V 3.2 Version 5. - Copyright (C) 1992, 1995, 1996, 1997, 1998, 1999, 2000 + Copyright (C) 1992, 1995, 1996, 1997, 1998, 1999, 2000, 2002 Free Software Foundation, Inc. Contributed by Kean Johnston (hug@netcom.com) @@ -20,7 +20,6 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (i386, SCO OpenServer 5 Syntax)"); #undef LPREFIX @@ -78,17 +77,13 @@ Boston, MA 02111-1307, USA. */ #define DWARF2_UNWIND_INFO \ ((TARGET_ELF) ? 1 : 0 ) -#undef CONST_SECTION_ASM_OP -#define CONST_SECTION_ASM_OP_COFF "\t.section\t.rodata, \"x\"" -#define CONST_SECTION_ASM_OP_ELF "\t.section\t.rodata" -#define CONST_SECTION_ASM_OP \ - ((TARGET_ELF) ? CONST_SECTION_ASM_OP_ELF : CONST_SECTION_ASM_OP_COFF) - -#undef USE_CONST_SECTION -#define USE_CONST_SECTION_ELF 1 -#define USE_CONST_SECTION_COFF 0 -#define USE_CONST_SECTION \ - ((TARGET_ELF) ? USE_CONST_SECTION_ELF : USE_CONST_SECTION_COFF) +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP_COFF "\t.section\t.rodata, \"x\"" +#define READONLY_DATA_SECTION_ASM_OP_ELF "\t.section\t.rodata" +#define READONLY_DATA_SECTION_ASM_OP \ + ((TARGET_ELF) \ + ? 
READONLY_DATA_SECTION_ASM_OP_ELF \ + : READONLY_DATA_SECTION_ASM_OP_COFF) #undef INIT_SECTION_ASM_OP #define INIT_SECTION_ASM_OP_ELF "\t.section\t.init" @@ -144,13 +139,9 @@ do { \ #define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ do { \ if (TARGET_ELF) { \ - fprintf (FILE, "%s", TYPE_ASM_OP); \ - assemble_name (FILE, NAME); \ - putc (',', FILE); \ - fprintf (FILE, TYPE_OPERAND_FMT, "function"); \ - putc ('\n', FILE); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ - ASM_OUTPUT_LABEL(FILE, NAME); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ } else \ SCO_DEFAULT_ASM_COFF(FILE, NAME); \ } while (0) @@ -158,34 +149,28 @@ do { \ #undef ASM_DECLARE_FUNCTION_SIZE #define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ do { \ - if (TARGET_ELF) { if (!flag_inhibit_size_directive) \ - { \ - fprintf (FILE, "%s", SIZE_ASM_OP); \ - assemble_name (FILE, (FNAME)); \ - fprintf (FILE, ",.-"); \ - assemble_name (FILE, (FNAME)); \ - putc ('\n', FILE); \ - } } \ + if (TARGET_ELF && !flag_inhibit_size_directive) \ + ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \ } while (0) #undef ASM_DECLARE_OBJECT_NAME #define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ do { \ if (TARGET_ELF) { \ - fprintf (FILE, "%s", TYPE_ASM_OP); \ - assemble_name (FILE, NAME); \ - putc (',', FILE); \ - fprintf (FILE, TYPE_OPERAND_FMT, "object"); \ - putc ('\n', FILE); \ + HOST_WIDE_INT size; \ + \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + \ size_directive_output = 0; \ - if (!flag_inhibit_size_directive && DECL_SIZE (DECL)) \ - { \ - size_directive_output = 1; \ - fprintf (FILE, "%s", SIZE_ASM_OP); \ - assemble_name (FILE, NAME); \ - fprintf (FILE, ",%d\n", int_size_in_bytes (TREE_TYPE (DECL))); \ - } \ - ASM_OUTPUT_LABEL(FILE, NAME); \ + if (!flag_inhibit_size_directive \ + && (DECL) && DECL_SIZE (DECL)) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \ + } \ + \ + ASM_OUTPUT_LABEL (FILE, NAME); \ } else \ SCO_DEFAULT_ASM_COFF(FILE, NAME); \ } while (0) @@ -203,17 +188,17 @@ do { \ #undef ASM_FINISH_DECLARE_OBJECT #define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \ do { \ - if (TARGET_ELF) { \ + if (TARGET_ELF) { \ const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + HOST_WIDE_INT size; \ if (!flag_inhibit_size_directive && DECL_SIZE (DECL) \ && ! AT_END && TOP_LEVEL \ && DECL_INITIAL (DECL) == error_mark_node \ && !size_directive_output) \ { \ size_directive_output = 1; \ - fprintf (FILE, "%s", SIZE_ASM_OP); \ - assemble_name (FILE, name); \ - fprintf (FILE, ",%d\n", int_size_in_bytes (TREE_TYPE (DECL))); \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \ } \ } \ } while (0) @@ -353,19 +338,6 @@ do { \ fprintf ((FILE), "\n"); \ } while (0) -/* Must use data section for relocatable constants when pic. 
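The rewritten ASM_DECLARE_OBJECT_NAME and ASM_DECLARE_FUNCTION_SIZE above route through ASM_OUTPUT_TYPE_DIRECTIVE, ASM_OUTPUT_SIZE_DIRECTIVE and ASM_OUTPUT_MEASURED_SIZE instead of hand-rolled fprintf sequences. On an ELF target the net effect is assembler text of roughly the shape below; the emitter is only an illustration of that output, with the "@" type-operand prefix assumed.

/* Emit the ELF-style declarations the macros above boil down to:
   .type/.size for an object, and ".size name, .-name" for a function
   whose size is measured by the assembler.  Illustrative only. */
#include <stdio.h>

static void declare_object (FILE *f, const char *name, long size)
{
  fprintf (f, "\t.type\t%s, @object\n", name);
  fprintf (f, "\t.size\t%s, %ld\n", name, size);
  fprintf (f, "%s:\n", name);
}

static void declare_function_size (FILE *f, const char *name)
{
  fprintf (f, "\t.size\t%s, .-%s\n", name, name);
}

int main (void)
{
  declare_object (stdout, "table", 64);
  declare_function_size (stdout, "main");
  return 0;
}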
*/ -#undef SELECT_RTX_SECTION -#define SELECT_RTX_SECTION(MODE,RTX,ALIGN) \ -{ \ - if (TARGET_ELF) { \ - if (flag_pic && symbolic_operand (RTX, VOIDmode)) \ - data_section (); \ - else \ - const_section (); \ - } else \ - readonly_data_section(); \ -} - #undef ASM_OUTPUT_CASE_LABEL #define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,JUMPTABLE) \ do { \ @@ -378,13 +350,9 @@ do { \ #define ASM_OUTPUT_IDENT(FILE, NAME) \ fprintf (FILE, "%s\"%s\"\n", IDENT_ASM_OP, NAME); -#undef ASM_GLOBALIZE_LABEL -#define ASM_GLOBALIZE_LABEL(FILE,NAME) \ - (fprintf ((FILE), "%s", GLOBAL_ASM_OP), assemble_name (FILE, NAME), fputs ("\n", FILE)) - #undef ASM_OUTPUT_EXTERNAL_LIBCALL #define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \ - if (TARGET_ELF) ASM_GLOBALIZE_LABEL (FILE, XSTR (FUN, 0)) + if (TARGET_ELF) (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0)) #undef ASM_OUTPUT_INTERNAL_LABEL #define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ @@ -445,42 +413,23 @@ do { \ #define DBX_REGISTER_NUMBER(n) \ ((TARGET_ELF) ? svr4_dbx_register_map[n] : dbx_register_map[n]) -#undef DWARF2_DEBUGGING_INFO -#undef DWARF_DEBUGGING_INFO -#undef SDB_DEBUGGING_INFO -#undef DBX_DEBUGGING_INFO -#undef PREFERRED_DEBUGGING_TYPE - #define DWARF2_DEBUGGING_INFO 1 #define DWARF_DEBUGGING_INFO 1 -#define SDB_DEBUGGING_INFO 1 -#define DBX_DEBUGGING_INFO 1 +#define SDB_DEBUGGING_INFO 1 +#define DBX_DEBUGGING_INFO 1 + +#undef PREFERRED_DEBUGGING_TYPE #define PREFERRED_DEBUGGING_TYPE \ ((TARGET_ELF) ? DWARF2_DEBUG: SDB_DEBUG) #undef EXTRA_SECTIONS -#define EXTRA_SECTIONS in_const, in_init, in_fini +#define EXTRA_SECTIONS in_init, in_fini #undef EXTRA_SECTION_FUNCTIONS #define EXTRA_SECTION_FUNCTIONS \ - CONST_SECTION_FUNCTION \ INIT_SECTION_FUNCTION \ FINI_SECTION_FUNCTION -#undef CONST_SECTION_FUNCTION -#define CONST_SECTION_FUNCTION \ -void \ -const_section () \ -{ \ - if (!USE_CONST_SECTION) \ - text_section(); \ - else if (in_section != in_const) \ - { \ - fprintf (asm_out_file, "%s\n", CONST_SECTION_ASM_OP); \ - in_section = in_const; \ - } \ -} - #undef FINI_SECTION_FUNCTION #define FINI_SECTION_FUNCTION \ void \ @@ -541,28 +490,9 @@ init_section () \ == void_type_node))) ? (SIZE) \ : 0)) -#undef SELECT_SECTION -#define SELECT_SECTION(DECL,RELOC,ALIGN) \ -{ \ - if (TARGET_ELF && flag_pic && RELOC) \ - data_section (); \ - else if (TREE_CODE (DECL) == STRING_CST) \ - { \ - if (! flag_writable_strings) \ - const_section (); \ - else \ - data_section (); \ - } \ - else if (TREE_CODE (DECL) == VAR_DECL) \ - { \ - if (! DECL_READONLY_SECTION (DECL, RELOC)) \ - data_section (); \ - else \ - const_section (); \ - } \ - else \ - const_section (); \ -} +/* ??? Ignore coff. */ +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION default_elf_select_section #undef SWITCH_TAKES_ARG #define SWITCH_TAKES_ARG(CHAR) \ @@ -581,7 +511,6 @@ init_section () \ #undef TARGET_SUBTARGET_DEFAULT #define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS) -#undef HANDLE_SYSV_PRAGMA #define HANDLE_SYSV_PRAGMA 1 /* Though OpenServer supports .weak in COFF, we don't use it. 
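The SELECT_SECTION and SELECT_RTX_SECTION definitions deleted above hard-coded the section choice (PIC-relocated data and writable strings to .data, everything read-only to .rodata), while the replacement defers to the generic default_elf_select_section hook. A condensed restatement of the old decision follows for comparison; the predicate names are hypothetical stand-ins for flag_pic, flag_writable_strings and the DECL tests.

/* Condensed form of the removed sco5.h SELECT_SECTION logic.
   A paraphrase for comparison, not the target hook itself. */
#include <stdio.h>

enum kind { STRING_CONSTANT, READONLY_VAR, WRITABLE_VAR, OTHER_CONSTANT };

static const char *select_section (enum kind k, int pic_and_reloc,
                                   int writable_strings)
{
  if (pic_and_reloc)
    return ".data";                    /* relocated initializers need writable data */
  switch (k)
    {
    case STRING_CONSTANT: return writable_strings ? ".data" : ".rodata";
    case WRITABLE_VAR:    return ".data";
    default:              return ".rodata";
    }
}

int main (void)
{
  printf ("\"abc\"                -> %s\n", select_section (STRING_CONSTANT, 0, 0));
  printf ("int x = 1;           -> %s\n", select_section (WRITABLE_VAR, 0, 0));
  printf ("const void *p (PIC)  -> %s\n", select_section (READONLY_VAR, 1, 0));
  return 0;
}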
@@ -593,9 +522,6 @@ init_section () \ do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \ fputc ('\n', FILE); } while (0) -#undef SCCS_DIRECTIVE -#define SCCS_DIRECTIVE 1 - /* * Define sizes and types */ @@ -673,13 +599,11 @@ init_section () \ %{pg:gcrt.o%s}%{!pg:%{p:mcrt1.o%s}%{!p:crt1.o%s}}}} \ %{ansi:values-Xc.o%s} \ %{!ansi: \ - %{traditional:values-Xt.o%s} \ - %{!traditional: \ - %{Xa:values-Xa.o%s} \ - %{!Xa:%{Xc:values-Xc.o%s} \ - %{!Xc:%{Xk:values-Xk.o%s} \ - %{!Xk:%{Xt:values-Xt.o%s} \ - %{!Xt:values-Xa.o%s}}}}}} \ + %{Xa:values-Xa.o%s} \ + %{!Xa:%{Xc:values-Xc.o%s} \ + %{!Xc:%{Xk:values-Xk.o%s} \ + %{!Xk:%{Xt:values-Xt.o%s} \ + %{!Xt:values-Xa.o%s}}}}} \ %{mcoff:crtbeginS.o%s} %{!mcoff:crtbegin.o%s}" #undef ENDFILE_SPEC @@ -688,20 +612,32 @@ init_section () \ %{mcoff:crtendS.o%s} \ %{pg:gcrtn.o%s}%{!pg:crtn.o%s}" -#undef CPP_PREDEFINES -#define CPP_PREDEFINES \ - "-Asystem=svr3" - -/* You are in a maze of GCC specs ... all alike */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__unix"); \ + builtin_define ("_SCO_DS"); \ + builtin_define ("_M_I386"); \ + builtin_define ("_M_XENIX"); \ + builtin_define ("_M_UNIX"); \ + builtin_assert ("system=svr3"); \ + if (flag_iso) \ + cpp_define (pfile, "_STRICT_ANSI"); \ + if (flag_pic) \ + { \ + builtin_define ("__PIC__"); \ + builtin_define ("__pic__"); \ + } \ + } \ + while (0) #undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) \ +#define CPP_SPEC "\ %{fpic:%{mcoff:%e-fpic is not valid with -mcoff}} \ %{fPIC:%{mcoff:%e-fPIC is not valid with -mcoff}} \ - -D__i386 -D__unix -D_SCO_DS=1 -D_M_I386 -D_M_XENIX -D_M_UNIX \ %{!Xods30:-D_STRICT_NAMES} \ %{!ansi:%{!posix:%{!Xods30:-D_SCO_XPG_VERS=4}}} \ - %{ansi:-isystem include/ansi%s -isystem /usr/include/ansi -D_STRICT_ANSI} \ + %{ansi:-isystem include/ansi%s -isystem /usr/include/ansi} \ %{!ansi: \ %{posix:-isystem include/posix%s -isystem /usr/include/posix \ -D_POSIX_C_SOURCE=2 -D_POSIX_SOURCE=1} \ @@ -716,17 +652,13 @@ init_section () \ -DM_BITFIELDS -DM_SYS5 -DM_SYSV -DM_INTERNAT -DM_SYSIII \ -DM_WORDSWAP}}}} \ %{scointl:-DM_INTERNAT -D_M_INTERNAT} \ - %{traditional:-D_KR -D_SVID -D_NO_PROTOTYPE} \ %{!mcoff:-D_SCO_ELF} \ %{mcoff:-D_M_COFF -D_SCO_COFF} \ - %{!mcoff:%{fpic:-D__PIC__ -D__pic__} \ - %{fPIC:%{!fpic:-D__PIC__ -D__pic__}}} \ %{Xa:-D_SCO_C_DIALECT=1} \ %{!Xa:%{Xc:-D_SCO_C_DIALECT=3} \ %{!Xc:%{Xk:-D_SCO_C_DIALECT=4} \ %{!Xk:%{Xt:-D_SCO_C_DIALECT=2} \ - %{!Xt:-D_SCO_C_DIALECT=1}}}} \ - %{traditional:-traditional -D_KR -D_NO_PROTOTYPE}" + %{!Xt:-D_SCO_C_DIALECT=1}}}}" #undef LINK_SPEC #define LINK_SPEC \ diff --git a/contrib/gcc/config/i386/sol2.h b/contrib/gcc/config/i386/sol2.h index 5fa5fcd..fb5a184 100644 --- a/contrib/gcc/config/i386/sol2.h +++ b/contrib/gcc/config/i386/sol2.h @@ -20,36 +20,8 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -/* We use stabs-in-elf for debugging, because that is what the native - toolchain uses. */ -#undef PREFERRED_DEBUGGING_TYPE -#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG - -#if ! GAS_REJECTS_MINUS_S - -/* - Changed from config/svr4.h in the following ways: - - - Removed -Yd (neither the sun bundled assembler nor gas accept it). - - Added "-s" so that stabs are not discarded. -*/ - -#undef ASM_SPEC -#define ASM_SPEC \ - "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Wa,*:%*} -s" - #define CMOV_SUN_AS_SYNTAX 1 -#else /* GAS_REJECTS_MINUS_S */ - -/* Same as above, except for -s, unsupported by GNU as. 
*/ -#undef ASM_SPEC -#define ASM_SPEC \ - "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Wa,*:%*}" - -#endif /* GAS_REJECTS_MINUS_S */ - /* The Solaris 2.0 x86 linker botches alignment of code sections. It tries to align to a 16 byte boundary by padding with 0x00000090 ints, rather than 0x90 bytes (nop). This generates trash in the @@ -68,92 +40,26 @@ Boston, MA 02111-1307, USA. */ (flag_pic ? (GLOBAL ? DW_EH_PE_indirect : 0) | DW_EH_PE_datarel \ : DW_EH_PE_absptr) -/* Solaris 2/Intel uses a wint_t different from the default, as on SPARC. */ -#undef WINT_TYPE -#define WINT_TYPE "long int" - -#undef WINT_TYPE_SIZE -#define WINT_TYPE_SIZE BITS_PER_WORD - -#define HANDLE_PRAGMA_REDEFINE_EXTNAME 1 - -#undef CPP_PREDEFINES -#define CPP_PREDEFINES \ - "-Dunix -D__svr4__ -D__SVR4 -Dsun -D__PRAGMA_REDEFINE_EXTNAME -Asystem=svr4" - /* Solaris 2/Intel as chokes on #line directives. */ #undef CPP_SPEC -#define CPP_SPEC \ - "%{.S:-P} \ - %(cpp_cpu) \ - %{pthreads:-D_REENTRANT -D_PTHREADS} \ - %{!pthreads:%{threads:-D_REENTRANT -D_SOLARIS_THREADS}} \ - %{compat-bsd:-iwithprefixbefore ucbinclude -I/usr/ucbinclude}" +#define CPP_SPEC "%{.S:-P} %(cpp_subtarget)" -/* For C++ we need to add some additional macro definitions required - by the C++ standard library. */ -#define CPLUSPLUS_CPP_SPEC "\ --D_XOPEN_SOURCE=500 -D_LARGEFILE_SOURCE=1 -D_LARGEFILE64_SOURCE=1 \ --D__EXTENSIONS__ \ -%(cpp) \ +/* FIXME: Removed -K PIC from generic Solaris 2 ASM_SPEC: the native assembler + gives many warnings: R_386_32 relocation is used for symbol ".text". */ +#undef ASM_SPEC +#define ASM_SPEC "\ +%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Wa,*:%*} -s \ +%(asm_cpu) \ " -#undef LIB_SPEC -#define LIB_SPEC \ - "%{compat-bsd:-lucb -lsocket -lnsl -lelf -laio} \ - %{!shared:\ - %{!symbolic:\ - %{pthreads:-lpthread} \ - %{!pthreads:%{threads:-lthread}} \ - -lc}}" - -#undef ENDFILE_SPEC -#define ENDFILE_SPEC "crtend.o%s %{pg:crtn.o%s}%{!pg:crtn.o%s}" - -#undef STARTFILE_SPEC -#define STARTFILE_SPEC "%{!shared: \ - %{!symbolic: \ - %{pg:gcrt1.o%s}%{!pg:%{p:mcrt1.o%s}%{!p:crt1.o%s}}}}\ - %{pg:gmon.o%s} crti.o%s \ - %{ansi:values-Xc.o%s} \ - %{!ansi: \ - %{traditional:values-Xt.o%s} \ - %{!traditional:values-Xa.o%s}} \ - crtbegin.o%s" - -/* This should be the same as in svr4.h, except with -R added. */ -#undef LINK_SPEC -#define LINK_SPEC \ - "%{h*} %{v:-V} \ - %{b} %{Wl,*:%*} \ - %{static:-dn -Bstatic} \ - %{shared:-G -dy %{!mimpure-text:-z text}} \ - %{symbolic:-Bsymbolic -G -dy -z text} \ - %{G:-G} \ - %{YP,*} \ - %{R*} \ - %{compat-bsd: \ - %{!YP,*:%{pg:-Y P,/usr/ucblib:/usr/ccs/lib/libp:/usr/lib/libp:/usr/ccs/lib:/usr/lib} \ - %{!pg:%{p:-Y P,/usr/ucblib:/usr/ccs/lib/libp:/usr/lib/libp:/usr/ccs/lib:/usr/lib} \ - %{!p:-Y P,/usr/ucblib:/usr/ccs/lib:/usr/lib}}} \ - -R /usr/ucblib} \ - %{!compat-bsd: \ - %{!YP,*:%{pg:-Y P,/usr/ccs/lib/libp:/usr/lib/libp:/usr/ccs/lib:/usr/lib} \ - %{!pg:%{p:-Y P,/usr/ccs/lib/libp:/usr/lib/libp:/usr/ccs/lib:/usr/lib} \ - %{!p:-Y P,/usr/ccs/lib:/usr/lib}}}} \ - %{Qy:} %{!Qn:-Qy}" - -/* This defines which switch letters take arguments. - It is as in svr4.h but with -R added. 
*/ - -#undef SWITCH_TAKES_ARG -#define SWITCH_TAKES_ARG(CHAR) \ - (DEFAULT_SWITCH_TAKES_ARG(CHAR) \ - || (CHAR) == 'R' \ - || (CHAR) == 'h' \ - || (CHAR) == 'z') - -#define STDC_0_IN_SYSTEM_HEADERS 1 +#define ASM_CPU_SPEC "" + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "cpp_subtarget", CPP_SUBTARGET_SPEC }, \ + { "asm_cpu", ASM_CPU_SPEC }, \ + { "startfile_arch", STARTFILE_ARCH_SPEC }, \ + { "link_arch", LINK_ARCH_SPEC } #undef LOCAL_LABEL_PREFIX #define LOCAL_LABEL_PREFIX "." diff --git a/contrib/gcc/config/i386/svr3dbx.h b/contrib/gcc/config/i386/svr3dbx.h index b0e4237..4be7a70 100644 --- a/contrib/gcc/config/i386/svr3dbx.h +++ b/contrib/gcc/config/i386/svr3dbx.h @@ -1,5 +1,5 @@ -/* Definitions for Intel 386 running system V, using dbx-in-coff encapsulation. - Copyright (C) 1992, 1995 Free Software Foundation, Inc. +/* Definitions for Intel 385 running system V, using dbx-in-coff encapsulation. + Copyright (C) 1992, 1995, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -18,15 +18,13 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include "i386/svr3gas.h" - /* We do not want to output SDB debugging information. */ #undef SDB_DEBUGGING_INFO /* We want to output DBX debugging information. */ -#define DBX_DEBUGGING_INFO +#define DBX_DEBUGGING_INFO 1 /* Compensate for botch in dbxout_init/dbxout_source_file which unconditionally drops the first character from ltext_label_name */ diff --git a/contrib/gcc/config/i386/svr3gas.h b/contrib/gcc/config/i386/svr3gas.h index 08ada11..b9d94b7 100644 --- a/contrib/gcc/config/i386/svr3gas.h +++ b/contrib/gcc/config/i386/svr3gas.h @@ -1,5 +1,5 @@ /* Definitions for Intel 386 running system V, using gas. - Copyright (C) 1992, 1996, 2000 Free Software Foundation, Inc. + Copyright (C) 1992, 1996, 2000, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -18,7 +18,7 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include "i386/gas.h" +#define TARGET_VERSION fprintf (stderr, " (80386, ATT syntax)"); /* Add stuff that normally comes from i386/sysv3.h */ @@ -83,22 +83,8 @@ Boston, MA 02111-1307, USA. */ this file should be rewritten to include config/svr3.h and override what isn't right. */ -/* Support const sections and the ctors and dtors sections for g++. - Note that there appears to be two different ways to support const - sections at the moment. You can either #define the symbol - READONLY_DATA_SECTION (giving it some code which switches to the - readonly data section) or else you can #define the symbols - EXTRA_SECTIONS, EXTRA_SECTION_FUNCTIONS, SELECT_SECTION, and - SELECT_RTX_SECTION. We do both here just to be on the safe side. - However, use of the const section is turned off by default - unless the specific tm.h file turns it on by defining - USE_CONST_SECTION as 1. */ - -#define USE_CONST_SECTION 0 - #define INIT_SECTION_ASM_OP "\t.section\t.init" #define FINI_SECTION_ASM_OP "\t.section .fini,\"x\"" -#define CONST_SECTION_ASM_OP "\t.section\t.rodata, \"x\"" #define CTORS_SECTION_ASM_OP INIT_SECTION_ASM_OP #define DTORS_SECTION_ASM_OP FINI_SECTION_ASM_OP @@ -122,14 +108,11 @@ do { \ (*--p) (); \ } while (0) -/* Add extra sections .rodata, .init and .fini. 
*/ - #undef EXTRA_SECTIONS -#define EXTRA_SECTIONS in_const, in_init, in_fini +#define EXTRA_SECTIONS in_init, in_fini #undef EXTRA_SECTION_FUNCTIONS #define EXTRA_SECTION_FUNCTIONS \ - CONST_SECTION_FUNCTION \ INIT_SECTION_FUNCTION \ FINI_SECTION_FUNCTION @@ -155,56 +138,4 @@ fini_section () \ } \ } -#define READONLY_DATA_SECTION() const_section () - -#define CONST_SECTION_FUNCTION \ -void \ -const_section () \ -{ \ - if (!USE_CONST_SECTION) \ - text_section(); \ - else if (in_section != in_const) \ - { \ - fprintf (asm_out_file, "%s\n", CONST_SECTION_ASM_OP); \ - in_section = in_const; \ - } \ -} - #define TARGET_ASM_CONSTRUCTOR ix86_svr3_asm_out_constructor - -/* A C statement or statements to switch to the appropriate - section for output of DECL. DECL is either a `VAR_DECL' node - or a constant of some sort. RELOC indicates whether forming - the initial value of DECL requires link-time relocations. */ - -#define SELECT_SECTION(DECL,RELOC,ALIGN) \ -{ \ - if (TREE_CODE (DECL) == STRING_CST) \ - { \ - if (! flag_writable_strings) \ - const_section (); \ - else \ - data_section (); \ - } \ - else if (TREE_CODE (DECL) == VAR_DECL) \ - { \ - if ((0 && RELOC) /* should be (flag_pic && RELOC) */ \ - || !TREE_READONLY (DECL) || TREE_SIDE_EFFECTS (DECL) \ - || !DECL_INITIAL (DECL) \ - || (DECL_INITIAL (DECL) != error_mark_node \ - && !TREE_CONSTANT (DECL_INITIAL (DECL)))) \ - data_section (); \ - else \ - const_section (); \ - } \ - else \ - const_section (); \ -} - -/* A C statement or statements to switch to the appropriate - section for output of RTX in mode MODE. RTX is some kind - of constant in RTL. The argument MODE is redundant except - in the case of a `const_int' rtx. Currently, these always - go into the const section. */ - -#define SELECT_RTX_SECTION(MODE,RTX,ALIGN) const_section() diff --git a/contrib/gcc/config/i386/sysv3.h b/contrib/gcc/config/i386/sysv3.h index 8eb4bec..93f9445 100644 --- a/contrib/gcc/config/i386/sysv3.h +++ b/contrib/gcc/config/i386/sysv3.h @@ -1,5 +1,5 @@ /* Definitions for Intel 386 running system V. - Copyright (C) 1988, 1996, 2000 Free Software Foundation, Inc. + Copyright (C) 1988, 1996, 2000, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -18,17 +18,7 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#include "i386/i386.h" - -/* Use default settings for system V.3. */ - -#include "svr3.h" - -/* Use the ATT assembler syntax. - This overrides at least one macro (USER_LABEL_PREFIX) from svr3.h. */ - -#include "i386/att.h" +#define TARGET_VERSION fprintf (stderr, " (80386, ATT syntax)"); /* Use crt1.o as a startup file and crtn.o as a closing file. */ @@ -42,11 +32,17 @@ Boston, MA 02111-1307, USA. */ /* Specify predefined symbols in preprocessor. */ -#define CPP_PREDEFINES "-Dunix -Asystem=svr3" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("unix"); \ + builtin_assert ("system=svr3"); \ + } \ + while (0) -#define CPP_SPEC "%(cpp_cpu) %{posix:-D_POSIX_SOURCE}" +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" -/* Writing `int' for a bitfield forces int alignment for the structure. */ +/* Writing `int' for a bit-field forces int alignment for the structure. */ #define PCC_BITFIELD_TYPE_MATTERS 1 @@ -57,8 +53,7 @@ Boston, MA 02111-1307, USA. */ /* We want to be able to get DBX debugging information via -gstabs. 
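The init_section/fini_section functions kept above, and the const_section one removed, all follow the usual EXTRA_SECTION_FUNCTIONS pattern: emit the section directive only when the cached in_section state says output is not already there. A stand-alone sketch of that caching idiom is shown below, with made-up section names.

/* Idiom behind init_section()/fini_section(): switch the assembler
   output to a section only if we are not already in it.  The global
   "in_section" mirrors GCC's old section-state variable. */
#include <stdio.h>

enum section { in_none, in_text, in_init, in_fini };
static enum section in_section = in_none;

static void switch_section (FILE *f, enum section s, const char *asm_op)
{
  if (in_section != s)
    {
      fprintf (f, "%s\n", asm_op);
      in_section = s;
    }
}

int main (void)
{
  switch_section (stdout, in_init, "\t.section\t.init");
  switch_section (stdout, in_init, "\t.section\t.init");   /* no output */
  switch_section (stdout, in_fini, "\t.section .fini,\"x\"");
  return 0;
}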
*/ -#undef DBX_DEBUGGING_INFO -#define DBX_DEBUGGING_INFO +#define DBX_DEBUGGING_INFO 1 #undef PREFERRED_DEBUGGING_TYPE #define PREFERRED_DEBUGGING_TYPE SDB_DEBUG diff --git a/contrib/gcc/config/i386/sysv4-cpp.h b/contrib/gcc/config/i386/sysv4-cpp.h new file mode 100644 index 0000000..5b46bf1 --- /dev/null +++ b/contrib/gcc/config/i386/sysv4-cpp.h @@ -0,0 +1,32 @@ +/* Target definitions for GNU compiler for Intel 80386 running System V.4 + Copyright (C) 1991, 2001, 2002 Free Software Foundation, Inc. + + Written by Ron Guilmette (rfg@netcom.com). + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("unix"); \ + builtin_define ("__svr4__"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=svr4"); \ + } \ + while (0) + diff --git a/contrib/gcc/config/i386/sysv4.h b/contrib/gcc/config/i386/sysv4.h index 7dc0ac1..6b3335e 100644 --- a/contrib/gcc/config/i386/sysv4.h +++ b/contrib/gcc/config/i386/sysv4.h @@ -1,5 +1,5 @@ /* Target definitions for GNU compiler for Intel 80386 running System V.4 - Copyright (C) 1991, 2001 Free Software Foundation, Inc. + Copyright (C) 1991, 2001, 2002 Free Software Foundation, Inc. Written by Ron Guilmette (rfg@netcom.com). @@ -21,7 +21,6 @@ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (i386 System V Release 4)"); /* The svr4 ABI for the i386 says that records and unions are returned @@ -32,12 +31,6 @@ Boston, MA 02111-1307, USA. */ (TYPE_MODE (TYPE) == BLKmode \ || (VECTOR_MODE_P (TYPE_MODE (TYPE)) && int_size_in_bytes (TYPE) == 8)) -/* Define which macros to predefine. __svr4__ is our extension. */ -/* This used to define X86, but james@bigtex.cactus.org says that - is supposed to be defined optionally by user programs--not by default. */ -#define CPP_PREDEFINES \ - "-Dunix -D__svr4__ -Asystem=unix -Asystem=svr4" - /* Output at beginning of assembler file. */ /* The .file command should always begin the output. */ @@ -141,6 +134,3 @@ Boston, MA 02111-1307, USA. */ "addl\t$_GLOBAL_OFFSET_TABLE_+[.-.LPR%=],%0" \ : "=d"(BASE)) #endif - -#undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu)" diff --git a/contrib/gcc/config/i386/sysv5.h b/contrib/gcc/config/i386/sysv5.h index 87d6b9c..9b759f4 100644 --- a/contrib/gcc/config/i386/sysv5.h +++ b/contrib/gcc/config/i386/sysv5.h @@ -31,4 +31,4 @@ Boston, MA 02111-1307, USA. 
*/ %{!shared:%{!symbolic:-lc -lcrt}}" #undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) %{pthread:-D_REENTRANT} %{pthreadT:-D_REENTRANT}" +#define CPP_SPEC "%{pthread:-D_REENTRANT} %{pthreadT:-D_REENTRANT}" diff --git a/contrib/gcc/config/i386/t-cygwin b/contrib/gcc/config/i386/t-cygwin index 68d2ac5..6fcb834 100644 --- a/contrib/gcc/config/i386/t-cygwin +++ b/contrib/gcc/config/i386/t-cygwin @@ -14,7 +14,7 @@ LIBGCC2_INCLUDES = -I$(srcdir)/../winsup/include \ -I$(srcdir)/../winsup/cygwin/include \ -I$(srcdir)/../winsup/w32api/include -winnt.o: $(srcdir)/config/i386/winnt.c $(RTL_H) $(TREE_H) $(CONFIG_H) +winnt.o: $(srcdir)/config/i386/winnt.c $(RTL_H) $(TREE_H) $(CONFIG_H) $(TM_P_H) $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/winnt.c # Don't run fixproto diff --git a/contrib/gcc/config/i386/t-interix b/contrib/gcc/config/i386/t-interix index adcf593..710de8b 100644 --- a/contrib/gcc/config/i386/t-interix +++ b/contrib/gcc/config/i386/t-interix @@ -1,6 +1,6 @@ LIB1ASMSRC = i386/cygwin.asm LIB1ASMFUNCS = _chkstk -interix.o: $(srcdir)/config/i386/interix.c - $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/interix.c +winnt.o: $(srcdir)/config/i386/winnt.c $(TM_P_H) + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/winnt.c diff --git a/contrib/gcc/config/i386/t-linux64 b/contrib/gcc/config/i386/t-linux64 index 31b6ad4..3b109d8 100644 --- a/contrib/gcc/config/i386/t-linux64 +++ b/contrib/gcc/config/i386/t-linux64 @@ -12,3 +12,9 @@ LIBGCC = stmp-multilib INSTALL_LIBGCC = install-multilib EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o + +# The pushl in CTOR initialization interferes with frame pointer elimination. +# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables, +# because then __FRAME_END__ might not be the last thing in .eh_frame +# section. +CRTSTUFF_T_CFLAGS = -fno-omit-frame-pointer -fno-asynchronous-unwind-tables diff --git a/contrib/gcc/config/i386/t-mingw32 b/contrib/gcc/config/i386/t-mingw32 index fe948c6..28096f8 100644 --- a/contrib/gcc/config/i386/t-mingw32 +++ b/contrib/gcc/config/i386/t-mingw32 @@ -2,3 +2,6 @@ # collect2 doesn't work for i386-mingw32* yet. # USE_COLLECT2= + +# We hide calls to w32api needed for w32 thread support here: +LIB2FUNCS_EXTRA = $(srcdir)/config/i386/gthr-win32.c diff --git a/contrib/gcc/config/i386/t-sco5gas b/contrib/gcc/config/i386/t-sco5gas index 2d0b48a..edeb554 100644 --- a/contrib/gcc/config/i386/t-sco5gas +++ b/contrib/gcc/config/i386/t-sco5gas @@ -1,6 +1,6 @@ # The pushl in CTOR initialization interferes with frame pointer elimination. CRTSTUFF_T_CFLAGS = -fPIC -fno-omit-frame-pointer -CRTSTUFF_T_CFLAGS_S = -mcoff -fno-omit-frame-pointer +CRTSTUFF_T_CFLAGS_S = -fno-omit-frame-pointer # # I am still a little unsure of the multilib architecture. The following diff --git a/contrib/gcc/config/i386/unix.h b/contrib/gcc/config/i386/unix.h index f7e38b4..e69f26d 100644 --- a/contrib/gcc/config/i386/unix.h +++ b/contrib/gcc/config/i386/unix.h @@ -1,5 +1,5 @@ /* Definitions for Unix assembler syntax for the Intel 80386. - Copyright (C) 1988, 1994, 1999, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1988, 1994, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -58,26 +58,10 @@ Boston, MA 02111-1307, USA. */ #define BSS_SECTION_ASM_OP "\t.bss" -/* This is how to output a command to make the user-level label named NAME - defined for reference from other files. 
*/ - -#define ASM_GLOBALIZE_LABEL(FILE,NAME) \ - (fputs (".globl ", FILE), assemble_name (FILE, NAME), fputs ("\n", FILE)) +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP ".globl " /* By default, target has a 80387, uses IEEE compatible arithmetic, and returns float values in the 387. */ #define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS) - -/* Floating-point return values come in the FP register. */ - -#define VALUE_REGNO(MODE) \ - (GET_MODE_CLASS (MODE) == MODE_FLOAT \ - && TARGET_FLOAT_RETURNS_IN_80387 ? FIRST_FLOAT_REG \ - : (MODE) == TImode || VECTOR_MODE_P (MODE) ? FIRST_SSE_REG \ - : 0) - -/* Output code to add DELTA to the first argument, and then jump to FUNCTION. - Used for C++ multiple inheritance. */ -#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \ - x86_output_mi_thunk (FILE, DELTA, FUNCTION); diff --git a/contrib/gcc/config/i386/uwin.h b/contrib/gcc/config/i386/uwin.h index ca39ffd..1210510 100644 --- a/contrib/gcc/config/i386/uwin.h +++ b/contrib/gcc/config/i386/uwin.h @@ -2,7 +2,7 @@ hosting on U/WIN (Windows32), using GNU tools and the Windows32 API Library, as distinct from winnt.h, which is used to build GCC for use with a windows style library and tool set and uses the Microsoft tools. - Copyright (C) 1999 Free Software Foundation, Inc. + Copyright (C) 1999, 2002 Free Software Foundation, Inc. Contributed by Mumit Khan <khan@xraylith.wisc.edu>. This file is part of GNU CC. @@ -25,26 +25,27 @@ Boston, MA 02111-1307, USA. */ /* Most of this is the same as for Cygwin32, except for changing some specs. */ -#include "i386/cygwin.h" - #define STANDARD_INCLUDE_COMPONENT "UWIN" #define SYSTEM_INCLUDE_DIR "/usr/gnu/include" #undef MD_STARTFILE_PREFIX #define MD_STARTFILE_PREFIX "/usr/gnu/lib/" -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D_WIN32 -D__WIN32__ \ - -D_UWIN -DWINNT -D_X86_=1 -D__STDC__=1 \ - -D__UWIN__ -D__MSVCRT__ \ - -D_STD_INCLUDE_DIR=mingw32 \ - -D__stdcall=__attribute__((__stdcall__)) \ - _D_stdcall=__attribute__((__stdcall__)) \ - -D__cdecl=__attribute__((__cdecl__)) \ - -D__declspec(x)=__attribute__((x)) \ - -Asystem=winnt" +#undef MAYBE_UWIN_CPP_BUILTINS +#define MAYBE_UWIN_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("WINNT"); \ + builtin_define ("_WIN32"); \ + builtin_define ("__WIN32__"); \ + builtin_define ("_UWIN"); \ + builtin_define ("__UWIN__"); \ + builtin_define ("__MSVCRT__"); \ + builtin_define ("_STD_INCLUDE_DIR=mingw32"); \ + } \ + while (0) #undef CPP_SPEC -#define CPP_SPEC "-remap %(cpp_cpu) %{posix:-D_POSIX_SOURCE} \ +#define CPP_SPEC "-remap %{posix:-D_POSIX_SOURCE} \ -include /usr/include/astwin32.h \ -idirafter /usr/gnu/include/mingw32" diff --git a/contrib/gcc/config/i386/vsta.h b/contrib/gcc/config/i386/vsta.h index 1bb897d..9388329 100644 --- a/contrib/gcc/config/i386/vsta.h +++ b/contrib/gcc/config/i386/vsta.h @@ -1,5 +1,5 @@ /* Configuration for an i386 running VSTa micro-kernel. - Copyright (C) 1994 Free Software Foundation, Inc. + Copyright (C) 1994, 2002 Free Software Foundation, Inc. Contributed by Rob Savoye (rob@cygnus.com). This file is part of GNU CC. @@ -19,11 +19,14 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ -#define YES_UNDERSCORES +#define TARGET_VERSION fprintf (stderr, " (80386, BSD syntax)"); -#include "i386/gas.h" - -#ifdef CPP_PREDEFINES -#undef CPP_PREDEFINES -#endif -#define CPP_PREDEFINES "-Dunix -DVSTA -Asystem=unix -Asystem=vsta" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("unix"); \ + builtin_define ("VSTA"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=vsta"); \ + } \ + while (0) diff --git a/contrib/gcc/config/i386/vxi386.h b/contrib/gcc/config/i386/vxi386.h index c050ca7..ee4a740 100644 --- a/contrib/gcc/config/i386/vxi386.h +++ b/contrib/gcc/config/i386/vxi386.h @@ -1,5 +1,5 @@ /* Definitions of target machine for GNU compiler. VxWorks i386 version. - Copyright (C) 1998 Free Software Foundation, Inc. + Copyright (C) 1998, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -18,29 +18,33 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#undef CPP_CPU_SPEC -#define CPP_CPU_SPEC "\ --Asystem=unix -Acpu=i386 -Amachine=i386 \ -%{!ansi:-Di386} -D__i386 -D__i386__ \ -%{march=i386:-DCPU=I80386} \ -%{march=i486:-DCPU=I80486 %(cpp_486)} \ -%{march=pentium:-DCPU=PENTIUM -DCPU_VARIANT=PENTIUM %(cpp_586)} \ -%{march=pentiumpro:-DCPU=PENTIUM -DCPU_VARIANT=PENTIUMPRO %(cpp_686)} \ -%{!march=*: \ - %{mcpu=i386:-DCPU=I80386} \ - %{mcpu=i486:-DCPU=I80486 %(cpp_486)} %{m486:-DCPU=I80486 %(cpp_486)} \ - %{mpentium:-DCPU=PENTIUM -DCPU_VARIANT=PENTIUM %(cpp_586)} \ - %{mcpu=pentium:-DCPU=PENTIUM -DCPU_VARIANT=PENTIUM %(cpp_586)} \ - %{mpentiumpro:-DCPU=PENTIUM -DCPU_VARIANT=PENTIUMPRO %(cpp_686)} \ - %{mcpu=pentiumpro:-DCPU=PENTIUM -DCPU_VARIANT=PENTIUMPRO %(cpp_686)} \ - %{!mcpu*:%{!m486:%{!mpentium*:-DCPU=I80386 %(cpp_cpu_default)}}}}" - -#include "i386/i386-aout.h" - -#define HANDLE_SYSV_PRAGMA - -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D__vxworks" +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (80386, VxWorks BSD syntax)"); + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__vxworks"); \ + builtin_assert ("system=unix"); \ + \ + if (TARGET_386) \ + builtin_define ("CPU=I80386"); \ + else if (TARGET_486) \ + builtin_define ("CPU=I80486"); \ + else if (TARGET_PENTIUM) \ + { \ + builtin_define ("CPU=PENTIUM"); \ + builtin_define ("CPU_VARIANT=PENTIUM"); \ + } \ + else if (TARGET_PENTIUMPRO) \ + { \ + builtin_define ("CPU=PENTIUM"); \ + builtin_define ("CPU_VARIANT=PENTIUMPRO"); \ + } \ + } \ + while (0) + +#define HANDLE_SYSV_PRAGMA 1 /* VxWorks does all the library stuff itself. */ diff --git a/contrib/gcc/config/i386/win32.h b/contrib/gcc/config/i386/win32.h index 0aa7a57..93f58c9 100644 --- a/contrib/gcc/config/i386/win32.h +++ b/contrib/gcc/config/i386/win32.h @@ -2,7 +2,7 @@ hosting on Windows NT 3.x, using a Unix style C library and tools, as distinct from winnt.h, which is used to build GCC for use with a windows style library and tool set and uses the Microsoft tools. - Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000 + Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002 Free Software Foundation, Inc. This file is part of GNU CC. @@ -22,15 +22,15 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#define YES_UNDERSCORES - /* Enable parsing of #pragma pack(push,<n>) and #pragma pack(pop). 
*/ #define HANDLE_PRAGMA_PACK_PUSH_POP 1 -#define DBX_DEBUGGING_INFO -#define SDB_DEBUGGING_INFO +#define DBX_DEBUGGING_INFO 1 +#define SDB_DEBUGGING_INFO 1 #define PREFERRED_DEBUGGING_TYPE DBX_DEBUG +#include "i386/unix.h" +#include "i386/bsd.h" #include "i386/gas.h" #include "dbxcoff.h" @@ -62,11 +62,24 @@ Boston, MA 02111-1307, USA. */ { "no-nop-fun-dllimport", MASK_NOP_FUN_DLLIMPORT, "" }, -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-D_WIN32 -DWINNT -D_X86_=1 \ - -D__stdcall=__attribute__((__stdcall__)) \ - -D__cdecl=__attribute__((__cdecl__)) \ - -Asystem=winnt" +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("_WIN32"); \ + builtin_define_std ("WINNT"); \ + builtin_define ("_X86_"); \ + builtin_define ("__stdcall=__attribute__((__stdcall__))"); \ + builtin_define ("__cdecl=__attribute__((__cdecl__))"); \ + builtin_assert ("system=winnt"); \ + if (TARGET_CYGWIN) \ + { \ + builtin_define ("__CYGWIN32__"); \ + builtin_define ("__CYGWIN__"); \ + } \ + else \ + builtin_define ("__MINGW32__"); \ + } \ + while (0) #undef STARTFILE_SPEC @@ -74,9 +87,8 @@ Boston, MA 02111-1307, USA. */ %{mcygwin:crt0%O%s} %{pg:gcrt0%O%s}}" #undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) %{posix:-D_POSIX_SOURCE} \ - %{!mcygwin:-iwithprefixbefore include/mingw32 -D__MINGW32__} \ - %{mcygwin:-D__CYGWIN32__ -D__CYGWIN__}" +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} \ + %{!mcygwin:-iwithprefixbefore include/mingw32}" /* We have to dynamic link to get to the system DLLs. All of libc, libm and the Unix stuff is in cygwin.dll. The import library is called @@ -95,7 +107,6 @@ Boston, MA 02111-1307, USA. */ #define SIZE_TYPE "unsigned int" #define PTRDIFF_TYPE "int" -#define WCHAR_UNSIGNED 1 #define WCHAR_TYPE_SIZE 16 #define WCHAR_TYPE "short unsigned int" /* Currently we do not have the atexit() function, @@ -103,63 +114,10 @@ Boston, MA 02111-1307, USA. */ #define NEED_ATEXIT 1 -/* Define this macro if references to a symbol must be treated - differently depending on something about the variable or - function named by the symbol (such as what section it is in). - - On i386, if using PIC, mark a SYMBOL_REF for a non-global symbol - so that we may access it directly in the GOT. - - On i386 running Windows NT, modify the assembler name with a suffix - consisting of an atsign (@) followed by string of digits that represents - the number of bytes of arguments passed to the function, if it has the - attribute STDCALL. */ - -#ifdef ENCODE_SECTION_INFO -#undef ENCODE_SECTION_INFO -#define ENCODE_SECTION_INFO(DECL) \ -do \ - { \ - if (flag_pic) \ - { \ - rtx rtl = (TREE_CODE_CLASS (TREE_CODE (DECL)) != 'd' \ - ? TREE_CST_RTL (DECL) : DECL_RTL (DECL)); \ - SYMBOL_REF_FLAG (XEXP (rtl, 0)) \ - = (TREE_CODE_CLASS (TREE_CODE (DECL)) != 'd' \ - || ! TREE_PUBLIC (DECL)); \ - } \ - if (TREE_CODE (DECL) == FUNCTION_DECL) \ - if (lookup_attribute ("stdcall", \ - TYPE_ATTRIBUTES (TREE_TYPE (DECL)))) \ - XEXP (DECL_RTL (DECL), 0) = \ - gen_rtx (SYMBOL_REF, Pmode, gen_stdcall_suffix (DECL)); \ - } \ -while (0) -#endif - -/* This macro gets just the user-specified name - out of the string in a SYMBOL_REF. Discard - trailing @[NUM] encoded by ENCODE_SECTION_INFO. - Do we need the stripping of leading '*'? 
*/ -#undef STRIP_NAME_ENCODING -#define STRIP_NAME_ENCODING(VAR,SYMBOL_NAME) \ -do { \ - const char *_p; \ - const char *const _name = ((SYMBOL_NAME) + ((SYMBOL_NAME)[0] == '*'));\ - for (_p = _name; *_p && *_p != '@'; ++_p) \ - ; \ - if (*_p == '@') \ - { \ - int _len = _p - _name; \ - char *_new_name = (char *) alloca (_len + 1); \ - strncpy (_new_name, _name, _len); \ - _new_name[_len] = '\0'; \ - (VAR) = _new_name; \ - } \ - else \ - (VAR) = _name; \ -} while (0) - +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO i386_pe_encode_section_info +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING i386_pe_strip_name_encoding_full /* Emit code to check the stack when allocating more that 4000 bytes in one go. */ @@ -187,8 +145,8 @@ do { \ symbols must be explicitly imported from shared libraries (DLLs). */ #define MULTIPLE_SYMBOL_SPACES -extern void i386_pe_unique_section (); -#define UNIQUE_SECTION(DECL,RELOC) i386_pe_unique_section (DECL, RELOC) +extern void i386_pe_unique_section PARAMS ((tree, int)); +#define TARGET_ASM_UNIQUE_SECTION i386_pe_unique_section #define SUPPORTS_ONE_ONLY 1 diff --git a/contrib/gcc/config/i386/winnt.c b/contrib/gcc/config/i386/winnt.c index 6928a8c..00b3dfd 100644 --- a/contrib/gcc/config/i386/winnt.c +++ b/contrib/gcc/config/i386/winnt.c @@ -1,6 +1,7 @@ /* Subroutines for insn-output.c for Windows NT. Contributed by Douglas Rupp (drupp@cs.washington.edu) - Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002 + Free Software Foundation, Inc. This file is part of GNU CC. @@ -30,6 +31,7 @@ Boston, MA 02111-1307, USA. */ #include "tm_p.h" #include "toplev.h" #include "hashtab.h" +#include "ggc.h" /* i386/PE specific attribute support. @@ -133,7 +135,7 @@ associated_type (decl) return t; } -/* Return non-zero if DECL is a dllexport'd object. */ +/* Return nonzero if DECL is a dllexport'd object. */ int i386_pe_dllexport_p (decl) @@ -160,7 +162,7 @@ i386_pe_dllexport_p (decl) return 0; } -/* Return non-zero if DECL is a dllimport'd object. */ +/* Return nonzero if DECL is a dllimport'd object. */ int i386_pe_dllimport_p (decl) @@ -191,22 +193,24 @@ i386_pe_dllimport_p (decl) return 0; } -/* Return non-zero if SYMBOL is marked as being dllexport'd. */ +/* Return nonzero if SYMBOL is marked as being dllexport'd. */ int i386_pe_dllexport_name_p (symbol) const char *symbol; { - return symbol[0] == '@' && symbol[1] == 'e' && symbol[2] == '.'; + return symbol[0] == DLL_IMPORT_EXPORT_PREFIX + && symbol[1] == 'e' && symbol[2] == '.'; } -/* Return non-zero if SYMBOL is marked as being dllimport'd. */ +/* Return nonzero if SYMBOL is marked as being dllimport'd. */ int i386_pe_dllimport_name_p (symbol) const char *symbol; { - return symbol[0] == '@' && symbol[1] == 'i' && symbol[2] == '.'; + return symbol[0] == DLL_IMPORT_EXPORT_PREFIX + && symbol[1] == 'i' && symbol[2] == '.'; } /* Mark a DECL as being dllexport'd. @@ -235,7 +239,7 @@ i386_pe_mark_dllexport (decl) return; /* already done */ newname = alloca (strlen (oldname) + 4); - sprintf (newname, "@e.%s", oldname); + sprintf (newname, "%ce.%s", DLL_IMPORT_EXPORT_PREFIX, oldname); /* We pass newname through get_identifier to ensure it has a unique address. 
RTL processing can sometimes peek inside the symbol ref @@ -310,7 +314,7 @@ i386_pe_mark_dllimport (decl) } newname = alloca (strlen (oldname) + 11); - sprintf (newname, "@i._imp__%s", oldname); + sprintf (newname, "%ci._imp__%s", DLL_IMPORT_EXPORT_PREFIX, oldname); /* We pass newname through get_identifier to ensure it has a unique address. RTL processing can sometimes peek inside the symbol ref @@ -365,12 +369,14 @@ gen_stdcall_suffix (decl) return IDENTIFIER_POINTER (get_identifier (newsym)); } -/* Cover function to implement ENCODE_SECTION_INFO. */ - void -i386_pe_encode_section_info (decl) +i386_pe_encode_section_info (decl, first) tree decl; + int first; { + if (!first) + return; + /* This bit is copied from i386.h. */ if (optimize > 0 && TREE_CONSTANT (decl) && (!flag_writable_strings || TREE_CODE (decl) != STRING_CST)) @@ -395,8 +401,8 @@ i386_pe_encode_section_info (decl) i386_pe_mark_dllimport (decl); /* It might be that DECL has already been marked as dllimport, but a subsequent definition nullified that. The attribute is gone but - DECL_RTL still has @i._imp__foo. We need to remove that. Ditto - for the DECL_NON_ADDR_CONST_P flag. */ + DECL_RTL still has (DLL_IMPORT_EXPORT_PREFIX)i._imp__foo. We need + to remove that. Ditto for the DECL_NON_ADDR_CONST_P flag. */ else if ((TREE_CODE (decl) == FUNCTION_DECL || TREE_CODE (decl) == VAR_DECL) && DECL_RTL (decl) != NULL_RTX @@ -418,7 +424,34 @@ i386_pe_encode_section_info (decl) } } -/* Cover function for UNIQUE_SECTION. */ +/* Strip only the leading encoding, leaving the stdcall suffix. */ + +const char * +i386_pe_strip_name_encoding (str) + const char *str; +{ + if (*str == DLL_IMPORT_EXPORT_PREFIX) + str += 3; + if (*str == '*') + str += 1; + return str; +} + +/* Also strip the stdcall suffix. */ + +const char * +i386_pe_strip_name_encoding_full (str) + const char *str; +{ + const char *p; + const char *name = i386_pe_strip_name_encoding (str); + + p = strchr (name, '@'); + if (p) + return ggc_alloc_string (name, p - name); + + return name; +} void i386_pe_unique_section (decl, reloc) @@ -430,8 +463,7 @@ i386_pe_unique_section (decl, reloc) char *string; name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); - /* Strip off any encoding in fnname. */ - STRIP_NAME_ENCODING (name, name); + name = i386_pe_strip_name_encoding_full (name); /* The object is put in, for example, section .text$foo. The linker will then ultimately place them in .text @@ -441,15 +473,8 @@ i386_pe_unique_section (decl, reloc) without a .rdata section. */ if (TREE_CODE (decl) == FUNCTION_DECL) prefix = ".text$"; -/* else if (DECL_INITIAL (decl) == 0 - || DECL_INITIAL (decl) == error_mark_node) - prefix = ".bss"; */ - else if (DECL_READONLY_SECTION (decl, reloc)) -#ifdef READONLY_DATA_SECTION + else if (decl_readonly_section (decl, reloc)) prefix = ".rdata$"; -#else - prefix = ".text$"; -#endif else prefix = ".data$"; len = strlen (name) + strlen (prefix); @@ -494,7 +519,7 @@ i386_pe_section_type_flags (decl, name, reloc) if (decl && TREE_CODE (decl) == FUNCTION_DECL) flags = SECTION_CODE; - else if (decl && DECL_READONLY_SECTION (decl, reloc)) + else if (decl && decl_readonly_section (decl, reloc)) flags = 0; else { @@ -560,7 +585,7 @@ i386_pe_asm_named_section (name, flags) /* Mark a function appropriately. This should only be called for functions for which we are not emitting COFF debugging information. 
FILE is the assembler output file, NAME is the name of the - function, and PUBLIC is non-zero if the function is globally + function, and PUBLIC is nonzero if the function is globally visible. */ void @@ -598,7 +623,7 @@ i386_pe_record_external_function (name) { struct extern_list *p; - p = (struct extern_list *) permalloc (sizeof *p); + p = (struct extern_list *) xmalloc (sizeof *p); p->next = extern_head; p->name = name; extern_head = p; @@ -628,7 +653,7 @@ i386_pe_record_exported_symbol (name, is_data) { struct export_list *p; - p = (struct export_list *) permalloc (sizeof *p); + p = (struct export_list *) xmalloc (sizeof *p); p->next = export_head; p->name = name; p->is_data = is_data; @@ -668,7 +693,7 @@ i386_pe_asm_file_end (file) for (q = export_head; q != NULL; q = q->next) { fprintf (file, "\t.ascii \" -export:%s%s\"\n", - I386_PE_STRIP_ENCODING (q->name), + i386_pe_strip_name_encoding (q->name), (q->is_data) ? ",data" : ""); } } diff --git a/contrib/gcc/config/i386/x86-64.h b/contrib/gcc/config/i386/x86-64.h index 56e4684..37a2a30 100644 --- a/contrib/gcc/config/i386/x86-64.h +++ b/contrib/gcc/config/i386/x86-64.h @@ -29,16 +29,8 @@ Boston, MA 02111-1307, USA. */ /* Output assembler code to FILE to call the profiler. */ #define NO_PROFILE_COUNTERS -#undef FUNCTION_PROFILER -#define FUNCTION_PROFILER(FILE, LABELNO) \ -{ \ - if (TARGET_64BIT && flag_pic) \ - fprintf (FILE, "\tcall\t*mcount@PLT\n"); \ - else if (flag_pic) \ - fprintf (FILE, "\tcall\t*mcount@GOT(%%ebx)\n"); \ - else \ - fprintf (FILE, "\tcall\tmcount\n"); \ -} +#undef MCOUNT_NAME +#define MCOUNT_NAME "mcount" #undef SIZE_TYPE #define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int") @@ -85,9 +77,8 @@ Boston, MA 02111-1307, USA. */ /* i386 System V Release 4 uses DWARF debugging info. x86-64 ABI specifies DWARF2. */ -#undef DWARF2_DEBUGGING_INFO #undef DWARF_DEBUGGING_INFO -#define DWARF2_DEBUGGING_INFO +#define DWARF2_DEBUGGING_INFO 1 #define DWARF2_UNWIND_INFO 1 /* Incorrectly autodetected in cross compilation. */ #undef HAVE_AS_DWARF2_DEBUG_LINE diff --git a/contrib/gcc/config/i386/xm-vsta.h b/contrib/gcc/config/i386/xm-vsta.h index 735d1d5..53943ea 100644 --- a/contrib/gcc/config/i386/xm-vsta.h +++ b/contrib/gcc/config/i386/xm-vsta.h @@ -1,2 +1,11 @@ /* Use semicolons to separate elements of a path. */ #define PATH_SEPARATOR ';' + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("unix"); \ + } \ + while (0) + +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" diff --git a/contrib/gcc/config/i386/xmmintrin.h b/contrib/gcc/config/i386/xmmintrin.h index 409bf17..43a05c1 100644 --- a/contrib/gcc/config/i386/xmmintrin.h +++ b/contrib/gcc/config/i386/xmmintrin.h @@ -30,6 +30,10 @@ #ifndef _XMMINTRIN_H_INCLUDED #define _XMMINTRIN_H_INCLUDED +#ifndef __SSE__ +# error "SSE instruction set not enabled" +#else + /* We need type definitions from the MMX header file. */ #include <mmintrin.h> @@ -471,6 +475,16 @@ _mm_cvtss_si32 (__m128 __A) return __builtin_ia32_cvtss2si ((__v4sf) __A); } +#ifdef __x86_64__ +/* Convert the lower SPFP value to a 32-bit integer according to the current + rounding mode. */ +static __inline long long +_mm_cvtss_si64x (__m128 __A) +{ + return __builtin_ia32_cvtss2si64 ((__v4sf) __A); +} +#endif + /* Convert the two lower SPFP values to 32-bit integers according to the current rounding mode. Return the integers in packed form. 
*/ static __inline __m64 @@ -486,6 +500,15 @@ _mm_cvttss_si32 (__m128 __A) return __builtin_ia32_cvttss2si ((__v4sf) __A); } +#ifdef __x86_64__ +/* Truncate the lower SPFP value to a 32-bit integer. */ +static __inline long long +_mm_cvttss_si64x (__m128 __A) +{ + return __builtin_ia32_cvttss2si64 ((__v4sf) __A); +} +#endif + /* Truncate the two lower SPFP values to 32-bit integers. Return the integers in packed form. */ static __inline __m64 @@ -501,6 +524,15 @@ _mm_cvtsi32_ss (__m128 __A, int __B) return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B); } +#ifdef __x86_64__ +/* Convert B to a SPFP value and insert it as element zero in A. */ +static __inline __m128 +_mm_cvtsi64x_ss (__m128 __A, long long __B) +{ + return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); +} +#endif + /* Convert the two 32-bit values in B to SPFP form and insert them as the two lower elements in A. */ static __inline __m128 @@ -602,7 +634,7 @@ _mm_cvtps_pi16(__m128 __A) __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf); __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf); __v2si __losi = __builtin_ia32_cvtps2pi (__losf); - return (__m64) __builtin_ia32_packssdw (__losi, __hisi); + return (__m64) __builtin_ia32_packssdw (__hisi, __losi); } /* Convert the four SPFP values in A to four signed 8-bit integers. */ @@ -644,7 +676,7 @@ _mm_unpacklo_ps (__m128 __A, __m128 __B) /* Sets the upper two SPFP values with 64-bits of data loaded from P; the lower two values are passed through from A. */ static __inline __m128 -_mm_loadh_pi (__m128 __A, __m64 *__P) +_mm_loadh_pi (__m128 __A, __m64 const *__P) { return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P); } @@ -673,7 +705,7 @@ _mm_movelh_ps (__m128 __A, __m128 __B) /* Sets the lower two SPFP values with 64-bits of data loaded from P; the upper two values are passed through from A. */ static __inline __m128 -_mm_loadl_pi (__m128 __A, __m64 *__P) +_mm_loadl_pi (__m128 __A, __m64 const *__P) { return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P); } @@ -758,42 +790,42 @@ _MM_SET_FLUSH_ZERO_MODE (unsigned int __mode) /* Create a vector with element 0 as *P and the rest zero. */ static __inline __m128 -_mm_load_ss (float *__P) +_mm_load_ss (float const *__P) { return (__m128) __builtin_ia32_loadss (__P); } /* Create a vector with all four elements equal to *P. */ static __inline __m128 -_mm_load1_ps (float *__P) +_mm_load1_ps (float const *__P) { __v4sf __tmp = __builtin_ia32_loadss (__P); return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,0,0,0)); } static __inline __m128 -_mm_load_ps1 (float *__P) +_mm_load_ps1 (float const *__P) { return _mm_load1_ps (__P); } /* Load four SPFP values from P. The address must be 16-byte aligned. */ static __inline __m128 -_mm_load_ps (float *__P) +_mm_load_ps (float const *__P) { return (__m128) __builtin_ia32_loadaps (__P); } /* Load four SPFP values from P. The address need not be 16-byte aligned. */ static __inline __m128 -_mm_loadu_ps (float *__P) +_mm_loadu_ps (float const *__P) { return (__m128) __builtin_ia32_loadups (__P); } /* Load four SPFP values in reverse order. The address must be aligned. */ static __inline __m128 -_mm_loadr_ps (float *__P) +_mm_loadr_ps (float const *__P) { __v4sf __tmp = __builtin_ia32_loadaps (__P); return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3)); @@ -887,7 +919,7 @@ _mm_storeu_ps (float *__P, __m128 __A) __builtin_ia32_storeups (__P, (__v4sf)__A); } -/* Store four SPFP values in reverse order. 
The addres must be aligned. */ +/* Store four SPFP values in reverse order. The address must be aligned. */ static __inline void _mm_storer_ps (float *__P, __m128 __A) { @@ -1033,7 +1065,7 @@ _mm_prefetch (void *__P, enum _mm_hint __I) static __inline void _mm_stream_pi (__m64 *__P, __m64 __A) { - __builtin_ia32_movntq (__P, (long long)__A); + __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A); } /* Likewise. The address must be 16-byte aligned. */ @@ -1074,4 +1106,1469 @@ do { \ (row3) = __builtin_ia32_shufps (__t2, __t3, 0xDD); \ } while (0) +#ifdef __SSE2__ +/* SSE2 */ +typedef int __v2df __attribute__ ((mode (V2DF))); +typedef int __v2di __attribute__ ((mode (V2DI))); +typedef int __v4si __attribute__ ((mode (V4SI))); +typedef int __v8hi __attribute__ ((mode (V8HI))); +typedef int __v16qi __attribute__ ((mode (V16QI))); + +/* Create a selector for use with the SHUFPD instruction. */ +#define _MM_SHUFFLE2(fp1,fp0) \ + (((fp1) << 1) | (fp0)) + +#define __m128i __v2di +#define __m128d __v2df + +/* Create a vector with element 0 as *P and the rest zero. */ +static __inline __m128d +_mm_load_sd (double const *__P) +{ + return (__m128d) __builtin_ia32_loadsd (__P); +} + +/* Create a vector with all two elements equal to *P. */ +static __inline __m128d +_mm_load1_pd (double const *__P) +{ + __v2df __tmp = __builtin_ia32_loadsd (__P); + return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0)); +} + +static __inline __m128d +_mm_load_pd1 (double const *__P) +{ + return _mm_load1_pd (__P); +} + +/* Load two DPFP values from P. The addresd must be 16-byte aligned. */ +static __inline __m128d +_mm_load_pd (double const *__P) +{ + return (__m128d) __builtin_ia32_loadapd (__P); +} + +/* Load two DPFP values from P. The addresd need not be 16-byte aligned. */ +static __inline __m128d +_mm_loadu_pd (double const *__P) +{ + return (__m128d) __builtin_ia32_loadupd (__P); +} + +/* Load two DPFP values in reverse order. The addresd must be aligned. */ +static __inline __m128d +_mm_loadr_pd (double const *__P) +{ + __v2df __tmp = __builtin_ia32_loadapd (__P); + return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); +} + +/* Create a vector with element 0 as F and the rest zero. */ +static __inline __m128d +_mm_set_sd (double __F) +{ + return (__m128d) __builtin_ia32_loadsd (&__F); +} + +/* Create a vector with all two elements equal to F. */ +static __inline __m128d +_mm_set1_pd (double __F) +{ + __v2df __tmp = __builtin_ia32_loadsd (&__F); + return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0)); +} + +static __inline __m128d +_mm_set_pd1 (double __F) +{ + return _mm_set1_pd (__F); +} + +/* Create the vector [Z Y]. */ +static __inline __m128d +_mm_set_pd (double __Z, double __Y) +{ + union { + double __a[2]; + __m128d __v; + } __u; + + __u.__a[0] = __Y; + __u.__a[1] = __Z; + + return __u.__v; +} + +/* Create the vector [Y Z]. */ +static __inline __m128d +_mm_setr_pd (double __Z, double __Y) +{ + return _mm_set_pd (__Y, __Z); +} + +/* Create a vector of zeros. */ +static __inline __m128d +_mm_setzero_pd (void) +{ + return (__m128d) __builtin_ia32_setzeropd (); +} + +/* Stores the lower DPFP value. */ +static __inline void +_mm_store_sd (double *__P, __m128d __A) +{ + __builtin_ia32_storesd (__P, (__v2df)__A); +} + +/* Store the lower DPFP value acrosd two words. 
*/ +static __inline void +_mm_store1_pd (double *__P, __m128d __A) +{ + __v2df __va = (__v2df)__A; + __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,0)); + __builtin_ia32_storeapd (__P, __tmp); +} + +static __inline void +_mm_store_pd1 (double *__P, __m128d __A) +{ + _mm_store1_pd (__P, __A); +} + +/* Store two DPFP values. The addresd must be 16-byte aligned. */ +static __inline void +_mm_store_pd (double *__P, __m128d __A) +{ + __builtin_ia32_storeapd (__P, (__v2df)__A); +} + +/* Store two DPFP values. The addresd need not be 16-byte aligned. */ +static __inline void +_mm_storeu_pd (double *__P, __m128d __A) +{ + __builtin_ia32_storeupd (__P, (__v2df)__A); +} + +/* Store two DPFP values in reverse order. The addresd must be aligned. */ +static __inline void +_mm_storer_pd (double *__P, __m128d __A) +{ + __v2df __va = (__v2df)__A; + __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,1)); + __builtin_ia32_storeapd (__P, __tmp); +} + +/* Sets the low DPFP value of A from the low value of B. */ +static __inline __m128d +_mm_move_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); +} + + +static __inline __m128d +_mm_add_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_add_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_sub_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_sub_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_mul_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_mul_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_div_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_div_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_sqrt_pd (__m128d __A) +{ + return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); +} + +/* Return pair {sqrt (A[0), B[1]}. 
*/ +static __inline __m128d +_mm_sqrt_sd (__m128d __A, __m128d __B) +{ + __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); + return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); +} + +static __inline __m128d +_mm_min_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_min_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_max_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_max_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_and_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_andnot_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_or_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_xor_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpeq_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmplt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmple_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpgt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpge_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpneq_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpnlt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpnle_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpngt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpnge_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpord_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpunord_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpeq_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmplt_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmple_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpgt_sd (__m128d __A, __m128d __B) +{ + return 
(__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmpltsd ((__v2df) __B, + (__v2df) + __A)); +} + +static __inline __m128d +_mm_cmpge_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmplesd ((__v2df) __B, + (__v2df) + __A)); +} + +static __inline __m128d +_mm_cmpneq_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpnlt_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpnle_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpngt_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmpnltsd ((__v2df) __B, + (__v2df) + __A)); +} + +static __inline __m128d +_mm_cmpnge_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmpnlesd ((__v2df) __B, + (__v2df) + __A)); +} + +static __inline __m128d +_mm_cmpord_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_cmpunord_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comieq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comilt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comile_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comigt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comige_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_comineq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomieq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomilt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomile_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomigt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomige_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); +} + +static __inline int +_mm_ucomineq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); +} + +/* Create a vector with element 0 as *P and the rest zero. 
*/ + +static __inline __m128i +_mm_load_si128 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_loaddqa ((char const *)__P); +} + +static __inline __m128i +_mm_loadu_si128 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); +} + +static __inline __m128i +_mm_loadl_epi64 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_movq2dq (*(unsigned long long *)__P); +} + +static __inline void +_mm_store_si128 (__m128i *__P, __m128i __B) +{ + __builtin_ia32_storedqa ((char *)__P, (__v16qi)__B); +} + +static __inline void +_mm_storeu_si128 (__m128i *__P, __m128i __B) +{ + __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); +} + +static __inline void +_mm_storel_epi64 (__m128i *__P, __m128i __B) +{ + *(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B); +} + +static __inline __m64 +_mm_movepi64_pi64 (__m128i __B) +{ + return (__m64) __builtin_ia32_movdq2q ((__v2di)__B); +} + +static __inline __m128i +_mm_move_epi64 (__m128i __A) +{ + return (__m128i) __builtin_ia32_movq ((__v2di)__A); +} + +/* Create a vector of zeros. */ +static __inline __m128i +_mm_setzero_si128 (void) +{ + return (__m128i) __builtin_ia32_setzero128 (); +} + +static __inline __m128i +_mm_set_epi64 (__m64 __A, __m64 __B) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B); + return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp2, __tmp); +} + +/* Create the vector [Z Y X W]. */ +static __inline __m128i +_mm_set_epi32 (int __Z, int __Y, int __X, int __W) +{ + union { + int __a[4]; + __m128i __v; + } __u; + + __u.__a[0] = __W; + __u.__a[1] = __X; + __u.__a[2] = __Y; + __u.__a[3] = __Z; + + return __u.__v; +} + +#ifdef __x86_64__ +/* Create the vector [Z Y]. */ +static __inline __m128i +_mm_set_epi64x (long long __Z, long long __Y) +{ + union { + long __a[2]; + __m128i __v; + } __u; + + __u.__a[0] = __Y; + __u.__a[1] = __Z; + + return __u.__v; +} +#endif + +/* Create the vector [S T U V Z Y X W]. */ +static __inline __m128i +_mm_set_epi16 (short __Z, short __Y, short __X, short __W, + short __V, short __U, short __T, short __S) +{ + union { + short __a[8]; + __m128i __v; + } __u; + + __u.__a[0] = __S; + __u.__a[1] = __T; + __u.__a[2] = __U; + __u.__a[3] = __V; + __u.__a[4] = __W; + __u.__a[5] = __X; + __u.__a[6] = __Y; + __u.__a[7] = __Z; + + return __u.__v; +} + +/* Create the vector [S T U V Z Y X W]. 
*/ +static __inline __m128i +_mm_set_epi8 (char __Z, char __Y, char __X, char __W, + char __V, char __U, char __T, char __S, + char __Z1, char __Y1, char __X1, char __W1, + char __V1, char __U1, char __T1, char __S1) +{ + union { + char __a[16]; + __m128i __v; + } __u; + + __u.__a[0] = __S1; + __u.__a[1] = __T1; + __u.__a[2] = __U1; + __u.__a[3] = __V1; + __u.__a[4] = __W1; + __u.__a[5] = __X1; + __u.__a[6] = __Y1; + __u.__a[7] = __Z1; + __u.__a[8] = __S; + __u.__a[9] = __T; + __u.__a[10] = __U; + __u.__a[11] = __V; + __u.__a[12] = __W; + __u.__a[13] = __X; + __u.__a[14] = __Y; + __u.__a[15] = __Z; + + return __u.__v; +} + +static __inline __m128i +_mm_set1_epi64 (__m64 __A) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp); +} + +static __inline __m128i +_mm_set1_epi32 (int __A) +{ + __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__A); + return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); +} + +#ifdef __x86_64__ +static __inline __m128i +_mm_set1_epi64x (long long __A) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0)); +} +#endif + +static __inline __m128i +_mm_set1_epi16 (short __A) +{ + int __Acopy = (unsigned short)__A; + __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy); + __tmp = (__v4si)__builtin_ia32_punpcklwd128 ((__v8hi)__tmp, (__v8hi)__tmp); + return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); +} + +static __inline __m128i +_mm_set1_epi8 (char __A) +{ + int __Acopy = (unsigned char)__A; + __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy); + __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp); + __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp); + return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); +} + +static __inline __m128i +_mm_setr_epi64 (__m64 __A, __m64 __B) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B); + return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp2); +} + +/* Create the vector [Z Y X W]. */ +static __inline __m128i +_mm_setr_epi32 (int __W, int __X, int __Y, int __Z) +{ + union { + int __a[4]; + __m128i __v; + } __u; + + __u.__a[0] = __W; + __u.__a[1] = __X; + __u.__a[2] = __Y; + __u.__a[3] = __Z; + + return __u.__v; +} +/* Create the vector [S T U V Z Y X W]. */ +static __inline __m128i +_mm_setr_epi16 (short __S, short __T, short __U, short __V, + short __W, short __X, short __Y, short __Z) +{ + union { + short __a[8]; + __m128i __v; + } __u; + + __u.__a[0] = __S; + __u.__a[1] = __T; + __u.__a[2] = __U; + __u.__a[3] = __V; + __u.__a[4] = __W; + __u.__a[5] = __X; + __u.__a[6] = __Y; + __u.__a[7] = __Z; + + return __u.__v; +} + +/* Create the vector [S T U V Z Y X W]. 
*/ +static __inline __m128i +_mm_setr_epi8 (char __S1, char __T1, char __U1, char __V1, + char __W1, char __X1, char __Y1, char __Z1, + char __S, char __T, char __U, char __V, + char __W, char __X, char __Y, char __Z) +{ + union { + char __a[16]; + __m128i __v; + } __u; + + __u.__a[0] = __S1; + __u.__a[1] = __T1; + __u.__a[2] = __U1; + __u.__a[3] = __V1; + __u.__a[4] = __W1; + __u.__a[5] = __X1; + __u.__a[6] = __Y1; + __u.__a[7] = __Z1; + __u.__a[8] = __S; + __u.__a[9] = __T; + __u.__a[10] = __U; + __u.__a[11] = __V; + __u.__a[12] = __W; + __u.__a[13] = __X; + __u.__a[14] = __Y; + __u.__a[15] = __Z; + + return __u.__v; +} + +static __inline __m128d +_mm_cvtepi32_pd (__m128i __A) +{ + return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); +} + +static __inline __m128 +_mm_cvtepi32_ps (__m128i __A) +{ + return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); +} + +static __inline __m128i +_mm_cvtpd_epi32 (__m128d __A) +{ + return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); +} + +static __inline __m64 +_mm_cvtpd_pi32 (__m128d __A) +{ + return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); +} + +static __inline __m128 +_mm_cvtpd_ps (__m128d __A) +{ + return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); +} + +static __inline __m128i +_mm_cvttpd_epi32 (__m128d __A) +{ + return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); +} + +static __inline __m64 +_mm_cvttpd_pi32 (__m128d __A) +{ + return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A); +} + +static __inline __m128d +_mm_cvtpi32_pd (__m64 __A) +{ + return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); +} + +static __inline __m128i +_mm_cvtps_epi32 (__m128 __A) +{ + return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A); +} + +static __inline __m128i +_mm_cvttps_epi32 (__m128 __A) +{ + return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A); +} + +static __inline __m128d +_mm_cvtps_pd (__m128 __A) +{ + return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A); +} + +static __inline int +_mm_cvtsd_si32 (__m128d __A) +{ + return __builtin_ia32_cvtsd2si ((__v2df) __A); +} + +#ifdef __x86_64__ +static __inline long long +_mm_cvtsd_si64x (__m128d __A) +{ + return __builtin_ia32_cvtsd2si64 ((__v2df) __A); +} +#endif + +static __inline int +_mm_cvttsd_si32 (__m128d __A) +{ + return __builtin_ia32_cvttsd2si ((__v2df) __A); +} + +#ifdef __x86_64__ +static __inline long long +_mm_cvttsd_si64x (__m128d __A) +{ + return __builtin_ia32_cvttsd2si64 ((__v2df) __A); +} +#endif + +static __inline __m128 +_mm_cvtsd_ss (__m128 __A, __m128d __B) +{ + return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); +} + +static __inline __m128d +_mm_cvtsi32_sd (__m128d __A, int __B) +{ + return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B); +} + +#ifdef __x86_64__ +static __inline __m128d +_mm_cvtsi64x_sd (__m128d __A, long long __B) +{ + return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); +} +#endif + +static __inline __m128d +_mm_cvtss_sd (__m128d __A, __m128 __B) +{ + return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); +} + +#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C))) + +static __inline __m128d +_mm_unpackhi_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_unpacklo_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_loadh_pd (__m128d __A, double const *__B) +{ + return 
(__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B); +} + +static __inline void +_mm_storeh_pd (double *__A, __m128d __B) +{ + __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B); +} + +static __inline __m128d +_mm_loadl_pd (__m128d __A, double const *__B) +{ + return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B); +} + +static __inline void +_mm_storel_pd (double *__A, __m128d __B) +{ + __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B); +} + +static __inline int +_mm_movemask_pd (__m128d __A) +{ + return __builtin_ia32_movmskpd ((__v2df)__A); +} + +static __inline __m128i +_mm_packs_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_packs_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_packus_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_unpackhi_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_unpackhi_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_unpackhi_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_unpackhi_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_unpacklo_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_unpacklo_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_unpacklo_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_unpacklo_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_add_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_add_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_add_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_add_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_adds_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_adds_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_adds_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_adds_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_sub_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubb128 
((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_sub_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_sub_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_sub_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_subs_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_subs_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_subs_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_subs_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_madd_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_mulhi_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_mullo_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m64 +_mm_mul_su32 (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B); +} + +static __inline __m128i +_mm_mul_epu32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_sll_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psllw128 ((__v8hi)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_sll_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pslld128 ((__v4si)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_sll_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psllq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_sra_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_sra_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_srl_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_srl_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_srl_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_slli_epi16 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); +} + +static __inline __m128i +_mm_slli_epi32 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); +} + +static __inline __m128i +_mm_slli_epi64 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); +} + +static __inline __m128i +_mm_srai_epi16 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); +} + +static __inline __m128i +_mm_srai_epi32 (__m128i __A, int __B) +{ 
+ return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); +} + +#if 0 +static __m128i __attribute__((__always_inline__)) +_mm_srli_si128 (__m128i __A, const int __B) +{ + return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) +} + +static __m128i __attribute__((__always_inline__)) +_mm_srli_si128 (__m128i __A, const int __B) +{ + return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) +} +#endif +#define _mm_srli_si128(__A, __B) ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) +#define _mm_slli_si128(__A, __B) ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) + +static __inline __m128i +_mm_srli_epi16 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); +} + +static __inline __m128i +_mm_srli_epi32 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); +} + +static __inline __m128i +_mm_srli_epi64 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); +} + +static __inline __m128i +_mm_and_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_andnot_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_or_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_xor_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i +_mm_cmpeq_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_cmpeq_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_cmpeq_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B); +} + +static __inline __m128i +_mm_cmplt_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A); +} + +static __inline __m128i +_mm_cmplt_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A); +} + +static __inline __m128i +_mm_cmplt_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A); +} + +static __inline __m128i +_mm_cmpgt_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_cmpgt_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_cmpgt_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B); +} + +#define _mm_extract_epi16(__A, __B) __builtin_ia32_pextrw128 ((__v8hi)__A, __B) + +#define _mm_insert_epi16(__A, __B, __C) ((__m128i)__builtin_ia32_pinsrw128 ((__v8hi)__A, __B, __C)) + +static __inline __m128i +_mm_max_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_max_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_min_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i 
+_mm_min_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline int +_mm_movemask_epi8 (__m128i __A) +{ + return __builtin_ia32_pmovmskb128 ((__v16qi)__A); +} + +static __inline __m128i +_mm_mulhi_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); +} + +#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B)) +#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B)) +#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B)) + +static __inline void +_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) +{ + __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C); +} + +static __inline __m128i +_mm_avg_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline __m128i +_mm_avg_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B); +} + +static __inline __m128i +_mm_sad_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B); +} + +static __inline void +_mm_stream_si32 (int *__A, int __B) +{ + __builtin_ia32_movnti (__A, __B); +} + +static __inline void +_mm_stream_si128 (__m128i *__A, __m128i __B) +{ + __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B); +} + +static __inline void +_mm_stream_pd (double *__A, __m128d __B) +{ + __builtin_ia32_movntpd (__A, (__v2df)__B); +} + +static __inline __m128i +_mm_movpi64_epi64 (__m64 __A) +{ + return (__m128i)__builtin_ia32_movq2dq ((unsigned long long)__A); +} + +static __inline void +_mm_clflush (void const *__A) +{ + return __builtin_ia32_clflush (__A); +} + +static __inline void +_mm_lfence (void) +{ + __builtin_ia32_lfence (); +} + +static __inline void +_mm_mfence (void) +{ + __builtin_ia32_mfence (); +} + +static __inline __m128i +_mm_cvtsi32_si128 (int __A) +{ + return (__m128i) __builtin_ia32_loadd (&__A); +} + +#ifdef __x86_64__ +static __inline __m128i +_mm_cvtsi64x_si128 (long long __A) +{ + return (__m128i) __builtin_ia32_movq2dq (__A); +} +#endif + +static __inline int +_mm_cvtsi128_si32 (__m128i __A) +{ + int __tmp; + __builtin_ia32_stored (&__tmp, (__v4si)__A); + return __tmp; +} + +#ifdef __x86_64__ +static __inline long long +_mm_cvtsi128_si64x (__m128i __A) +{ + return __builtin_ia32_movdq2q ((__v2di)__A); +} +#endif + +#endif /* __SSE2__ */ + +#endif /* __SSE__ */ #endif /* _XMMINTRIN_H_INCLUDED */ |
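The xmmintrin.h hunks above gate the whole header behind __SSE__ and add the SSE2 packed-double and packed-integer intrinsics under __SSE2__; at this point they are added to xmmintrin.h itself (later releases move them to a separate emmintrin.h). As a minimal sketch of how the new double-precision functions compose, assuming compilation with something like "gcc -msse2" and an SSE2-capable processor at run time, the program below is illustrative only and is not part of the patch:

#include <stdio.h>
#include <xmmintrin.h>

int
main (void)
{
  double out[2];

  /* _mm_set_pd builds the vector [Z Y], so lane 0 of A holds 3.0
     and lane 1 holds 1.0.  */
  __m128d a = _mm_set_pd (1.0, 3.0);
  __m128d b = _mm_set1_pd (2.0);        /* [2.0 2.0] */
  __m128d sum = _mm_add_pd (a, b);      /* [3.0 5.0] */

  _mm_storeu_pd (out, sum);             /* unaligned store of both lanes */
  printf ("%g %g\n", out[0], out[1]);   /* prints "5 3" */
  return 0;
}

_mm_store_pd would also work here but requires the destination to be 16-byte aligned, which is why the sketch uses the unaligned _mm_storeu_pd variant added by the same patch.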
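The integer side of the same patch adds the __m128i arithmetic, logic, shift, shuffle and conversion builtins. A hedged illustration of how they combine (the helper name hsum_epi32 is hypothetical, not something the patch defines) is a horizontal sum of the four 32-bit lanes:

#include <xmmintrin.h>

/* Sum the four 32-bit lanes of V; after the two folds, lane 0 holds
   the total.  Illustrative sketch only.  */
static int
hsum_epi32 (__m128i v)
{
  /* Add the high pair onto the low pair, then the two remaining lanes.  */
  __m128i t = _mm_add_epi32 (v, _mm_shuffle_epi32 (v, _MM_SHUFFLE (1, 0, 3, 2)));
  t = _mm_add_epi32 (t, _mm_shuffle_epi32 (t, _MM_SHUFFLE (2, 3, 0, 1)));
  return _mm_cvtsi128_si32 (t);
}

For example, hsum_epi32 (_mm_set_epi32 (4, 3, 2, 1)) evaluates to 10. _MM_SHUFFLE comes from the pre-existing SSE portion of the header; the SSE2 additions only supply the _mm_shuffle_epi32 wrapper around __builtin_ia32_pshufd.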