diff options
author | obrien <obrien@FreeBSD.org> | 2002-02-01 18:16:02 +0000 |
---|---|---|
committer | obrien <obrien@FreeBSD.org> | 2002-02-01 18:16:02 +0000 |
commit | c9ab9ae440a8066b2c2b85b157b1fdadcf09916a (patch) | |
tree | 086d9d6c8fbd4fc8fe4495059332f66bc0f8d12b /contrib/gcc/config/ia64 | |
parent | 2ecfd8bd04b63f335c1ec6295740a4bfd97a4fa6 (diff) | |
download | FreeBSD-src-c9ab9ae440a8066b2c2b85b157b1fdadcf09916a.zip FreeBSD-src-c9ab9ae440a8066b2c2b85b157b1fdadcf09916a.tar.gz |
Enlist the FreeBSD-CURRENT users as testers of what is to become Gcc 3.1.0.
These bits are taken from the FSF anoncvs repo on 1-Feb-2002 08:20 PST.
Diffstat (limited to 'contrib/gcc/config/ia64')
28 files changed, 20635 insertions, 0 deletions
diff --git a/contrib/gcc/config/ia64/aix.h b/contrib/gcc/config/ia64/aix.h new file mode 100644 index 0000000..1e57c2b --- /dev/null +++ b/contrib/gcc/config/ia64/aix.h @@ -0,0 +1,256 @@ +/* Definitions of target machine GNU compiler. IA-64/AIX version. + Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + Contributed by Timothy Wall (twall@cygnus.com) + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* AIX5 (aka Monterey): a mix of AIX and UnixWare. + This file is loosely based on ia64/linux.h. */ + +/* This macro is a C statement to print on `stderr' a string describing the + particular machine description choice. */ + +#define TARGET_VERSION fprintf (stderr, " (IA-64) AIX"); + +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" + +#define SET_ASM_OP "\t.set\t" + +#undef MD_EXEC_PREFIX +#undef MD_STARTFILE_PREFIX +#define MD_STARTFILE_PREFIX "/usr/lib/ia64l64/" + +/* Output at beginning of assembler file. */ +/* The .file command should always begin the output. */ +#undef ASM_FILE_START +#define ASM_FILE_START(FILE) \ + do { \ + output_file_directive (FILE, main_input_filename); \ + fprintf (FILE, "\t.version\t\"01.01\"\n"); \ + } while (0) + +/* Provide a STARTFILE_SPEC appropriate for AIX. 
Here we add + the crti C++ startup files file which provide part of the support + for getting C++ file-scope static object constructed before entering + `main'. */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ +"%{!shared: \ + %{pg:gcrt1_64.o%s} %{!pg:%{p:mcrt1_64.o%s} \ + %{!p:%{profile:gcrt1_64.o%s} \ + %{!profile:crt1_64.o%s}}}} \ + crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}" + +/* Provide a ENDFILE_SPEC appropriate for AIX. Here we tack on + the crtn file which provides termination of the support for getting C++ + file-scope static object constructed before entering `main'. */ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s" + +/* Define this so we can compile MS code for use with WINE. */ +#define HANDLE_PRAGMA_PACK_PUSH_POP + +/* A C string constant that tells the GNU CC driver program options to pass to + CPP. It can also specify how to translate options you give to GNU CC into + options for GNU CC to pass to the CPP. */ + +/* If -ansi, we need to define _ANSI_C_SOURCE to get the right headers. */ +#undef CPP_SPEC +#define CPP_SPEC "\ +%{mcpu=itanium:-D__itanium__} %{mbig-endian:-D__BIG_ENDIAN__} \ +%{ansi:-D_ANSI_C_SOURCE} \ +%{posix:-D_POSIX_SOURCE} \ +%{cpp_cpu} \ +-D__LONG_MAX__=9223372036854775807L" + +#undef CPP_PREDEFINES +#define CPP_PREDEFINES "\ +-D__ia64 -D__ia64__ -D_AIX -D_AIX64 -D_LONGLONG -Dunix \ +-D__LP64__ -D__ELF__ -Asystem=unix -Asystem=aix -Acpu=ia64 -Amachine=ia64 \ +-D__64BIT__ -D_LONG_LONG -D_IA64 -D__int128=__size128_t" + +/* The GNU C++ standard library requires that these macros be defined. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC \ + "-D_XOPEN_SOURCE=500 \ + -D_XOPEN_SOURCE_EXTENDED=1 \ + -D_LARGE_FILE_API \ + -D_ALL_SOURCE \ + -D__LONG_MAX__=9223372036854775807L \ + %{cpp_cpu}" + +/* ia64-specific options for gas */ +#undef ASM_SPEC +#define ASM_SPEC "-x %{mconstant-gp} %{mauto-pic}" + +/* Define this for shared library support. 
*/ + +#undef LINK_SPEC +#define LINK_SPEC "\ +%{shared:-shared} \ +%{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{!dynamic-linker:-dynamic-linker /usr/lib/ia64l64/libc.so.1}} \ + %{static:-static}}" + +#define DONT_USE_BUILTIN_SETJMP +#define JMP_BUF_SIZE 85 + +/* Output any profiling code before the prologue. */ + +#undef PROFILE_BEFORE_PROLOGUE +#define PROFILE_BEFORE_PROLOGUE 1 + +/* A C statement or compound statement to output to FILE some assembler code to + call the profiling subroutine `mcount'. + + FIXME this is not supported until xlC supports it and can thus tell us + how to do it. +*/ + +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(FILE, LABELNO) \ +do { \ +} while (0) + +/* Tell the linker where to find the crt*.o files. */ + +#ifndef CROSS_COMPILE +#undef STANDARD_STARTFILE_PREFIX +#define STANDARD_STARTFILE_PREFIX "/usr/lib/ia64l64/" +#endif + +/* Override SELECT_SECTION and SELECT_RTX_SECTION from config/ia64/sysv4.h; + these definitions ignore flag_pic as if it were always set; + it is illegal to have relocations in shared segments on AIX. */ + +/* A C statement or statements to switch to the appropriate + section for output of DECL. DECL is either a `VAR_DECL' node + or a constant of some sort. RELOC indicates whether forming + the initial value of DECL requires link-time relocations. */ + +#undef SELECT_SECTION +#define SELECT_SECTION(DECL,RELOC,ALIGN) \ +{ \ + if (TREE_CODE (DECL) == STRING_CST) \ + { \ + if (! flag_writable_strings) \ + const_section (); \ + else \ + data_section (); \ + } \ + else if (TREE_CODE (DECL) == VAR_DECL) \ + { \ + if (XSTR (XEXP (DECL_RTL (DECL), 0), 0)[0] \ + == SDATA_NAME_FLAG_CHAR) \ + sdata_section (); \ + /* ??? We need the extra ! RELOC check, because the default is to \ + only check RELOC if flag_pic is set, and we don't set flag_pic \ + (yet?). */ \ + else if (DECL_READONLY_SECTION (DECL, RELOC) && ! 
(RELOC)) \ + const_section (); \ + else \ + data_section (); \ + } \ + /* This could be a CONSTRUCTOR containing ADDR_EXPR of a VAR_DECL, \ + in which case we can't put it in a shared library rodata. */ \ + else if (RELOC) \ + data_section (); \ + else \ + const_section (); \ +} + +/* Similarly for constant pool data. */ + +extern unsigned int ia64_section_threshold; +#undef SELECT_RTX_SECTION +#define SELECT_RTX_SECTION(MODE, RTX, ALIGN) \ +{ \ + if (GET_MODE_SIZE (MODE) > 0 \ + && GET_MODE_SIZE (MODE) <= ia64_section_threshold) \ + sdata_section (); \ + else if (symbolic_operand ((RTX), (MODE))) \ + data_section (); \ + else \ + const_section (); \ +} + +#undef UNIQUE_SECTION +#define UNIQUE_SECTION(DECL, RELOC) \ + do \ + { \ + int len; \ + int sec; \ + const char *name; \ + char *string; \ + const char *prefix; \ + static const char *const prefixes[/*4*/3][2] = \ + { \ + { ".text.", ".gnu.linkonce.t." }, \ + { ".rodata.", ".gnu.linkonce.r." }, \ + { ".data.", ".gnu.linkonce.d." } \ + /* Do not generate unique sections for uninitialised \ + data since we do not have support for this in the \ + linker scripts yet... \ + ,{ ".bss.", ".gnu.linkonce.b." } */ \ + }; \ + \ + if (TREE_CODE (DECL) == FUNCTION_DECL) \ + sec = 0; \ + /* else if (DECL_INITIAL (DECL) == 0 \ + || DECL_INITIAL (DECL) == error_mark_node) \ + sec = 3; */ \ + else if (DECL_READONLY_SECTION (DECL, RELOC) && ! (RELOC))\ + sec = 1; \ + else \ + sec = 2; \ + \ + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (DECL)); \ + /* Strip off any encoding in name. */ \ + STRIP_NAME_ENCODING (name, name); \ + prefix = prefixes[sec][DECL_ONE_ONLY(DECL)]; \ + len = strlen (name) + strlen (prefix); \ + string = alloca (len + 1); \ + \ + sprintf (string, "%s%s", prefix, name); \ + \ + DECL_SECTION_NAME (DECL) = build_string (len, string); \ + } \ + while (0) + +/* Override ia64/sysv4.h setting with that used by AIX5. 
*/ +#undef WCHAR_TYPE +#ifdef __64BIT__ +#define WCHAR_TYPE "unsigned int" +#else +#define WCHAR_TYPE "unsigned short" +#endif + +/* Define the `__builtin_va_list' type for AIX. Use char* b/c that's what the + system headers expect. */ +#define BUILD_VA_LIST_TYPE(VALIST) \ + (VALIST) = build_pointer_type(char_type_node) + +/* End of aix.h */ diff --git a/contrib/gcc/config/ia64/crtbegin.asm b/contrib/gcc/config/ia64/crtbegin.asm new file mode 100644 index 0000000..388b24e --- /dev/null +++ b/contrib/gcc/config/ia64/crtbegin.asm @@ -0,0 +1,246 @@ +/* Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Contributed by Jes Sorensen, <Jes.Sorensen@cern.ch> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +.section .ctors,"aw","progbits" + .align 8 +__CTOR_LIST__: + data8 -1 + +.section .dtors,"aw","progbits" + .align 8 +__DTOR_LIST__: + data8 -1 + +.section .jcr,"aw","progbits" + .align 8 +__JCR_LIST__: + +.section .sdata + .type dtor_ptr#,@object + .size dtor_ptr#,8 +dtor_ptr: + data8 @gprel(__DTOR_LIST__# + 8) + + /* A handle for __cxa_finalize to manage c++ local destructors. 
*/ + .global __dso_handle# + .type __dso_handle#,@object + .size __dso_handle#,8 +#ifdef SHARED + .section .data +__dso_handle: + data8 __dso_handle# +#else + .section .bss +__dso_handle: + data8 0 +#endif + .hidden __dso_handle# + + +/* + * Fragment of the ELF _fini routine that invokes our dtor cleanup. + * + * We make the call by indirection, because in large programs the + * .fini and .init sections are not in range of the destination, and + * we cannot allow the linker to insert a stub at the end of this + * fragment of the _fini function. Further, Itanium does not implement + * the long branch instructions, and we do not wish every program to + * trap to the kernel for emulation. + * + * Note that we require __do_global_dtors_aux to preserve the GP, + * so that the next fragment in .fini gets the right value. + */ +.section .fini,"ax","progbits" + { .mlx + movl r2 = @pcrel(__do_global_dtors_aux# - 16) + } + { .mii + mov r3 = ip + ;; + add r2 = r2, r3 + ;; + } + { .mib + mov b6 = r2 + br.call.sptk.many b0 = b6 + ;; + } + +/* Likewise for _init. 
*/ + +.section .init,"ax","progbits" + { .mlx + movl r2 = @pcrel(__do_jv_register_classes# - 16) + } + { .mii + mov r3 = ip + ;; + add r2 = r2, r3 + ;; + } + { .mib + mov b6 = r2 + br.call.sptk.many b0 = b6 + ;; + } + +.section .text + .align 16 + .proc __do_global_dtors_aux# +__do_global_dtors_aux: +#ifndef SHARED + { .mii + alloc loc3 = ar.pfs, 0, 4, 1, 0 + addl loc0 = @gprel(dtor_ptr#), gp + mov loc1 = b0 + } + { .mib + mov loc2 = gp + br.sptk.few 1f + ;; + } +#else + /* + if (__cxa_finalize) + __cxa_finalize(__dso_handle) + */ + { .mii + alloc loc3 = ar.pfs, 0, 4, 1, 0 + addl loc0 = @gprel(dtor_ptr#), gp + addl r16 = @ltoff(@fptr(__cxa_finalize#)), gp + ;; + } + { .mmi + ld8 r16 = [r16] + ;; + addl out0 = @ltoff(__dso_handle#), gp + cmp.ne p7, p0 = r0, r16 + ;; + } + { .mmi + ld8 out0 = [out0] +(p7) ld8 r18 = [r16], 8 + mov loc1 = b0 + ;; + } + { .mfi + mov loc2 = gp +(p7) mov b6 = r18 + } + { + .mfb +(p7) ld8 gp = [r16] +(p7) br.call.sptk.many b0 = b6 + } + { .mfb + br.sptk.few 1f + } +#endif + /* + do { + dtor_ptr++; + (*(dtor_ptr-1)) (); + } while (dtor_ptr); + */ +0: + { .mmi + st8 [loc0] = r15 + ld8 r17 = [r16], 8 + ;; + } + { .mib + ld8 gp = [r16] + mov b6 = r17 + br.call.sptk.many b0 = b6 + } +1: + { .mmi + ld8 r15 = [loc0] + ;; + add r16 = r15, loc2 + adds r15 = 8, r15 + ;; + } + { .mmi + ld8 r16 = [r16] + mov gp = loc2 + mov b0 = loc1 + ;; + } + { .mib + cmp.ne p6, p0 = r0, r16 + mov ar.pfs = loc3 +(p6) br.cond.sptk.few 0b + } + { .bbb + br.ret.sptk.many b0 + ;; + } + .endp __do_global_dtors_aux# + + .align 16 + .proc __do_jv_register_classes# +__do_jv_register_classes: + { .mlx + alloc loc2 = ar.pfs, 0, 3, 1, 0 + movl out0 = @gprel(__JCR_LIST__) + ;; + } + { .mmi + addl r14 = @ltoff(@fptr(_Jv_RegisterClasses)), gp + add out0 = out0, gp + ;; + } + { .mmi + ld8 r14 = [r14] + ld8 r15 = [out0] + cmp.ne p6, p0 = r0, r0 + ;; + } + { .mib + cmp.eq.or p6, p0 = r0, r14 + cmp.eq.or p6, p0 = r0, r15 +(p6) br.ret.sptk.many b0 + } + { .mii + ld8 r15 = [r14], 8 + 
mov loc0 = b0 + mov loc1 = gp + ;; + } + { .mib + ld8 gp = [r14] + mov b6 = r15 + br.call.sptk.many b0 = b6 + ;; + } + { .mii + mov gp = loc1 + mov b0 = loc0 + mov ar.pfs = loc2 + } + { .bbb + br.ret.sptk.many b0 + ;; + } + .endp __do_jv_register_classes# + +#ifdef SHARED +.weak __cxa_finalize# +#endif +.weak _Jv_RegisterClasses diff --git a/contrib/gcc/config/ia64/crtend.asm b/contrib/gcc/config/ia64/crtend.asm new file mode 100644 index 0000000..e5d109a --- /dev/null +++ b/contrib/gcc/config/ia64/crtend.asm @@ -0,0 +1,117 @@ +/* Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Contributed by Jes Sorensen, <Jes.Sorensen@cern.ch> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +.section .ctors,"aw","progbits" + .align 8 +__CTOR_END__: + data8 0 + +.section .dtors,"aw","progbits" + .align 8 +__DTOR_END__: + data8 0 + +.section .jcr,"aw","progbits" + .align 8 +__JCR_END__: + data8 0 + +/* + * Fragment of the ELF _init routine that invokes our dtor cleanup. + * + * We make the call by indirection, because in large programs the + * .fini and .init sections are not in range of the destination, and + * we cannot allow the linker to insert a stub at the end of this + * fragment of the _fini function. 
Further, Itanium does not implement + * the long branch instructions, and we do not wish every program to + * trap to the kernel for emulation. + * + * Note that we require __do_global_ctors_aux to preserve the GP, + * so that the next fragment in .fini gets the right value. + */ +.section .init,"ax","progbits" + { .mlx + movl r2 = @pcrel(__do_global_ctors_aux# - 16) + } + { .mii + mov r3 = ip + ;; + add r2 = r2, r3 + ;; + } + { .mib + mov b6 = r2 + br.call.sptk.many b0 = b6 + ;; + } + +.text + .align 16 + .proc __do_global_ctors_aux# +__do_global_ctors_aux: + /* + for (loc0 = __CTOR_END__-1; *p != -1; --p) + (*p) (); + */ + { .mlx + alloc loc4 = ar.pfs, 0, 5, 0, 0 + movl loc0 = @gprel(__CTOR_END__# - 8) + ;; + } + { .mmi + add loc0 = loc0, gp + mov loc1 = b0 + ;; + } + { + .mmi + ld8 loc3 = [loc0], -8 + mov loc2 = gp + ;; + } + { .mfb + cmp.eq p6, p0 = -1, loc3 +(p6) br.cond.spnt.few 2f + } +0: + { .mmi + ld8 r15 = [loc3], 8 + ;; + ld8 gp = [loc3] + mov b6 = r15 + } + { .mfb + ld8 loc3 = [loc0], -8 + br.call.sptk.many b0 = b6 + ;; + } + { .mfb + cmp.ne p6, p0 = -1, loc3 +(p6) br.cond.sptk.few 0b + } +2: + { .mii + mov gp = loc2 + mov b0 = loc1 + mov ar.pfs = loc4 + } + { .bbb + br.ret.sptk.many b0 + ;; + } + .endp __do_global_ctors_aux# diff --git a/contrib/gcc/config/ia64/crtfastmath.c b/contrib/gcc/config/ia64/crtfastmath.c new file mode 100644 index 0000000..aa0d120 --- /dev/null +++ b/contrib/gcc/config/ia64/crtfastmath.c @@ -0,0 +1,37 @@ +/* Copyright (C) 2001 Free Software Foundation, Inc. + Contributed by David Mosberger <davidm@hpl.hp.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* In addition to the permissions in the GNU General Public License, the + Free Software Foundation gives you unlimited permission to link the + compiled version of this file into combinations with other programs, + and to distribute those combinations without any restriction coming + from the use of this file. (The General Public License restrictions + do apply in other respects; for example, they cover modification of + the file, and distribution when not linked into a combine + executable.) */ + +/* We could call fesetenv() here but that would create a confusing + dependency on libm (since that is where fesetenv() gets defined. + To avoid this, just do everything locally. */ +#define FE_NONIEEE_ENV 0x0009a04d0270037f + +static void __attribute__((constructor)) +__ia64_set_fast_math (void) +{ + __asm__ __volatile__ ("mov.m ar.fpsr=%0" : : "r"(FE_NONIEEE_ENV)); +} diff --git a/contrib/gcc/config/ia64/crti.asm b/contrib/gcc/config/ia64/crti.asm new file mode 100644 index 0000000..4b94b7f --- /dev/null +++ b/contrib/gcc/config/ia64/crti.asm @@ -0,0 +1,66 @@ +# Copyright (C) 2000, 2001 Free Software Foundation, Inc. +# Written By Timothy Wall +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2, or (at your option) any +# later version. 
+# +# In addition to the permissions in the GNU General Public License, the +# Free Software Foundation gives you unlimited permission to link the +# compiled version of this file with other programs, and to distribute +# those programs without any restriction coming from the use of this +# file. (The General Public License restrictions do apply in other +# respects; for example, they cover modification of the file, and +# distribution when not linked into another program.) +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to +# the Free Software Foundation, 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# As a special exception, if you link this library with files +# compiled with GCC to produce an executable, this does not cause +# the resulting executable to be covered by the GNU General Public License. +# This exception does not however invalidate any other reasons why +# the executable file might be covered by the GNU General Public License. +# + +# This file just make a stack frame for the contents of the .fini and +# .init sections. Users may put any desired instructions in those +# sections. 
+ + .file "crti.asm" + + .section ".init" + .align 16 + .global _init# +_init: + .prologue 14, 33 + .save ar.pfs, r34 + alloc r34 = ar.pfs, 0, 4, 0, 0 + .vframe r35 + mov r35 = r12 + .save rp, r33 + mov r33 = b0 + .body + + .section ".fini" + .align 16 + .global _fini# +_fini: + .prologue 14, 33 + .save ar.pfs, r34 + alloc r34 = ar.pfs, 0, 4, 0, 0 + .vframe r35 + mov r35 = r12 + .save rp, r33 + mov r33 = b0 + .body + +# end of crti.asm diff --git a/contrib/gcc/config/ia64/crtn.asm b/contrib/gcc/config/ia64/crtn.asm new file mode 100644 index 0000000..0b45d38 --- /dev/null +++ b/contrib/gcc/config/ia64/crtn.asm @@ -0,0 +1,56 @@ +# Copyright (C) 2000, 2001 Free Software Foundation, Inc. +# Written By Timothy Wall +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2, or (at your option) any +# later version. +# +# In addition to the permissions in the GNU General Public License, the +# Free Software Foundation gives you unlimited permission to link the +# compiled version of this file with other programs, and to distribute +# those programs without any restriction coming from the use of this +# file. (The General Public License restrictions do apply in other +# respects; for example, they cover modification of the file, and +# distribution when not linked into another program.) +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to +# the Free Software Foundation, 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. 
+# +# As a special exception, if you link this library with files +# compiled with GCC to produce an executable, this does not cause +# the resulting executable to be covered by the GNU General Public License. +# This exception does not however invalidate any other reasons why +# the executable file might be covered by the GNU General Public License. +# + +# This file just makes sure that the .fini and .init sections do in +# fact return. Users may put any desired instructions in those sections. +# This file is the last thing linked into any executable. + + .file "crtn.asm" + + .section ".init" + ;; + mov ar.pfs = r34 + mov b0 = r33 + .restore sp + mov r12 = r35 + br.ret.sptk.many b0 + + .section ".fini" + ;; + mov ar.pfs = r34 + mov b0 = r33 + .restore sp + mov r12 = r35 + br.ret.sptk.many b0 + +# end of crtn.asm diff --git a/contrib/gcc/config/ia64/elf.h b/contrib/gcc/config/ia64/elf.h new file mode 100644 index 0000000..af8c7a6 --- /dev/null +++ b/contrib/gcc/config/ia64/elf.h @@ -0,0 +1,54 @@ +/* Definitions for embedded ia64-elf target. */ + +/* This macro is a C statement to print on `stderr' a string describing the + particular machine description choice. */ + +#define TARGET_VERSION fprintf (stderr, " (IA-64) ELF"); + +/* Define this to be a string constant containing `-D' options to define the + predefined macros that identify this machine and system. These macros will + be predefined unless the `-ansi' option is specified. */ +/* ??? This is undefed in svr4.h. */ +#define CPP_PREDEFINES "-Dia64 -Amachine=ia64" + +/* A C string constant that tells the GNU CC driver program options to pass to + the assembler. It can also specify how to translate options you give to GNU + CC into options for GNU CC to pass to the assembler. */ + +#if ((TARGET_CPU_DEFAULT | TARGET_DEFAULT) & MASK_GNU_AS) != 0 +/* GNU AS. */ +#define ASM_SPEC \ + "%{mno-gnu-as:-N so} %{!mno-gnu-as:-x} %{mconstant-gp} %{mauto-pic}" +#else +/* Intel ias. 
*/ +#define ASM_SPEC \ + "%{!mgnu-as:-N so} %{mgnu-as:-x} %{mconstant-gp:-M const_gp}\ + %{mauto-pic:-M no_plabel}" +#endif + +/* A C string constant that tells the GNU CC driver program options to pass to + the linker. It can also specify how to translate options you give to GNU CC + into options for GNU CC to pass to the linker. */ + +/* The Intel linker does not support dynamic linking, so we need -dn. + The Intel linker gives annoying messages unless -N so is used. */ +#if ((TARGET_CPU_DEFAULT | TARGET_DEFAULT) & MASK_GNU_LD) != 0 +/* GNU LD. */ +#define LINK_SPEC "%{mno-gnu-ld:-dn -N so}" +#else +/* Intel ild. */ +#define LINK_SPEC "%{!mgnu-ld:-dn -N so}" +#endif + +/* svr4.h links with crti.o/crtn.o, but elfos.h does not. We override elfos.h + so that we can use the standard ELF Unix method. */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared: \ + %{!symbolic: \ + %{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}}\ + crti.o%s crtbegin.o%s" + +/* End of elf.h */ diff --git a/contrib/gcc/config/ia64/fde-glibc.c b/contrib/gcc/config/ia64/fde-glibc.c new file mode 100644 index 0000000..83cc93a --- /dev/null +++ b/contrib/gcc/config/ia64/fde-glibc.c @@ -0,0 +1,164 @@ +/* Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@cygnus.com>. + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU CC; see the file COPYING. 
If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. */ + +/* Locate the FDE entry for a given address, using glibc ld.so routines + to avoid register/deregister calls at DSO load/unload. */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include "config.h" +#include <stddef.h> +#include <stdlib.h> +#include <link.h> +#include "unwind-ia64.h" + +#if __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 2) \ + || (__GLIBC__ == 2 && __GLIBC_MINOR__ == 2 && !defined(DT_CONFIG)) +# error You need GLIBC 2.2.4 or later on IA-64 Linux +#endif + +struct unw_ia64_callback_data +{ + Elf64_Addr pc; + unsigned long *segment_base; + unsigned long *gp; + struct unw_table_entry *ret; +}; + +static int +_Unwind_IteratePhdrCallback (struct dl_phdr_info *info, size_t size, void *ptr) +{ + struct unw_ia64_callback_data *data = (struct unw_ia64_callback_data *) ptr; + const Elf64_Phdr *phdr, *p_unwind, *p_dynamic; + long n, match; + Elf64_Addr load_base, seg_base; + struct unw_table_entry *f_base, *f; + size_t lo, hi; + + /* Make sure struct dl_phdr_info is at least as big as we need. */ + if (size < offsetof (struct dl_phdr_info, dlpi_phnum) + + sizeof (info->dlpi_phnum)) + return -1; + + match = 0; + phdr = info->dlpi_phdr; + load_base = info->dlpi_addr; + p_unwind = NULL; + p_dynamic = NULL; + seg_base = ~(Elf64_Addr) 0; + + /* See if PC falls into one of the loaded segments. Find the unwind + segment at the same time. 
*/ + for (n = info->dlpi_phnum; --n >= 0; phdr++) + { + if (phdr->p_type == PT_LOAD) + { + Elf64_Addr vaddr = phdr->p_vaddr + load_base; + if (data->pc >= vaddr && data->pc < vaddr + phdr->p_memsz) + match = 1; + if (vaddr < seg_base) + seg_base = vaddr; + } + else if (phdr->p_type == PT_IA_64_UNWIND) + p_unwind = phdr; + else if (phdr->p_type == PT_DYNAMIC) + p_dynamic = phdr; + } + if (!match || !p_unwind) + return 0; + + /* Search for the FDE within the unwind segment. */ + + f_base = (struct unw_table_entry *) (p_unwind->p_vaddr + load_base); + lo = 0; + hi = p_unwind->p_memsz / sizeof (struct unw_table_entry); + + while (lo < hi) + { + size_t mid = (lo + hi) / 2; + + f = f_base + mid; + if (data->pc < f->start_offset + seg_base) + hi = mid; + else if (data->pc >= f->end_offset + seg_base) + lo = mid + 1; + else + goto found; + } + /* No need to search for further libraries when we know pc is contained + in this library. */ + return 1; + + found: + *data->segment_base = seg_base; + *data->gp = 0; + data->ret = f; + + if (p_dynamic) + { + /* For dynamicly linked executables and shared libraries, + DT_PLTGOT is the gp value for that object. */ + Elf64_Dyn *dyn = (Elf64_Dyn *)(p_dynamic->p_vaddr + load_base); + for (; dyn->d_tag != DT_NULL ; dyn++) + if (dyn->d_tag == DT_PLTGOT) + { + /* On IA-64, _DYNAMIC is writable and GLIBC has relocated it. */ + *data->gp = dyn->d_un.d_ptr; + break; + } + } + else + { + /* Otherwise this is a static executable with no _DYNAMIC. + The gp is constant program-wide. */ + register unsigned long gp __asm__("gp"); + *data->gp = gp; + } + + return 1; +} + +/* Return a pointer to the unwind table entry for the function + containing PC. 
*/ + +struct unw_table_entry * +_Unwind_FindTableEntry (void *pc, unsigned long *segment_base, + unsigned long *gp) +{ + struct unw_ia64_callback_data data; + + data.pc = (Elf64_Addr) pc; + data.segment_base = segment_base; + data.gp = gp; + data.ret = NULL; + + if (dl_iterate_phdr (_Unwind_IteratePhdrCallback, &data) < 0) + return NULL; + + return data.ret; +} diff --git a/contrib/gcc/config/ia64/freebsd.h b/contrib/gcc/config/ia64/freebsd.h new file mode 100644 index 0000000..6140128 --- /dev/null +++ b/contrib/gcc/config/ia64/freebsd.h @@ -0,0 +1,66 @@ +/* Definitions for Intel IA-64 running FreeBSD using the ELF format + Copyright (C) 2001 Free Software Foundation, Inc. + Contributed by David E. O'Brien <obrien@FreeBSD.org> and BSDi. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{p:%e`-p' not supported; use `-pg' and gprof(1)} \ + %{Wl,*:%*} \ + %{assert*} %{R*} %{rpath*} %{defsym*} \ + %{shared:-Bshareable %{h*} %{soname*}} \ + %{symbolic:-Bsymbolic} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{!dynamic-linker:-dynamic-linker /usr/libexec/ld-elf.so.1}} \ + %{static:-Bstatic}}" + +#undef ASM_SPEC +#define ASM_SPEC "-x %{mconstant-gp} %{mauto-pic}" + + +/************************[ Target stuff ]***********************************/ + +/* Define the actual types of some ANSI-mandated types. + Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.c, + c-common.c, and config/<arch>/<arch>.h. */ + +/* Earlier headers may get this wrong for FreeBSD. + We use the GCC defaults instead. */ +#undef WCHAR_TYPE + +#undef WCHAR_UNSIGNED +#define WCHAR_UNSIGNED 0 + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (FreeBSD/IA-64 ELF)"); + +#define TARGET_ELF 1 + +#define DONT_USE_BUILTIN_SETJMP +#define JMP_BUF_SIZE 76 + +/* Output any profiling code before the prologue. */ + +#undef PROFILE_BEFORE_PROLOGUE +#define PROFILE_BEFORE_PROLOGUE 1 diff --git a/contrib/gcc/config/ia64/hpux.h b/contrib/gcc/config/ia64/hpux.h new file mode 100644 index 0000000..89b2902 --- /dev/null +++ b/contrib/gcc/config/ia64/hpux.h @@ -0,0 +1,124 @@ +/* Definitions of target machine GNU compiler. IA-64 version. + Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. + Contributed by Steve Ellcey <sje@cup.hp.com> and + Reva Cuthbertson <reva@cup.hp.com> + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. 
+ +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* This macro is a C statement to print on `stderr' a string describing the + particular machine description choice. */ + +#define TARGET_VERSION fprintf (stderr, " (IA-64) HP-UX"); + +#undef CPP_PREDEFINES +#define CPP_PREDEFINES "\ + -D__IA64__ -D__ia64 -D__ia64__ -D__hpux -D__hpux__ -Dhpux -Dunix \ + -D__BIG_ENDIAN__ -D_LONGLONG -D__ELF__ \ + -Asystem=hpux -Asystem=posix -Asystem=unix -Acpu=ia64 -Amachine=ia64 \ + -D_UINT128_T" + +/* -D__fpreg=long double is needed to compensate for the lack of __fpreg + which is a primitive type in HP C but does not exist in GNU C. Same + for __float80 and __float128. These types appear in HP-UX header + files and so must have some definition. 
*/ + +#undef CPP_SPEC +#define CPP_SPEC "\ + %{mcpu=itanium:-D__itanium__} \ + %{mlp64:-D__LP64__ -D__LONG_MAX__=9223372036854775807L} \ + %{!ansi:%{!std=c*:%{!std=i*: -D_HPUX_SOURCE -D__STDC_EXT__}}} \ + -D__fpreg=long\\ double \ + -D__float80=long\\ double \ + -D__float128=long\\ double" + +#undef ASM_SPEC +#define ASM_SPEC "-x %{mconstant-gp} %{mauto-pic} \ + %{milp32:-milp32} %{mlp64:-mlp64}" + +#undef ENDFILE_SPEC + +#undef STARTFILE_SPEC +#ifdef CROSS_COMPILE +#define STARTFILE_SPEC "%{!shared:crt0%O%s}" +#else +#define STARTFILE_SPEC "/usr/ccs/lib/hpux64/crt0%O" +#endif + +#undef LINK_SPEC +#define LINK_SPEC "\ + +Accept TypeMismatch \ + %{shared:-b} \ + %{!shared: \ + -u main \ + %{!static: \ + %{rdynamic:-export-dynamic}} \ + %{static:-static}}" + +#undef LIB_SPEC +#define LIB_SPEC "%{!shared:%{!symbolic:-lc}}" + +#undef SUBTARGET_SWITCHES +#define SUBTARGET_SWITCHES \ + { "ilp32", MASK_ILP32, "Generate ILP32 code" }, \ + { "lp64", -MASK_ILP32, "Generate LP64 code" }, + +/* A C expression whose value is zero if pointers that need to be extended + from being `POINTER_SIZE' bits wide to `Pmode' are sign-extended and + greater then zero if they are zero-extended and less then zero if the + ptr_extend instruction should be used. */ + +#define POINTERS_EXTEND_UNSIGNED -1 + +#define DONT_USE_BUILTIN_SETJMP +#define JMP_BUF_SIZE (8 * 76) + +#undef CONST_SECTION_ASM_OP +#define CONST_SECTION_ASM_OP "\t.section\t.rodata,\t\"a\",\t\"progbits\"" + +#undef BITS_BIG_ENDIAN +#define BITS_BIG_ENDIAN 1 + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_DWARF2_ASM | MASK_BIG_ENDIAN) + +/* This needs to be set to force structure arguments with a single + field to be treated as structures and not as the type of their + field. Without this a structure with a single char will be + returned just like a char variable and that is wrong on HP-UX + IA64. 
TARGET_STRUCT_ARG_REG_LITTLE_ENDIAN triggers the special + structure handling, this macro simply ensures that single field + structures are always treated like structures. */ + +#define MEMBER_TYPE_FORCES_BLK(FIELD) 1 + +/* Override the setting of FUNCTION_ARG_REG_LITTLE_ENDIAN in + defaults.h. Setting this to true means that we are not passing + structures in registers in the "normal" big-endian way. See + See section 8.5 of the "Itanium Software Conventions and Runtime + Architecture", specifically Table 8-1 and the explanation of Byte 0 + alignment and LSB alignment and a description of how structures + are passed. */ + +#define FUNCTION_ARG_REG_LITTLE_ENDIAN 1 + +#undef FUNCTION_ARG_PADDING +#define FUNCTION_ARG_PADDING(MODE, TYPE) \ + ia64_hpux_function_arg_padding ((MODE), (TYPE)) + +#undef PAD_VARARGS_DOWN +#define PAD_VARARGS_DOWN (!AGGREGATE_TYPE_P (type)) diff --git a/contrib/gcc/config/ia64/hpux_longdouble.h b/contrib/gcc/config/ia64/hpux_longdouble.h new file mode 100644 index 0000000..bfc12d4 --- /dev/null +++ b/contrib/gcc/config/ia64/hpux_longdouble.h @@ -0,0 +1,103 @@ +/* Definitions of long double support for GNU compiler. + Copyright (C) 2000, 2001 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. 
*/ + +/* Tell real.c that we are not using INTEL_EXTENDED_IEEE_FORMAT */ + +#undef INTEL_EXTENDED_IEEE_FORMAT +#define INTEL_EXTENDED_IEEE_FORMAT 0 + +/* Define library calls for quad FP operations. These are all part of the + IA32 and IA64 ABIs. */ + +#define ADDTF3_LIBCALL "_U_Qfadd" +#define SUBTF3_LIBCALL "_U_Qfsub" +#define MULTF3_LIBCALL "_U_Qfmpy" +#define DIVTF3_LIBCALL "_U_Qfdiv" +#define NEGTF2_LIBCALL "_U_Qfneg" +#define ABSTF2_LIBCALL "_U_Qfabs" +#define SMINTF3_LIBCALL "_U_Qfmin" +#define SMAXTF3_LIBCALL "_U_Qfmax" +#define EXTENDSFTF2_LIBCALL "_U_Qfcnvff_sgl_to_quad" +#define EXTENDDFTF2_LIBCALL "_U_Qfcnvff_dbl_to_quad" +#define TRUNCTFSF2_LIBCALL "_U_Qfcnvff_quad_to_sgl" +#define TRUNCTFDF2_LIBCALL "_U_Qfcnvff_quad_to_dbl" +#define FLOATSITF2_LIBCALL "_U_Qfcnvxf_sgl_to_quad" +#define FLOATDITF2_LIBCALL "_U_Qfcnvxf_dbl_to_quad" +#define FIX_TRUNCTFSI2_LIBCALL "_U_Qfcnvfxt_quad_to_sgl" +#define FIX_TRUNCTFDI2_LIBCALL "_U_Qfcnvfxt_quad_to_dbl" +#define EQTF2_LIBCALL "_U_Qfeq" +#define NETF2_LIBCALL "_U_Qfne" +#define GTTF2_LIBCALL "_U_Qfgt" +#define GETF2_LIBCALL "_U_Qfge" +#define LTTF2_LIBCALL "_U_Qflt" +#define LETF2_LIBCALL "_U_Qfle" + + +#undef INIT_TARGET_OPTABS +#define INIT_TARGET_OPTABS \ + do { \ + add_optab->handlers[(int) TFmode].libfunc \ + = gen_rtx_SYMBOL_REF (Pmode, ADDTF3_LIBCALL); \ + sub_optab->handlers[(int) TFmode].libfunc \ + = gen_rtx_SYMBOL_REF (Pmode, SUBTF3_LIBCALL); \ + smul_optab->handlers[(int) TFmode].libfunc \ + = gen_rtx_SYMBOL_REF (Pmode, MULTF3_LIBCALL); \ + sdiv_optab->handlers[(int) TFmode].libfunc \ + = gen_rtx_SYMBOL_REF (Pmode, DIVTF3_LIBCALL); \ + smin_optab->handlers[(int) TFmode].libfunc \ + = gen_rtx_SYMBOL_REF (Pmode, SMINTF3_LIBCALL); \ + smax_optab->handlers[(int) TFmode].libfunc \ + = gen_rtx_SYMBOL_REF (Pmode, SMAXTF3_LIBCALL); \ + abs_optab->handlers[(int) TFmode].libfunc \ + = gen_rtx_SYMBOL_REF (Pmode, ABSTF2_LIBCALL); \ + neg_optab->handlers[(int) TFmode].libfunc \ + = gen_rtx_SYMBOL_REF (Pmode, 
NEGTF2_LIBCALL); \ + extendsftf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EXTENDSFTF2_LIBCALL); \ + extenddftf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EXTENDDFTF2_LIBCALL); \ + trunctfsf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, TRUNCTFSF2_LIBCALL); \ + trunctfdf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, TRUNCTFDF2_LIBCALL); \ + floatsitf_libfunc = gen_rtx_SYMBOL_REF (Pmode, FLOATSITF2_LIBCALL); \ + floatditf_libfunc = gen_rtx_SYMBOL_REF (Pmode, FLOATDITF2_LIBCALL); \ + fixtfsi_libfunc = gen_rtx_SYMBOL_REF (Pmode, FIX_TRUNCTFSI2_LIBCALL);\ + fixtfdi_libfunc = gen_rtx_SYMBOL_REF (Pmode, FIX_TRUNCTFDI2_LIBCALL);\ + fixunstfsi_libfunc = gen_rtx_SYMBOL_REF (Pmode, FIX_TRUNCTFSI2_LIBCALL); \ + fixunstfdi_libfunc = gen_rtx_SYMBOL_REF (Pmode, FIX_TRUNCTFDI2_LIBCALL); \ + eqtf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EQTF2_LIBCALL); \ + netf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, NETF2_LIBCALL); \ + gttf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, GTTF2_LIBCALL); \ + getf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, GETF2_LIBCALL); \ + lttf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, LTTF2_LIBCALL); \ + letf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, LETF2_LIBCALL); \ + \ + sdiv_optab->handlers[(int) SImode].libfunc = 0; \ + udiv_optab->handlers[(int) SImode].libfunc = 0; \ + smod_optab->handlers[(int) SImode].libfunc = 0; \ + umod_optab->handlers[(int) SImode].libfunc = 0; \ + \ + INIT_SUBTARGET_OPTABS; \ + } while (0) + +/* This is meant to be redefined in the host dependent files */ +#define INIT_SUBTARGET_OPTABS + +/* Nonzero if a floating point comparison library call for + mode MODE that will return a boolean value. Zero if one + of the libgcc2 functions is used. 
*/ +#define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) ((MODE) == TFmode) diff --git a/contrib/gcc/config/ia64/ia64-protos.h b/contrib/gcc/config/ia64/ia64-protos.h new file mode 100644 index 0000000..43538cc --- /dev/null +++ b/contrib/gcc/config/ia64/ia64-protos.h @@ -0,0 +1,141 @@ +/* Definitions of target machine for GNU compiler for IA-64. + Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Variables defined in ia64.c. 
*/ + +#ifdef RTX_CODE +extern rtx ia64_compare_op0, ia64_compare_op1; +#endif + +/* Functions defined in ia64.c */ + +#ifdef RTX_CODE +extern int call_operand PARAMS((rtx, enum machine_mode)); +extern int sdata_symbolic_operand PARAMS((rtx, enum machine_mode)); +extern int got_symbolic_operand PARAMS((rtx, enum machine_mode)); +extern int symbolic_operand PARAMS((rtx, enum machine_mode)); +extern int function_operand PARAMS((rtx, enum machine_mode)); +extern int setjmp_operand PARAMS((rtx, enum machine_mode)); +extern int move_operand PARAMS((rtx, enum machine_mode)); +extern int gr_register_operand PARAMS((rtx, enum machine_mode)); +extern int fr_register_operand PARAMS((rtx, enum machine_mode)); +extern int grfr_register_operand PARAMS((rtx, enum machine_mode)); +extern int gr_nonimmediate_operand PARAMS((rtx, enum machine_mode)); +extern int fr_nonimmediate_operand PARAMS((rtx, enum machine_mode)); +extern int grfr_nonimmediate_operand PARAMS((rtx, enum machine_mode)); +extern int gr_reg_or_0_operand PARAMS((rtx, enum machine_mode)); +extern int gr_reg_or_5bit_operand PARAMS((rtx, enum machine_mode)); +extern int gr_reg_or_6bit_operand PARAMS((rtx, enum machine_mode)); +extern int gr_reg_or_8bit_operand PARAMS((rtx, enum machine_mode)); +extern int grfr_reg_or_8bit_operand PARAMS((rtx, enum machine_mode)); +extern int gr_reg_or_8bit_adjusted_operand PARAMS((rtx, enum machine_mode)); +extern int gr_reg_or_8bit_and_adjusted_operand PARAMS((rtx, enum machine_mode)); +extern int gr_reg_or_14bit_operand PARAMS((rtx, enum machine_mode)); +extern int gr_reg_or_22bit_operand PARAMS((rtx, enum machine_mode)); +extern int shift_count_operand PARAMS((rtx, enum machine_mode)); +extern int shift_32bit_count_operand PARAMS((rtx, enum machine_mode)); +extern int shladd_operand PARAMS((rtx, enum machine_mode)); +extern int fetchadd_operand PARAMS((rtx, enum machine_mode)); +extern int fr_reg_or_fp01_operand PARAMS((rtx, enum machine_mode)); +extern int 
normal_comparison_operator PARAMS((rtx, enum machine_mode)); +extern int adjusted_comparison_operator PARAMS((rtx, enum machine_mode)); +extern int signed_inequality_operator PARAMS((rtx, enum machine_mode)); +extern int destination_operand PARAMS((rtx, enum machine_mode)); +extern int not_postinc_memory_operand PARAMS((rtx, enum machine_mode)); +extern int predicate_operator PARAMS((rtx, enum machine_mode)); +extern int ar_lc_reg_operand PARAMS((rtx, enum machine_mode)); +extern int ar_ccv_reg_operand PARAMS((rtx, enum machine_mode)); +extern int ar_pfs_reg_operand PARAMS((rtx, enum machine_mode)); +extern int general_tfmode_operand PARAMS((rtx, enum machine_mode)); +extern int destination_tfmode_operand PARAMS((rtx, enum machine_mode)); +extern int tfreg_or_fp01_operand PARAMS((rtx, enum machine_mode)); + +extern int ia64_move_ok PARAMS((rtx, rtx)); +extern int ia64_depz_field_mask PARAMS((rtx, rtx)); +extern rtx ia64_gp_save_reg PARAMS((int)); +extern rtx ia64_split_timode PARAMS((rtx[], rtx, rtx)); +extern rtx spill_tfmode_operand PARAMS((rtx, int)); +extern rtx ia64_expand_compare PARAMS((enum rtx_code, enum machine_mode)); +extern void ia64_expand_call PARAMS((rtx, rtx, rtx, int)); + +extern HOST_WIDE_INT ia64_initial_elimination_offset PARAMS((int, int)); +extern void ia64_expand_prologue PARAMS((void)); +extern void ia64_expand_epilogue PARAMS((int)); + +extern int ia64_direct_return PARAMS((void)); +extern void ia64_expand_load_address PARAMS((rtx, rtx, rtx)); +extern int ia64_hard_regno_rename_ok PARAMS((int, int)); + +extern void ia64_initialize_trampoline PARAMS((rtx, rtx, rtx)); +extern void ia64_print_operand_address PARAMS((FILE *, rtx)); +extern void ia64_print_operand PARAMS((FILE *, rtx, int)); +extern enum reg_class ia64_secondary_reload_class PARAMS((enum reg_class, + enum machine_mode, + rtx)); +extern void ia64_reorg PARAMS((rtx)); +extern void process_for_unwind_directive PARAMS ((FILE *, rtx)); +extern const char *get_bundle_name PARAMS 
((int)); +#endif /* RTX_CODE */ + +#ifdef TREE_CODE +#ifdef RTX_CODE +extern rtx ia64_function_arg PARAMS((CUMULATIVE_ARGS *, enum machine_mode, + tree, int, int)); +extern rtx ia64_expand_builtin PARAMS((tree, rtx, rtx, + enum machine_mode, int)); +extern void ia64_va_start PARAMS((int, tree, rtx)); +extern rtx ia64_va_arg PARAMS((tree, tree)); +extern rtx ia64_function_value PARAMS((tree, tree)); +#endif /* RTX_CODE */ + +extern void ia64_setup_incoming_varargs PARAMS((CUMULATIVE_ARGS, int, tree, + int *, int)); +extern int ia64_function_arg_partial_nregs PARAMS((CUMULATIVE_ARGS *, + enum machine_mode, + tree, int)); +extern void ia64_function_arg_advance PARAMS((CUMULATIVE_ARGS *, + enum machine_mode, + tree, int)); +extern int ia64_return_in_memory PARAMS((tree)); +extern void ia64_asm_output_external PARAMS((FILE *, tree, const char *)); + +extern void ia64_encode_section_info PARAMS((tree)); +#endif /* TREE_CODE */ + +extern int ia64_register_move_cost PARAMS((enum machine_mode, enum reg_class, + enum reg_class)); +extern int ia64_epilogue_uses PARAMS((int)); +extern void emit_safe_across_calls PARAMS((FILE *)); +extern void ia64_init_builtins PARAMS((void)); +extern void ia64_override_options PARAMS((void)); +extern int ia64_dbx_register_number PARAMS((int)); + +#ifdef SDATA_SECTION_ASM_OP +extern void sdata_section PARAMS ((void)); +#endif + +#ifdef SBSS_SECTION_ASM_OP +extern void sbss_section PARAMS ((void)); +#endif + +#ifdef ARGS_SIZE_RTX +/* expr.h defines ARGS_SIZE_RTX and `enum direction'. */ +extern enum direction ia64_hpux_function_arg_padding PARAMS ((enum machine_mode, tree)); +#endif /* ARGS_SIZE_RTX */ diff --git a/contrib/gcc/config/ia64/ia64.c b/contrib/gcc/config/ia64/ia64.c new file mode 100644 index 0000000..7ca060b --- /dev/null +++ b/contrib/gcc/config/ia64/ia64.c @@ -0,0 +1,7820 @@ +/* Definitions of target machine for GNU compiler. + Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + Contributed by James E. 
Wilson <wilson@cygnus.com> and + David Mosberger <davidm@hpl.hp.com>. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include "config.h" +#include "system.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "real.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "recog.h" +#include "expr.h" +#include "optabs.h" +#include "obstack.h" +#include "except.h" +#include "function.h" +#include "ggc.h" +#include "basic-block.h" +#include "toplev.h" +#include "sched-int.h" +#include "timevar.h" +#include "target.h" +#include "target-def.h" + +/* This is used for communication between ASM_OUTPUT_LABEL and + ASM_OUTPUT_LABELREF. */ +int ia64_asm_output_label = 0; + +/* Define the information needed to generate branch and scc insns. This is + stored from the compare operation. */ +struct rtx_def * ia64_compare_op0; +struct rtx_def * ia64_compare_op1; + +/* Register names for ia64_expand_prologue. 
*/ +static const char * const ia64_reg_numbers[96] = +{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", + "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71", + "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79", + "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87", + "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95", + "r96", "r97", "r98", "r99", "r100","r101","r102","r103", + "r104","r105","r106","r107","r108","r109","r110","r111", + "r112","r113","r114","r115","r116","r117","r118","r119", + "r120","r121","r122","r123","r124","r125","r126","r127"}; + +/* ??? These strings could be shared with REGISTER_NAMES. */ +static const char * const ia64_input_reg_names[8] = +{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" }; + +/* ??? These strings could be shared with REGISTER_NAMES. */ +static const char * const ia64_local_reg_names[80] = +{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", + "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", + "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", + "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", + "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", + "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", + "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", + "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", + "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", + "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" }; + +/* ??? These strings could be shared with REGISTER_NAMES. */ +static const char * const ia64_output_reg_names[8] = +{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" }; + +/* String used with the -mfixed-range= option. 
*/ +const char *ia64_fixed_range_string; + +/* Determines whether we run our final scheduling pass or not. We always + avoid the normal second scheduling pass. */ +static int ia64_flag_schedule_insns2; + +/* Variables which are this size or smaller are put in the sdata/sbss + sections. */ + +unsigned int ia64_section_threshold; + +static int find_gr_spill PARAMS ((int)); +static int next_scratch_gr_reg PARAMS ((void)); +static void mark_reg_gr_used_mask PARAMS ((rtx, void *)); +static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT)); +static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT)); +static void finish_spill_pointers PARAMS ((void)); +static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT)); +static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx)); +static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT)); +static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx)); +static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx)); +static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx)); + +static enum machine_mode hfa_element_mode PARAMS ((tree, int)); +static void fix_range PARAMS ((const char *)); +static void ia64_add_gc_roots PARAMS ((void)); +static void ia64_init_machine_status PARAMS ((struct function *)); +static void ia64_mark_machine_status PARAMS ((struct function *)); +static void ia64_free_machine_status PARAMS ((struct function *)); +static void emit_insn_group_barriers PARAMS ((FILE *, rtx)); +static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx)); +static void emit_predicate_relation_info PARAMS ((void)); +static void process_epilogue PARAMS ((void)); +static int process_set PARAMS ((FILE *, rtx)); + +static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode, + tree, rtx)); +static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode, + tree, rtx)); +static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int, + tree, rtx)); +static rtx 
ia64_expand_lock_test_and_set PARAMS ((enum machine_mode, + tree, rtx)); +static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx)); +const struct attribute_spec ia64_attribute_table[]; +static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int)); +static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT)); +static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT)); +static void ia64_output_function_end_prologue PARAMS ((FILE *)); + +static int ia64_issue_rate PARAMS ((void)); +static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int)); +static void ia64_sched_init PARAMS ((FILE *, int, int)); +static void ia64_sched_finish PARAMS ((FILE *, int)); +static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *, + int *, int, int)); +static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int)); +static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int)); +static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int)); +static rtx ia64_cycle_display PARAMS ((int, rtx)); + + +/* Initialize the GCC target structure. 
*/ +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS ia64_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN ia64_expand_builtin + +#undef TARGET_ASM_BYTE_OP +#define TARGET_ASM_BYTE_OP "\tdata1\t" +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t" +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t" +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t" +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t" +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER ia64_assemble_integer + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue +#undef TARGET_ASM_FUNCTION_END_PROLOGUE +#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate +#undef TARGET_SCHED_VARIABLE_ISSUE +#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT ia64_sched_init +#undef TARGET_SCHED_FINISH +#define TARGET_SCHED_FINISH ia64_sched_finish +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER ia64_sched_reorder +#undef TARGET_SCHED_REORDER2 +#define TARGET_SCHED_REORDER2 ia64_sched_reorder2 +#undef TARGET_SCHED_CYCLE_DISPLAY +#define TARGET_SCHED_CYCLE_DISPLAY ia64_cycle_display + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Return 1 if OP is a valid operand for the MEM of a CALL insn. 
*/ + +int +call_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (mode != GET_MODE (op)) + return 0; + + return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG + || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG)); +} + +/* Return 1 if OP refers to a symbol in the sdata section. */ + +int +sdata_symbolic_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + switch (GET_CODE (op)) + { + case CONST: + if (GET_CODE (XEXP (op, 0)) != PLUS + || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF) + break; + op = XEXP (XEXP (op, 0), 0); + /* FALLTHRU */ + + case SYMBOL_REF: + if (CONSTANT_POOL_ADDRESS_P (op)) + return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold; + else + return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR; + + default: + break; + } + + return 0; +} + +/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */ + +int +got_symbolic_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + switch (GET_CODE (op)) + { + case CONST: + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS) + return 0; + if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF) + return 0; + op = XEXP (op, 1); + if (GET_CODE (op) != CONST_INT) + return 0; + + return 1; + + /* Ok if we're not using GOT entries at all. */ + if (TARGET_NO_PIC || TARGET_AUTO_PIC) + return 1; + + /* "Ok" while emitting rtl, since otherwise we won't be provided + with the entire offset during emission, which makes it very + hard to split the offset into high and low parts. */ + if (rtx_equal_function_value_matters) + return 1; + + /* Force the low 14 bits of the constant to zero so that we do not + use up so many GOT entries. */ + return (INTVAL (op) & 0x3fff) == 0; + + case SYMBOL_REF: + case LABEL_REF: + return 1; + + default: + break; + } + return 0; +} + +/* Return 1 if OP refers to a symbol. 
*/ + +int +symbolic_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + switch (GET_CODE (op)) + { + case CONST: + case SYMBOL_REF: + case LABEL_REF: + return 1; + + default: + break; + } + return 0; +} + +/* Return 1 if OP refers to a function. */ + +int +function_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op)) + return 1; + else + return 0; +} + +/* Return 1 if OP is setjmp or a similar function. */ + +/* ??? This is an unsatisfying solution. Should rethink. */ + +int +setjmp_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + const char *name; + int retval = 0; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + name = XSTR (op, 0); + + /* The following code is borrowed from special_function_p in calls.c. */ + + /* Disregard prefix _, __ or __x. */ + if (name[0] == '_') + { + if (name[1] == '_' && name[2] == 'x') + name += 3; + else if (name[1] == '_') + name += 2; + else + name += 1; + } + + if (name[0] == 's') + { + retval + = ((name[1] == 'e' + && (! strcmp (name, "setjmp") + || ! strcmp (name, "setjmp_syscall"))) + || (name[1] == 'i' + && ! strcmp (name, "sigsetjmp")) + || (name[1] == 'a' + && ! strcmp (name, "savectx"))); + } + else if ((name[0] == 'q' && name[1] == 's' + && ! strcmp (name, "qsetjmp")) + || (name[0] == 'v' && name[1] == 'f' + && ! strcmp (name, "vfork"))) + retval = 1; + + return retval; +} + +/* Return 1 if OP is a general operand, but when pic exclude symbolic + operands. */ + +/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF + from PREDICATE_CODES. */ + +int +move_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! TARGET_NO_PIC && symbolic_operand (op, mode)) + return 0; + + return general_operand (op, mode); +} + +/* Return 1 if OP is a register operand that is (or could be) a GR reg. 
*/ + +int +gr_register_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! register_operand (op, mode)) + return 0; + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) == REG) + { + unsigned int regno = REGNO (op); + if (regno < FIRST_PSEUDO_REGISTER) + return GENERAL_REGNO_P (regno); + } + return 1; +} + +/* Return 1 if OP is a register operand that is (or could be) an FR reg. */ + +int +fr_register_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! register_operand (op, mode)) + return 0; + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) == REG) + { + unsigned int regno = REGNO (op); + if (regno < FIRST_PSEUDO_REGISTER) + return FR_REGNO_P (regno); + } + return 1; +} + +/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */ + +int +grfr_register_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! register_operand (op, mode)) + return 0; + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) == REG) + { + unsigned int regno = REGNO (op); + if (regno < FIRST_PSEUDO_REGISTER) + return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno); + } + return 1; +} + +/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */ + +int +gr_nonimmediate_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! nonimmediate_operand (op, mode)) + return 0; + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) == REG) + { + unsigned int regno = REGNO (op); + if (regno < FIRST_PSEUDO_REGISTER) + return GENERAL_REGNO_P (regno); + } + return 1; +} + +/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */ + +int +fr_nonimmediate_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! 
nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  /* Pseudos and MEMs are acceptable; reload can put them anywhere.  */
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4 || INTVAL (op) == -1 ||
	      INTVAL (op) == 1 || INTVAL (op) == 4 ||
	      INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* RTX class 'a' covers the autoincrement address forms.  */
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
				  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    emit_move_insn (dest, temp);
}

/* Return (and lazily create) the rtx in which GP is saved around calls.
   SETJMP_P forces the save into the call-saved r4, since pseudos are not
   restored by longjmp.  */

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
	 pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
	 slot.  Unfortunately, the stack slot address gets cse'd across
	 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
	 place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
	 in place, since this rtx is used in exception handling receivers.
	 Moreover, we must get this rtx out of regno_reg_rtx or reload
	 will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
	{
	  REGNO (save) = GR_REG (4);
	  regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
	}
    }
  else
    {
      if (setjmp_p)
	save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
	save = gen_rtx_REG (DImode, LOC_REG (0));
      else
	save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = adjust_address (in, DImode, 0);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = adjust_address (in, DImode, 0);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.
 */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, pfs, gp_save, narg_rtx;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));
  pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);

  /* Count the argument registers in use, for the .regstk bookkeeping.  */
  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  if (sibcall_p)
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (! symbolic_operand (addr, VOIDmode))
    {
      rtx dest;

      if (! sibcall_p)
	emit_move_insn (gp_save, pic_offset_table_rtx);

      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
		      gen_rtx_MEM (DImode, plus_constant (addr, 8)));

      if (sibcall_p)
	insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
      else if (! retval)
	insn = gen_call_pic (dest, narg_rtx, b0);
      else
	insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
      emit_call_insn (insn);

      if (! sibcall_p)
	emit_move_insn (pic_offset_table_rtx, gp_save);
    }
  else if (TARGET_CONST_GP)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0, pfs));
      else
	{
	  emit_move_insn (gp_save, pic_offset_table_rtx);

	  if (! retval)
	    insn = gen_call_pic (addr, narg_rtx, b0);
	  else
	    insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
	  emit_call_insn (insn);

	  emit_move_insn (pic_offset_table_rtx, gp_save);
	}
    }
}

/* Begin the assembly file.
*/ + +void +emit_safe_across_calls (f) + FILE *f; +{ + unsigned int rs, re; + int out_state; + + rs = 1; + out_state = 0; + while (1) + { + while (rs < 64 && call_used_regs[PR_REG (rs)]) + rs++; + if (rs >= 64) + break; + for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++) + continue; + if (out_state == 0) + { + fputs ("\t.pred.safe_across_calls ", f); + out_state = 1; + } + else + fputc (',', f); + if (re == rs + 1) + fprintf (f, "p%u", rs); + else + fprintf (f, "p%u-p%u", rs, re - 1); + rs = re + 1; + } + if (out_state) + fputc ('\n', f); +} + + +/* Structure to be filled in by ia64_compute_frame_size with register + save masks and offsets for the current function. */ + +struct ia64_frame_info +{ + HOST_WIDE_INT total_size; /* size of the stack frame, not including + the caller's scratch area. */ + HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */ + HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */ + HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */ + HARD_REG_SET mask; /* mask of saved registers. */ + unsigned int gr_used_mask; /* mask of registers in use as gr spill + registers or long-term scratches. */ + int n_spilled; /* number of spilled registers. */ + int reg_fp; /* register for fp. */ + int reg_save_b0; /* save register for b0. */ + int reg_save_pr; /* save register for prs. */ + int reg_save_ar_pfs; /* save register for ar.pfs. */ + int reg_save_ar_unat; /* save register for ar.unat. */ + int reg_save_ar_lc; /* save register for ar.lc. */ + int n_input_regs; /* number of input registers used. */ + int n_local_regs; /* number of local registers used. */ + int n_output_regs; /* number of output registers used. */ + int n_rotate_regs; /* number of rotating registers used. */ + + char need_regstk; /* true if a .regstk directive needed. */ + char initialized; /* true if the data is finalized. */ +}; + +/* Current frame information calculated by ia64_compute_frame_size. 
*/ +static struct ia64_frame_info current_frame_info; + +/* Helper function for ia64_compute_frame_size: find an appropriate general + register to spill some special register to. SPECIAL_SPILL_MASK contains + bits in GR0 to GR31 that have already been allocated by this routine. + TRY_LOCALS is true if we should attempt to locate a local regnum. */ + +static int +find_gr_spill (try_locals) + int try_locals; +{ + int regno; + + /* If this is a leaf function, first try an otherwise unused + call-clobbered register. */ + if (current_function_is_leaf) + { + for (regno = GR_REG (1); regno <= GR_REG (31); regno++) + if (! regs_ever_live[regno] + && call_used_regs[regno] + && ! fixed_regs[regno] + && ! global_regs[regno] + && ((current_frame_info.gr_used_mask >> regno) & 1) == 0) + { + current_frame_info.gr_used_mask |= 1 << regno; + return regno; + } + } + + if (try_locals) + { + regno = current_frame_info.n_local_regs; + /* If there is a frame pointer, then we can't use loc79, because + that is HARD_FRAME_POINTER_REGNUM. In particular, see the + reg_name switching code in ia64_expand_prologue. */ + if (regno < (80 - frame_pointer_needed)) + { + current_frame_info.n_local_regs = regno + 1; + return LOC_REG (0) + regno; + } + } + + /* Failed to find a general register to spill to. Must use stack. */ + return 0; +} + +/* In order to make for nice schedules, we try to allocate every temporary + to a different register. We must of course stay away from call-saved, + fixed, and global registers. We must also stay away from registers + allocated in current_frame_info.gr_used_mask, since those include regs + used all through the prologue. + + Any register allocated here must be used immediately. The idea is to + aid scheduling, not to solve data flow problems. */ + +static int last_scratch_gr_reg; + +static int +next_scratch_gr_reg () +{ + int i, regno; + + for (i = 0; i < 32; ++i) + { + regno = (last_scratch_gr_reg + i + 1) & 31; + if (call_used_regs[regno] + && ! 
fixed_regs[regno] + && ! global_regs[regno] + && ((current_frame_info.gr_used_mask >> regno) & 1) == 0) + { + last_scratch_gr_reg = regno; + return regno; + } + } + + /* There must be _something_ available. */ + abort (); +} + +/* Helper function for ia64_compute_frame_size, called through + diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */ + +static void +mark_reg_gr_used_mask (reg, data) + rtx reg; + void *data ATTRIBUTE_UNUSED; +{ + unsigned int regno = REGNO (reg); + if (regno < 32) + { + unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg)); + for (i = 0; i < n; ++i) + current_frame_info.gr_used_mask |= 1 << (regno + i); + } +} + +/* Returns the number of bytes offset between the frame pointer and the stack + pointer for the current function. SIZE is the number of bytes of space + needed for local variables. */ + +static void +ia64_compute_frame_size (size) + HOST_WIDE_INT size; +{ + HOST_WIDE_INT total_size; + HOST_WIDE_INT spill_size = 0; + HOST_WIDE_INT extra_spill_size = 0; + HOST_WIDE_INT pretend_args_size; + HARD_REG_SET mask; + int n_spilled = 0; + int spilled_gr_p = 0; + int spilled_fr_p = 0; + unsigned int regno; + int i; + + if (current_frame_info.initialized) + return; + + memset (¤t_frame_info, 0, sizeof current_frame_info); + CLEAR_HARD_REG_SET (mask); + + /* Don't allocate scratches to the return register. */ + diddle_return_value (mark_reg_gr_used_mask, NULL); + + /* Don't allocate scratches to the EH scratch registers. */ + if (cfun->machine->ia64_eh_epilogue_sp) + mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL); + if (cfun->machine->ia64_eh_epilogue_bsp) + mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL); + + /* Find the size of the register stack frame. We have only 80 local + registers, because we reserve 8 for the inputs and 8 for the + outputs. */ + + /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed, + since we'll be adjusting that down later. 
*/ + regno = LOC_REG (78) + ! frame_pointer_needed; + for (; regno >= LOC_REG (0); regno--) + if (regs_ever_live[regno]) + break; + current_frame_info.n_local_regs = regno - LOC_REG (0) + 1; + + /* For functions marked with the syscall_linkage attribute, we must mark + all eight input registers as in use, so that locals aren't visible to + the caller. */ + + if (cfun->machine->n_varargs > 0 + || lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + current_frame_info.n_input_regs = 8; + else + { + for (regno = IN_REG (7); regno >= IN_REG (0); regno--) + if (regs_ever_live[regno]) + break; + current_frame_info.n_input_regs = regno - IN_REG (0) + 1; + } + + for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--) + if (regs_ever_live[regno]) + break; + i = regno - OUT_REG (0) + 1; + + /* When -p profiling, we need one output register for the mcount argument. + Likwise for -a profiling for the bb_init_func argument. For -ax + profiling, we need two output registers for the two bb_init_trace_func + arguments. */ + if (current_function_profile) + i = MAX (i, 1); + current_frame_info.n_output_regs = i; + + /* ??? No rotating register support yet. */ + current_frame_info.n_rotate_regs = 0; + + /* Discover which registers need spilling, and how much room that + will take. Begin with floating point and general registers, + which will always wind up on the stack. */ + + for (regno = FR_REG (2); regno <= FR_REG (127); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + spill_size += 16; + n_spilled += 1; + spilled_fr_p = 1; + } + + for (regno = GR_REG (1); regno <= GR_REG (31); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + spill_size += 8; + n_spilled += 1; + spilled_gr_p = 1; + } + + for (regno = BR_REG (1); regno <= BR_REG (7); regno++) + if (regs_ever_live[regno] && ! 
call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + spill_size += 8; + n_spilled += 1; + } + + /* Now come all special registers that might get saved in other + general registers. */ + + if (frame_pointer_needed) + { + current_frame_info.reg_fp = find_gr_spill (1); + /* If we did not get a register, then we take LOC79. This is guaranteed + to be free, even if regs_ever_live is already set, because this is + HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs, + as we don't count loc79 above. */ + if (current_frame_info.reg_fp == 0) + { + current_frame_info.reg_fp = LOC_REG (79); + current_frame_info.n_local_regs++; + } + } + + if (! current_function_is_leaf) + { + /* Emit a save of BR0 if we call other functions. Do this even + if this function doesn't return, as EH depends on this to be + able to unwind the stack. */ + SET_HARD_REG_BIT (mask, BR_REG (0)); + + current_frame_info.reg_save_b0 = find_gr_spill (1); + if (current_frame_info.reg_save_b0 == 0) + { + spill_size += 8; + n_spilled += 1; + } + + /* Similarly for ar.pfs. */ + SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); + current_frame_info.reg_save_ar_pfs = find_gr_spill (1); + if (current_frame_info.reg_save_ar_pfs == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + } + else + { + if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)]) + { + SET_HARD_REG_BIT (mask, BR_REG (0)); + spill_size += 8; + n_spilled += 1; + } + } + + /* Unwind descriptor hackery: things are most efficient if we allocate + consecutive GR save registers for RP, PFS, FP in that order. However, + it is absolutely critical that FP get the only hard register that's + guaranteed to be free, so we allocated it first. If all three did + happen to be allocated hard regs, and are consecutive, rearrange them + into the preferred order now. 
*/ + if (current_frame_info.reg_fp != 0 + && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1 + && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2) + { + current_frame_info.reg_save_b0 = current_frame_info.reg_fp; + current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1; + current_frame_info.reg_fp = current_frame_info.reg_fp + 2; + } + + /* See if we need to store the predicate register block. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + break; + if (regno <= PR_REG (63)) + { + SET_HARD_REG_BIT (mask, PR_REG (0)); + current_frame_info.reg_save_pr = find_gr_spill (1); + if (current_frame_info.reg_save_pr == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + + /* ??? Mark them all as used so that register renaming and such + are free to use them. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + regs_ever_live[regno] = 1; + } + + /* If we're forced to use st8.spill, we're forced to save and restore + ar.unat as well. */ + if (spilled_gr_p || cfun->machine->n_varargs) + { + regs_ever_live[AR_UNAT_REGNUM] = 1; + SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM); + current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0); + if (current_frame_info.reg_save_ar_unat == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + } + + if (regs_ever_live[AR_LC_REGNUM]) + { + SET_HARD_REG_BIT (mask, AR_LC_REGNUM); + current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0); + if (current_frame_info.reg_save_ar_lc == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + } + + /* If we have an odd number of words of pretend arguments written to + the stack, then the FR save area will be unaligned. We round the + size of this area up to keep things 16 byte aligned. 
*/ + if (spilled_fr_p) + pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size); + else + pretend_args_size = current_function_pretend_args_size; + + total_size = (spill_size + extra_spill_size + size + pretend_args_size + + current_function_outgoing_args_size); + total_size = IA64_STACK_ALIGN (total_size); + + /* We always use the 16-byte scratch area provided by the caller, but + if we are a leaf function, there's no one to which we need to provide + a scratch area. */ + if (current_function_is_leaf) + total_size = MAX (0, total_size - 16); + + current_frame_info.total_size = total_size; + current_frame_info.spill_cfa_off = pretend_args_size - 16; + current_frame_info.spill_size = spill_size; + current_frame_info.extra_spill_size = extra_spill_size; + COPY_HARD_REG_SET (current_frame_info.mask, mask); + current_frame_info.n_spilled = n_spilled; + current_frame_info.initialized = reload_completed; +} + +/* Compute the initial difference between the specified pair of registers. */ + +HOST_WIDE_INT +ia64_initial_elimination_offset (from, to) + int from, to; +{ + HOST_WIDE_INT offset; + + ia64_compute_frame_size (get_frame_size ()); + switch (from) + { + case FRAME_POINTER_REGNUM: + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (current_function_is_leaf) + offset = -current_frame_info.total_size; + else + offset = -(current_frame_info.total_size + - current_function_outgoing_args_size - 16); + } + else if (to == STACK_POINTER_REGNUM) + { + if (current_function_is_leaf) + offset = 0; + else + offset = 16 + current_function_outgoing_args_size; + } + else + abort (); + break; + + case ARG_POINTER_REGNUM: + /* Arguments start above the 16 byte save area, unless stdarg + in which case we store through the 16 byte save area. 
*/ + if (to == HARD_FRAME_POINTER_REGNUM) + offset = 16 - current_function_pretend_args_size; + else if (to == STACK_POINTER_REGNUM) + offset = (current_frame_info.total_size + + 16 - current_function_pretend_args_size); + else + abort (); + break; + + case RETURN_ADDRESS_POINTER_REGNUM: + offset = 0; + break; + + default: + abort (); + } + + return offset; +} + +/* If there are more than a trivial number of register spills, we use + two interleaved iterators so that we can get two memory references + per insn group. + + In order to simplify things in the prologue and epilogue expanders, + we use helper functions to fix up the memory references after the + fact with the appropriate offsets to a POST_MODIFY memory mode. + The following data structure tracks the state of the two iterators + while insns are being emitted. */ + +struct spill_fill_data +{ + rtx init_after; /* point at which to emit initializations */ + rtx init_reg[2]; /* initial base register */ + rtx iter_reg[2]; /* the iterator registers */ + rtx *prev_addr[2]; /* address of last memory use */ + rtx prev_insn[2]; /* the insn corresponding to prev_addr */ + HOST_WIDE_INT prev_off[2]; /* last offset */ + int n_iter; /* number of iterators in use */ + int next_iter; /* next iterator to use */ + unsigned int save_gr_used_mask; +}; + +static struct spill_fill_data spill_fill_data; + +static void +setup_spill_pointers (n_spills, init_reg, cfa_off) + int n_spills; + rtx init_reg; + HOST_WIDE_INT cfa_off; +{ + int i; + + spill_fill_data.init_after = get_last_insn (); + spill_fill_data.init_reg[0] = init_reg; + spill_fill_data.init_reg[1] = init_reg; + spill_fill_data.prev_addr[0] = NULL; + spill_fill_data.prev_addr[1] = NULL; + spill_fill_data.prev_insn[0] = NULL; + spill_fill_data.prev_insn[1] = NULL; + spill_fill_data.prev_off[0] = cfa_off; + spill_fill_data.prev_off[1] = cfa_off; + spill_fill_data.next_iter = 0; + spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask; + + 
spill_fill_data.n_iter = 1 + (n_spills > 2);
  /* Claim one scratch GR per iterator to hold the running spill address.  */
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

/* Release the scratch registers claimed by setup_spill_pointers by
   restoring the saved gr_used_mask.  */

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

/* Return a MEM addressing the spill slot for REG at CFA offset CFA_OFF,
   cycling round-robin through the spill iterator registers.  Where
   possible the previously emitted spill/fill insn is retrofitted with a
   post-modify address so that no separate add insn is needed to advance
   the iterator.  */

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      /* A previous use of this iterator exists: fold the displacement
	 into that insn as a post-modify when it fits.  */
      if (CONST_OK_FOR_N (disp))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      /* First use of this iterator: initialize it from its base register
	 (plus displacement if needed).  */
      rtx seq, insn;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = gen_sequence ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    insn = emit_insn_before (seq, first);
	  else
	    insn = emit_insn (seq);
	}
      spill_fill_data.init_after = insn;

      /* If DISP is 0, we may or may not have a further adjustment
	 afterward.  If we do, then the load/store insn may be modified
	 to be a post-modify.  If we don't, then this copy may be
	 eliminated by copyprop_hardreg_forward, which makes this
	 insn garbage, which runs afoul of the sanity check in
	 propagate_one_insn.  So mark this insn as legal to delete.  */
      if (disp == 0)
	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					     REG_NOTES (insn));
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  /* Remember this use so the next call can thread a post-modify
     through it, and advance the round-robin iterator index.  */
  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

/* Spill REG to the register save area at CFA offset CFA_OFF using
   MOVE_FN.  If FRAME_REG is non-null, mark the store frame-related and
   attach an unwind note that spells out the save address relative to
   the frame or stack pointer, since the unwinder cannot follow the
   interleaved post-modify iterators.  */

static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}

/* Reload REG from the register save area at CFA offset CFA_OFF using
   MOVE_FN.  */

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
				GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.
*/

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
	[ varargs spill area ]
	[ fr register spill area ]
	[ br register spill area ]
	[ ar register spill area ]
	[ pr register spill area ]
	[ gr register spill area ]  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

void
ia64_expand_prologue ()
{
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;

      /* An epilogue exists iff some non-fake fallthru edge reaches the
	 exit block.  */
      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }

  /* Fix up the return address placeholder.  */
  /* ??? We can fail if __builtin_return_address is used, and we didn't
     allocate a register in which to save b0.  I can't think of a way to
     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
     then be sure that I got the right one.  Further, reload doesn't seem
     to care if an eliminable register isn't used, and "eliminates" it
     anyway.  */
  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
      && current_frame_info.reg_save_b0 != 0)
    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.words)
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      /* Save ar.pfs either into its dedicated save register or into a
	 scratch GR; the alloc insn itself performs the copy.  */
      if (current_frame_info.reg_save_ar_pfs)
	regno = current_frame_info.reg_save_ar_pfs;
      else
	regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
				   GEN_INT (current_frame_info.n_input_regs),
				   GEN_INT (current_frame_info.n_local_regs),
				   GEN_INT (current_frame_info.n_output_regs),
				   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  /* If the adjustment went through a scratch register, tell the
	     unwinder the equivalent constant-offset expression.  */
	  if (GET_CODE (offset) != CONST_INT)
	    {
	      REG_NOTES (insn)
		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				     gen_rtx_SET (VOIDmode,
						  stack_pointer_rtx,
						  gen_rtx_PLUS (DImode,
								stack_pointer_rtx,
								frame_size_rtx)),
				     REG_NOTES (insn));
	    }
	}

      /* ??? At this point we must generate a magic insn that appears to
	 modify the stack pointer, the frame pointer, and all spill
	 iterators.  This would allow the most scheduling freedom.  For
	 now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	}

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
	emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode, alt_reg, reg),
				 REG_NOTES (insn));

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (current_frame_info.reg_save_ar_pfs == 0
      && ! current_function_is_leaf)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_spill (gen_gr_spill, reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.
*/
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	/* BR registers go through a GR intermediary on their way to
	   memory.  */
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (alt_reg, reg);
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
	cfa_off -= 16;
      }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
}

/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_epilogue(), since it allows the scheduler
   to intermix instructions with the restores of the caller saved registers.
   In some cases, it might be necessary to emit a barrier instruction as the
   last insn to prevent such scheduling.  */

void
ia64_expand_epilogue (sibcall_p)
     int sibcall_p;
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.reg_save_pr != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat != 0)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
	  cfa_off -= 8;
	}
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.reg_save_ar_pfs != 0)
    {
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (! current_function_is_leaf)
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.reg_save_ar_lc != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Restore all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);
	cfa_off -= 8;
      }

  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	/* As on the spill side, BR registers travel through a GR
	   intermediary.  */
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	cfa_off -= 8;
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
	cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();

  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
	 just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      /* If the adjustment went through a scratch register, tell the
	 unwinder the equivalent constant-offset expression.  */
      if (GET_CODE (offset) != CONST_INT)
	{
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode,
					      stack_pointer_rtx,
					      gen_rtx_PLUS (DImode,
							    stack_pointer_rtx,
							    frame_size_rtx)),
				 REG_NOTES (insn));
	}
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
	 first available call clobbered register.  If there was a frame_pointer
	 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
	 so we have to make sure we're using the string "r2" when emitting
	 the register name for the assembler.  */
      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
	 values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
			      GEN_INT (0), GEN_INT (0),
			      GEN_INT (current_frame_info.n_input_regs),
			      GEN_INT (0)));
    }
}

/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return ()
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      /* A direct return is possible only when nothing was saved and no
	 frame was allocated.  */
      return (current_frame_info.total_size == 0
	      && current_frame_info.n_spilled == 0
	      && current_frame_info.reg_save_b0 == 0
	      && current_frame_info.reg_save_pr == 0
	      && current_frame_info.reg_save_ar_pfs == 0
	      && current_frame_info.reg_save_ar_unat == 0
	      && current_frame_info.reg_save_ar_lc == 0);
    }
  return 0;
}

/* Return 1 if the register renamer may rename hard register FROM
   to hard register TO.  */

int
ia64_hard_regno_rename_ok (from, to)
     int from;
     int to;
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  if (to == current_frame_info.reg_fp
      || to == current_frame_info.reg_save_b0
      || to == current_frame_info.reg_save_pr
      || to == current_frame_info.reg_save_ar_pfs
      || to == current_frame_info.reg_save_ar_unat
      || to == current_frame_info.reg_save_ar_lc)
    return 0;

  if (from == current_frame_info.reg_fp
      || from == current_frame_info.reg_save_b0
      || from == current_frame_info.reg_save_pr
      || from == current_frame_info.reg_save_ar_pfs
      || from == current_frame_info.reg_save_ar_unat
      || from == current_frame_info.reg_save_ar_lc)
    return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  /* Reg 4 contains the saved gp; we can't reliably rename this.
*/
  if (from == GR_REG (4) && current_function_calls_setjmp)
    return 0;

  return 1;
}

/* Target hook for assembling integer objects.  Handle word-sized
   aligned objects and detect the cases when @fptr is needed.  */

static bool
ia64_assemble_integer (x, size, aligned_p)
     rtx x;
     unsigned int size;
     int aligned_p;
{
  /* Function symbols need an @fptr relocation so the assembler emits a
     function descriptor address rather than the raw symbol address.  */
  if (size == UNITS_PER_WORD && aligned_p
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_FLAG (x))
    {
      fputs ("\tdata8\t@fptr(", asm_out_file);
      output_addr_const (asm_out_file, x);
      fputs (")\n", asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}

/* Emit the function prologue.  */

static void
ia64_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);

  /* Unwind directives are needed only when unwind tables or non-SJLJ
     exceptions are in use.  */
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  /* Emit the .prologue directive.  */

  /* MASK accumulates which of rp/ar.pfs/psp/pr were saved in GRs, and
     GRSAVE is the first GR of the (consecutive) save block; the short-form
     .prologue directive requires the save registers to be consecutive.  */
  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.reg_save_b0 != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.reg_save_b0;
    }
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
    }
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_fp == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
    }
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_pr == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_pr;
    }

  if (mask)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
}

/* Emit the .body directive at the scheduled end of the prologue.  */

static void
ia64_output_function_end_prologue (file)
     FILE *file;
{
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  fputs ("\t.body\n", file);
}

/* Emit the function epilogue.  */

static void
ia64_output_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  int i;

  /* Reset from the function's potential modifications.  */
  XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;

  /* Undo the frame pointer and in/loc/out register-name swaps performed
     by ia64_expand_prologue.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}

/* Map REGNO to the register number used in the debug information,
   accounting for the frame pointer renaming done in
   ia64_expand_prologue.  */

int
ia64_dbx_register_number (regno)
     int regno;
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.reg_fp)
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
	regno = current_frame_info.reg_fp;
      else if (regno == current_frame_info.reg_fp)
	regno = HARD_FRAME_POINTER_REGNUM;
    }

  /* in/loc/out registers are numbered consecutively from 32.  */
  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}

/* Fill in the four-word trampoline at ADDR so that it transfers control
   to FNADDR with the static chain register set to STATIC_CHAIN.  */

void
ia64_initialize_trampoline (addr, fnaddr, static_chain)
     rtx addr, fnaddr, static_chain;
{
  rtx addr_reg, eight = GEN_INT (8);

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.
*/
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
}

/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode INT_MODE.

   We generate the actual spill instructions during prologue generation.  */

void
ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;
     int int_mode;
     tree type;
     int * pretend_size;
     int second_time ATTRIBUTE_UNUSED;
{
  /* If this is a stdarg function, then skip the current argument.  */
  if (! current_function_varargs)
    ia64_function_arg_advance (&cum, int_mode, type, 1);

  /* Record how many argument slots remain; the prologue will spill that
     many argument registers into the varargs save area.  */
  if (cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}

/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (type, nested)
     tree type;
     int nested;
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
    case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
			      * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
	return VOIDmode;

    case REAL_TYPE:
      /* ??? Should exclude 128-bit long double here.  */
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested)
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      return TYPE_MODE (TREE_TYPE (type));

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      /* All FIELD_DECLs must recursively yield the same FP mode.  */
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}

/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */
/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (cum, mode, type, named, incoming)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming;
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* Integer and float arguments larger than 8 bytes start at the next even
     boundary.  Aggregates larger than 8 bytes start at the next even boundary
     if the aggregate has 16 byte alignment.  Net effect is that types with
     alignment greater than 8 start at the next even boundary.  */
  /* ??? The ABI does not specify how to handle aggregates with alignment from
     9 to 15 bytes, or greater than 16.  We handle them all as if they had
     16 byte alignment.  Such aggregates can occur only if gcc extensions are
     used.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
							      + fp_regs)),
				      GEN_INT (offset));
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  enum machine_mode gr_mode = DImode;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;
	  /* Complex floats need to have float mode.  */
	  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	    gr_mode = hfa_mode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg
							     + int_regs)),
				      GEN_INT (offset));
	  offset += GET_MODE_SIZE (gr_mode);
	  int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
		      ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
	}

      /* If we ended up using just one location, just return that one loc.  */
      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return gen_rtx_REG (mode, basereg + cum->words + offset);
      else
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}

/* Return number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

int
ia64_function_arg_partial_nregs (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  int words = (((mode == BLKmode ?
int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + + /* Arguments with alignment larger than 8 bytes start at the next even + boundary. */ + if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) + : (words > 1)) + && (cum->words & 1)) + offset = 1; + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* It doesn't matter whether the argument goes in FR or GR regs. If + it fits within the 8 argument slots, then it goes entirely in + registers. If it extends past the last argument slot, then the rest + goes on the stack. */ + + if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) + return 0; + + return MAX_ARGUMENT_SLOTS - cum->words - offset; +} + +/* Update CUM to point after this argument. This is patterned after + ia64_function_arg. */ + +void +ia64_function_arg_advance (cum, mode, type, named) + CUMULATIVE_ARGS *cum; + enum machine_mode mode; + tree type; + int named; +{ + int words = (((mode == BLKmode ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + enum machine_mode hfa_mode = VOIDmode; + + /* If all arg slots are already full, then there is nothing to do. */ + if (cum->words >= MAX_ARGUMENT_SLOTS) + return; + + /* Arguments with alignment larger than 8 bytes start at the next even + boundary. */ + if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) + : (words > 1)) + && (cum->words & 1)) + offset = 1; + + cum->words += words + offset; + + /* Check for and handle homogeneous FP aggregates. */ + if (type) + hfa_mode = hfa_element_mode (type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || named)) + { + int fp_regs = cum->fp_regs; + /* This is the original value of cum->words + offset. 
*/ + int int_regs = cum->words - words; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) + { + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + cum->fp_regs = fp_regs; + } + + /* Integral and aggregates go in general registers. If we have run out of + FR registers, then FP values must also go in general registers. This can + happen when we have a SFmode HFA. */ + else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS) + return; + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR registeer when unnamed. */ + else if (cum->prototype) + { + if (! named) + return; + else + /* ??? Complex types should not reach here. */ + cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + /* ??? Complex types should not reach here. */ + cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + + return; +} + +/* Implement va_start. 
*/ + +void +ia64_va_start (stdarg_p, valist, nextarg) + int stdarg_p; + tree valist; + rtx nextarg; +{ + int arg_words; + int ofs; + + arg_words = current_function_args_info.words; + + if (stdarg_p) + ofs = 0; + else + ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0); + + nextarg = plus_constant (nextarg, ofs); + std_expand_builtin_va_start (1, valist, nextarg); +} + +/* Implement va_arg. */ + +rtx +ia64_va_arg (valist, type) + tree valist, type; +{ + tree t; + + /* Arguments with alignment larger than 8 bytes start at the next even + boundary. */ + if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) + { + t = build (PLUS_EXPR, TREE_TYPE (valist), valist, + build_int_2 (2 * UNITS_PER_WORD - 1, 0)); + t = build (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_2 (-2 * UNITS_PER_WORD, -1)); + t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + + return std_expand_builtin_va_arg (valist, type); +} + +/* Return 1 if function return value returned in memory. Return 0 if it is + in a register. */ + +int +ia64_return_in_memory (valtype) + tree valtype; +{ + enum machine_mode mode; + enum machine_mode hfa_mode; + HOST_WIDE_INT byte_size; + + mode = TYPE_MODE (valtype); + byte_size = GET_MODE_SIZE (mode); + if (mode == BLKmode) + { + byte_size = int_size_in_bytes (valtype); + if (byte_size < 0) + return 1; + } + + /* Hfa's with up to 8 elements are returned in the FP argument registers. */ + + hfa_mode = hfa_element_mode (valtype, 0); + if (hfa_mode != VOIDmode) + { + int hfa_size = GET_MODE_SIZE (hfa_mode); + + if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) + return 1; + else + return 0; + } + else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) + return 1; + else + return 0; +} + +/* Return rtx for register that holds the function return value. 
*/ + +rtx +ia64_function_value (valtype, func) + tree valtype; + tree func ATTRIBUTE_UNUSED; +{ + enum machine_mode mode; + enum machine_mode hfa_mode; + + mode = TYPE_MODE (valtype); + hfa_mode = hfa_element_mode (valtype, 0); + + if (hfa_mode != VOIDmode) + { + rtx loc[8]; + int i; + int hfa_size; + int byte_size; + int offset; + + hfa_size = GET_MODE_SIZE (hfa_mode); + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); + offset = 0; + for (i = 0; offset < byte_size; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), + GEN_INT (offset)); + offset += hfa_size; + } + + if (i == 1) + return XEXP (loc[0], 0); + else + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + else if (FLOAT_TYPE_P (valtype) && + ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT)) + return gen_rtx_REG (mode, FR_ARG_FIRST); + else + return gen_rtx_REG (mode, GR_RET_FIRST); +} + +/* Print a memory address as an operand to reference that memory location. */ + +/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps + also call this from ia64_print_operand for memory addresses. */ + +void +ia64_print_operand_address (stream, address) + FILE * stream ATTRIBUTE_UNUSED; + rtx address ATTRIBUTE_UNUSED; +{ +} + +/* Print an operand to an assembler instruction. + C Swap and print a comparison operator. + D Print an FP comparison operator. + E Print 32 - constant, for SImode shifts as extract. + e Print 64 - constant, for DImode rotates. + F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or + a floating point register emitted normally. + I Invert a predicate register by adding 1. + J Select the proper predicate register for a condition. + j Select the inverse predicate register for a condition. + O Append .acq for volatile load. + P Postincrement of a MEM. + Q Append .rel for volatile store. + S Shift amount for shladd instruction. 
+ T Print an 8-bit sign extended number (K) as a 32-bit unsigned number + for Intel assembler. + U Print an 8-bit sign extended number (K) as a 64-bit unsigned number + for Intel assembler. + r Print register name, or constant 0 as r0. HP compatibility for + Linux kernel. */ +void +ia64_print_operand (file, x, code) + FILE * file; + rtx x; + int code; +{ + const char *str; + + switch (code) + { + case 0: + /* Handled below. */ + break; + + case 'C': + { + enum rtx_code c = swap_condition (GET_CODE (x)); + fputs (GET_RTX_NAME (c), file); + return; + } + + case 'D': + switch (GET_CODE (x)) + { + case NE: + str = "neq"; + break; + case UNORDERED: + str = "unord"; + break; + case ORDERED: + str = "ord"; + break; + default: + str = GET_RTX_NAME (GET_CODE (x)); + break; + } + fputs (str, file); + return; + + case 'E': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); + return; + + case 'e': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x)); + return; + + case 'F': + if (x == CONST0_RTX (GET_MODE (x))) + str = reg_names [FR_REG (0)]; + else if (x == CONST1_RTX (GET_MODE (x))) + str = reg_names [FR_REG (1)]; + else if (GET_CODE (x) == REG) + str = reg_names [REGNO (x)]; + else + abort (); + fputs (str, file); + return; + + case 'I': + fputs (reg_names [REGNO (x) + 1], file); + return; + + case 'J': + case 'j': + { + unsigned int regno = REGNO (XEXP (x, 0)); + if (GET_CODE (x) == EQ) + regno += 1; + if (code == 'j') + regno ^= 1; + fputs (reg_names [regno], file); + } + return; + + case 'O': + if (MEM_VOLATILE_P (x)) + fputs(".acq", file); + return; + + case 'P': + { + HOST_WIDE_INT value; + + switch (GET_CODE (XEXP (x, 0))) + { + default: + return; + + case POST_MODIFY: + x = XEXP (XEXP (XEXP (x, 0), 1), 1); + if (GET_CODE (x) == CONST_INT) + value = INTVAL (x); + else if (GET_CODE (x) == REG) + { + fprintf (file, ", %s", reg_names[REGNO (x)]); + return; + } + else + abort (); + break; + + case POST_INC: + value = GET_MODE_SIZE (GET_MODE (x)); + break; + 
+ case POST_DEC: + value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); + break; + } + + putc (',', file); + putc (' ', file); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, value); + return; + } + + case 'Q': + if (MEM_VOLATILE_P (x)) + fputs(".rel", file); + return; + + case 'S': + fprintf (file, "%d", exact_log2 (INTVAL (x))); + return; + + case 'T': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'U': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + const char *prefix = "0x"; + if (INTVAL (x) & 0x80000000) + { + fprintf (file, "0xffffffff"); + prefix = ""; + } + fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'r': + /* If this operand is the constant zero, write it as register zero. + Any register, zero, or CONST_INT value is OK here. */ + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)], file); + else if (x == CONST0_RTX (GET_MODE (x))) + fputs ("r0", file); + else if (GET_CODE (x) == CONST_INT) + output_addr_const (file, x); + else + output_operand_lossage ("invalid %%r value"); + return; + + case '+': + { + const char *which; + + /* For conditional branches, returns or calls, substitute + sptk, dptk, dpnt, or spnt for %s. */ + x = find_reg_note (current_output_insn, REG_BR_PROB, 0); + if (x) + { + int pred_val = INTVAL (XEXP (x, 0)); + + /* Guess top and bottom 10% statically predicted. 
*/ + if (pred_val < REG_BR_PROB_BASE / 50) + which = ".spnt"; + else if (pred_val < REG_BR_PROB_BASE / 2) + which = ".dpnt"; + else if (pred_val < REG_BR_PROB_BASE / 100 * 98) + which = ".dptk"; + else + which = ".sptk"; + } + else if (GET_CODE (current_output_insn) == CALL_INSN) + which = ".sptk"; + else + which = ".dptk"; + + fputs (which, file); + return; + } + + case ',': + x = current_insn_predicate; + if (x) + { + unsigned int regno = REGNO (XEXP (x, 0)); + if (GET_CODE (x) == EQ) + regno += 1; + fprintf (file, "(%s) ", reg_names [regno]); + } + return; + + default: + output_operand_lossage ("ia64_print_operand: unknown code"); + return; + } + + switch (GET_CODE (x)) + { + /* This happens for the spill/restore instructions. */ + case POST_INC: + case POST_DEC: + case POST_MODIFY: + x = XEXP (x, 0); + /* ... fall through ... */ + + case REG: + fputs (reg_names [REGNO (x)], file); + break; + + case MEM: + { + rtx addr = XEXP (x, 0); + if (GET_RTX_CLASS (GET_CODE (addr)) == 'a') + addr = XEXP (addr, 0); + fprintf (file, "[%s]", reg_names [REGNO (addr)]); + break; + } + + default: + output_addr_const (file, x); + break; + } + + return; +} + +/* Calulate the cost of moving data from a register in class FROM to + one in class TO, using MODE. */ + +int +ia64_register_move_cost (mode, from, to) + enum machine_mode mode; + enum reg_class from, to; +{ + /* ADDL_REGS is the same as GR_REGS for movement purposes. */ + if (to == ADDL_REGS) + to = GR_REGS; + if (from == ADDL_REGS) + from = GR_REGS; + + /* All costs are symmetric, so reduce cases by putting the + lower number class as the destination. */ + if (from < to) + { + enum reg_class tmp = to; + to = from, from = tmp; + } + + /* Moving from FR<->GR in TFmode must be more expensive than 2, + so that we get secondary memory reloads. Between FR_REGS, + we have to make this at least as expensive as MEMORY_MOVE_COST + to avoid spectacularly poor register class preferencing. 
*/ + if (mode == TFmode) + { + if (to != GR_REGS || from != GR_REGS) + return MEMORY_MOVE_COST (mode, to, 0); + else + return 3; + } + + switch (to) + { + case PR_REGS: + /* Moving between PR registers takes two insns. */ + if (from == PR_REGS) + return 3; + /* Moving between PR and anything but GR is impossible. */ + if (from != GR_REGS) + return MEMORY_MOVE_COST (mode, to, 0); + break; + + case BR_REGS: + /* Moving between BR and anything but GR is impossible. */ + if (from != GR_REGS && from != GR_AND_BR_REGS) + return MEMORY_MOVE_COST (mode, to, 0); + break; + + case AR_I_REGS: + case AR_M_REGS: + /* Moving between AR and anything but GR is impossible. */ + if (from != GR_REGS) + return MEMORY_MOVE_COST (mode, to, 0); + break; + + case GR_REGS: + case FR_REGS: + case GR_AND_FR_REGS: + case GR_AND_BR_REGS: + case ALL_REGS: + break; + + default: + abort (); + } + + return 2; +} + +/* This function returns the register class required for a secondary + register when copying between one of the registers in CLASS, and X, + using MODE. A return value of NO_REGS means that no secondary register + is required. */ + +enum reg_class +ia64_secondary_reload_class (class, mode, x) + enum reg_class class; + enum machine_mode mode ATTRIBUTE_UNUSED; + rtx x; +{ + int regno = -1; + + if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + regno = true_regnum (x); + + switch (class) + { + case BR_REGS: + case AR_M_REGS: + case AR_I_REGS: + /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global + interaction. We end up with two pseudos with overlapping lifetimes + both of which are equiv to the same constant, and both which need + to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end + changes depending on the path length, which means the qty_first_reg + check in make_regs_eqv can give different answers at different times. + At some point I'll probably need a reload_indi pattern to handle + this. 
+ + We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we + wound up with a FP register from GR_AND_FR_REGS. Extend that to all + non-general registers for good measure. */ + if (regno >= 0 && ! GENERAL_REGNO_P (regno)) + return GR_REGS; + + /* This is needed if a pseudo used as a call_operand gets spilled to a + stack slot. */ + if (GET_CODE (x) == MEM) + return GR_REGS; + break; + + case FR_REGS: + /* Need to go through general regsters to get to other class regs. */ + if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno))) + return GR_REGS; + + /* This can happen when a paradoxical subreg is an operand to the + muldi3 pattern. */ + /* ??? This shouldn't be necessary after instruction scheduling is + enabled, because paradoxical subregs are not accepted by + register_operand when INSN_SCHEDULING is defined. Or alternatively, + stop the paradoxical subreg stupidity in the *_operand functions + in recog.c. */ + if (GET_CODE (x) == MEM + && (GET_MODE (x) == SImode || GET_MODE (x) == HImode + || GET_MODE (x) == QImode)) + return GR_REGS; + + /* This can happen because of the ior/and/etc patterns that accept FP + registers as operands. If the third operand is a constant, then it + needs to be reloaded into a FP register. */ + if (GET_CODE (x) == CONST_INT) + return GR_REGS; + + /* This can happen because of register elimination in a muldi3 insn. + E.g. `26107 * (unsigned long)&u'. */ + if (GET_CODE (x) == PLUS) + return GR_REGS; + break; + + case PR_REGS: + /* ??? This happens if we cse/gcse a BImode value across a call, + and the function has a nonlocal goto. This is because global + does not allocate call crossing pseudos to hard registers when + current_function_has_nonlocal_goto is true. This is relatively + common for C++ programs that use exceptions. To reproduce, + return NO_REGS and compile libstdc++. 
*/ + if (GET_CODE (x) == MEM) + return GR_REGS; + + /* This can happen when we take a BImode subreg of a DImode value, + and that DImode value winds up in some non-GR register. */ + if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno)) + return GR_REGS; + break; + + case GR_REGS: + /* Since we have no offsettable memory addresses, we need a temporary + to hold the address of the second word. */ + if (mode == TImode) + return GR_REGS; + break; + + default: + break; + } + + return NO_REGS; +} + + +/* Emit text to declare externally defined variables and functions, because + the Intel assembler does not support undefined externals. */ + +void +ia64_asm_output_external (file, decl, name) + FILE *file; + tree decl; + const char *name; +{ + int save_referenced; + + /* GNU as does not need anything here. */ + if (TARGET_GNU_AS) + return; + + /* ??? The Intel assembler creates a reference that needs to be satisfied by + the linker when we do this, so we need to be careful not to do this for + builtin functions which have no library equivalent. Unfortunately, we + can't tell here whether or not a function will actually be called by + expand_expr, so we pull in library functions even if we may not need + them later. */ + if (! strcmp (name, "__builtin_next_arg") + || ! strcmp (name, "alloca") + || ! strcmp (name, "__builtin_constant_p") + || ! strcmp (name, "__builtin_args_info")) + return; + + /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and + restore it. */ + save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)); + if (TREE_CODE (decl) == FUNCTION_DECL) + { + fprintf (file, "%s", TYPE_ASM_OP); + assemble_name (file, name); + putc (',', file); + fprintf (file, TYPE_OPERAND_FMT, "function"); + putc ('\n', file); + } + ASM_GLOBALIZE_LABEL (file, name); + TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced; +} + +/* Parse the -mfixed-range= option string. 
*/ + +static void +fix_range (const_str) + const char *const_str; +{ + int i, first, last; + char *str, *dash, *comma; + + /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and + REG2 are either register names or register numbers. The effect + of this option is to mark the registers in the range from REG1 to + REG2 as ``fixed'' so they won't be used by the compiler. This is + used, e.g., to ensure that kernel mode code doesn't use f32-f127. */ + + i = strlen (const_str); + str = (char *) alloca (i + 1); + memcpy (str, const_str, i + 1); + + while (1) + { + dash = strchr (str, '-'); + if (!dash) + { + warning ("value of -mfixed-range must have form REG1-REG2"); + return; + } + *dash = '\0'; + + comma = strchr (dash + 1, ','); + if (comma) + *comma = '\0'; + + first = decode_reg_name (str); + if (first < 0) + { + warning ("unknown register name: %s", str); + return; + } + + last = decode_reg_name (dash + 1); + if (last < 0) + { + warning ("unknown register name: %s", dash + 1); + return; + } + + *dash = '-'; + + if (first > last) + { + warning ("%s-%s is an empty range", str, dash + 1); + return; + } + + for (i = first; i <= last; ++i) + fixed_regs[i] = call_used_regs[i] = 1; + + if (!comma) + break; + + *comma = ','; + str = comma + 1; + } +} + +/* Called to register all of our global variables with the garbage + collector. 
*/ + +static void +ia64_add_gc_roots () +{ + ggc_add_rtx_root (&ia64_compare_op0, 1); + ggc_add_rtx_root (&ia64_compare_op1, 1); +} + +static void +ia64_init_machine_status (p) + struct function *p; +{ + p->machine = + (struct machine_function *) xcalloc (1, sizeof (struct machine_function)); +} + +static void +ia64_mark_machine_status (p) + struct function *p; +{ + struct machine_function *machine = p->machine; + + if (machine) + { + ggc_mark_rtx (machine->ia64_eh_epilogue_sp); + ggc_mark_rtx (machine->ia64_eh_epilogue_bsp); + ggc_mark_rtx (machine->ia64_gp_save); + } +} + +static void +ia64_free_machine_status (p) + struct function *p; +{ + free (p->machine); + p->machine = NULL; +} + +/* Handle TARGET_OPTIONS switches. */ + +void +ia64_override_options () +{ + if (TARGET_AUTO_PIC) + target_flags |= MASK_CONST_GP; + + if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR) + { + warning ("cannot optimize division for both latency and throughput"); + target_flags &= ~MASK_INLINE_DIV_THR; + } + + if (ia64_fixed_range_string) + fix_range (ia64_fixed_range_string); + + ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload; + flag_schedule_insns_after_reload = 0; + + ia64_section_threshold = g_switch_set ? 
g_switch_value : IA64_DEFAULT_GVALUE; + + init_machine_status = ia64_init_machine_status; + mark_machine_status = ia64_mark_machine_status; + free_machine_status = ia64_free_machine_status; + + ia64_add_gc_roots (); +} + +static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx)); +static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx)); +static enum attr_type ia64_safe_type PARAMS((rtx)); + +static enum attr_itanium_requires_unit0 +ia64_safe_itanium_requires_unit0 (insn) + rtx insn; +{ + if (recog_memoized (insn) >= 0) + return get_attr_itanium_requires_unit0 (insn); + else + return ITANIUM_REQUIRES_UNIT0_NO; +} + +static enum attr_itanium_class +ia64_safe_itanium_class (insn) + rtx insn; +{ + if (recog_memoized (insn) >= 0) + return get_attr_itanium_class (insn); + else + return ITANIUM_CLASS_UNKNOWN; +} + +static enum attr_type +ia64_safe_type (insn) + rtx insn; +{ + if (recog_memoized (insn) >= 0) + return get_attr_type (insn); + else + return TYPE_UNKNOWN; +} + +/* The following collection of routines emit instruction group stop bits as + necessary to avoid dependencies. */ + +/* Need to track some additional registers as far as serialization is + concerned so we can properly handle br.call and br.ret. We could + make these registers visible to gcc, but since these registers are + never explicitly used in gcc generated code, it seems wasteful to + do so (plus it would make the call and return patterns needlessly + complex). */ +#define REG_GP (GR_REG (1)) +#define REG_RP (BR_REG (0)) +#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) +/* This is used for volatile asms which may require a stop bit immediately + before and after them. */ +#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) +#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3) +#define NUM_REGS (AR_UNAT_BIT_0 + 64) + +/* For each register, we keep track of how it has been written in the + current instruction group. 
+ + If a register is written unconditionally (no qualifying predicate), + WRITE_COUNT is set to 2 and FIRST_PRED is ignored. + + If a register is written if its qualifying predicate P is true, we + set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register + may be written again by the complement of P (P^1) and when this happens, + WRITE_COUNT gets set to 2. + + The result of this is that whenever an insn attempts to write a register + whose WRITE_COUNT is two, we need to issue an insn group barrier first. + + If a predicate register is written by a floating-point insn, we set + WRITTEN_BY_FP to true. + + If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND + to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */ + +struct reg_write_state +{ + unsigned int write_count : 2; + unsigned int first_pred : 16; + unsigned int written_by_fp : 1; + unsigned int written_by_and : 1; + unsigned int written_by_or : 1; +}; + +/* Cumulative info for the current instruction group. */ +struct reg_write_state rws_sum[NUM_REGS]; +/* Info for the current instruction. This gets copied to rws_sum after a + stop bit is emitted. */ +struct reg_write_state rws_insn[NUM_REGS]; + +/* Indicates whether this is the first instruction after a stop bit, + in which case we don't need another stop bit. Without this, we hit + the abort in ia64_variable_issue when scheduling an alloc. */ +static int first_instruction; + +/* Misc flags needed to compute RAW/WAW dependencies while we are traversing + RTL for one instruction. */ +struct reg_flags +{ + unsigned int is_write : 1; /* Is register being written? */ + unsigned int is_fp : 1; /* Is register used as part of an fp op? */ + unsigned int is_branch : 1; /* Is register used as part of a branch? */ + unsigned int is_and : 1; /* Is register used as part of and.orcm? */ + unsigned int is_or : 1; /* Is register used as part of or.andcm? 
*/ + unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */ +}; + +static void rws_update PARAMS ((struct reg_write_state *, int, + struct reg_flags, int)); +static int rws_access_regno PARAMS ((int, struct reg_flags, int)); +static int rws_access_reg PARAMS ((rtx, struct reg_flags, int)); +static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *)); +static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx)); +static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int)); +static void init_insn_group_barriers PARAMS ((void)); +static int group_barrier_needed_p PARAMS ((rtx)); +static int safe_group_barrier_needed_p PARAMS ((rtx)); + +/* Update *RWS for REGNO, which is being written by the current instruction, + with predicate PRED, and associated register flags in FLAGS. */ + +static void +rws_update (rws, regno, flags, pred) + struct reg_write_state *rws; + int regno; + struct reg_flags flags; + int pred; +{ + if (pred) + rws[regno].write_count++; + else + rws[regno].write_count = 2; + rws[regno].written_by_fp |= flags.is_fp; + /* ??? Not tracking and/or across differing predicates. */ + rws[regno].written_by_and = flags.is_and; + rws[regno].written_by_or = flags.is_or; + rws[regno].first_pred = pred; +} + +/* Handle an access to register REGNO of type FLAGS using predicate register + PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates + a dependency with an earlier instruction in the same group. */ + +static int +rws_access_regno (regno, flags, pred) + int regno; + struct reg_flags flags; + int pred; +{ + int need_barrier = 0; + + if (regno >= NUM_REGS) + abort (); + + if (! PR_REGNO_P (regno)) + flags.is_and = flags.is_or = 0; + + if (flags.is_write) + { + int write_count; + + /* One insn writes same reg multiple times? */ + if (rws_insn[regno].write_count > 0) + abort (); + + /* Update info for current instruction. 
*/ + rws_update (rws_insn, regno, flags, pred); + write_count = rws_sum[regno].write_count; + + switch (write_count) + { + case 0: + /* The register has not been written yet. */ + rws_update (rws_sum, regno, flags, pred); + break; + + case 1: + /* The register has been written via a predicate. If this is + not a complementary predicate, then we need a barrier. */ + /* ??? This assumes that P and P+1 are always complementary + predicates for P even. */ + if (flags.is_and && rws_sum[regno].written_by_and) + ; + else if (flags.is_or && rws_sum[regno].written_by_or) + ; + else if ((rws_sum[regno].first_pred ^ 1) != pred) + need_barrier = 1; + rws_update (rws_sum, regno, flags, pred); + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + if (flags.is_and && rws_sum[regno].written_by_and) + ; + else if (flags.is_or && rws_sum[regno].written_by_or) + ; + else + need_barrier = 1; + rws_sum[regno].written_by_and = flags.is_and; + rws_sum[regno].written_by_or = flags.is_or; + break; + + default: + abort (); + } + } + else + { + if (flags.is_branch) + { + /* Branches have several RAW exceptions that allow to avoid + barriers. */ + + if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM) + /* RAW dependencies on branch regs are permissible as long + as the writer is a non-branch instruction. Since we + never generate code that uses a branch register written + by a branch instruction, handling this case is + easy. */ + return 0; + + if (REGNO_REG_CLASS (regno) == PR_REGS + && ! rws_sum[regno].written_by_fp) + /* The predicates of a branch are available within the + same insn group as long as the predicate was written by + something other than a floating-point instruction. */ + return 0; + } + + if (flags.is_and && rws_sum[regno].written_by_and) + return 0; + if (flags.is_or && rws_sum[regno].written_by_or) + return 0; + + switch (rws_sum[regno].write_count) + { + case 0: + /* The register has not been written yet. 
*/ + break; + + case 1: + /* The register has been written via a predicate. If this is + not a complementary predicate, then we need a barrier. */ + /* ??? This assumes that P and P+1 are always complementary + predicates for P even. */ + if ((rws_sum[regno].first_pred ^ 1) != pred) + need_barrier = 1; + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + need_barrier = 1; + break; + + default: + abort (); + } + } + + return need_barrier; +} + +static int +rws_access_reg (reg, flags, pred) + rtx reg; + struct reg_flags flags; + int pred; +{ + int regno = REGNO (reg); + int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)); + + if (n == 1) + return rws_access_regno (regno, flags, pred); + else + { + int need_barrier = 0; + while (--n >= 0) + need_barrier |= rws_access_regno (regno + n, flags, pred); + return need_barrier; + } +} + +/* Examine X, which is a SET rtx, and update the flags, the predicate, and + the condition, stored in *PFLAGS, *PPRED and *PCOND. */ + +static void +update_set_flags (x, pflags, ppred, pcond) + rtx x; + struct reg_flags *pflags; + int *ppred; + rtx *pcond; +{ + rtx src = SET_SRC (x); + + *pcond = 0; + + switch (GET_CODE (src)) + { + case CALL: + return; + + case IF_THEN_ELSE: + if (SET_DEST (x) == pc_rtx) + /* X is a conditional branch. */ + return; + else + { + int is_complemented = 0; + + /* X is a conditional move. */ + rtx cond = XEXP (src, 0); + if (GET_CODE (cond) == EQ) + is_complemented = 1; + cond = XEXP (cond, 0); + if (GET_CODE (cond) != REG + && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) + abort (); + *pcond = cond; + if (XEXP (src, 1) == SET_DEST (x) + || XEXP (src, 2) == SET_DEST (x)) + { + /* X is a conditional move that conditionally writes the + destination. */ + + /* We need another complement in this case. */ + if (XEXP (src, 1) == SET_DEST (x)) + is_complemented = ! is_complemented; + + *ppred = REGNO (cond); + if (is_complemented) + ++*ppred; + } + + /* ??? 
If this is a conditional write to the dest, then this + instruction does not actually read one source. This probably + doesn't matter, because that source is also the dest. */ + /* ??? Multiple writes to predicate registers are allowed + if they are all AND type compares, or if they are all OR + type compares. We do not generate such instructions + currently. */ + } + /* ... fall through ... */ + + default: + if (GET_RTX_CLASS (GET_CODE (src)) == '<' + && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT) + /* Set pflags->is_fp to 1 so that we know we're dealing + with a floating point comparison when processing the + destination of the SET. */ + pflags->is_fp = 1; + + /* Discover if this is a parallel comparison. We only handle + and.orcm and or.andcm at present, since we must retain a + strict inverse on the predicate pair. */ + else if (GET_CODE (src) == AND) + pflags->is_and = 1; + else if (GET_CODE (src) == IOR) + pflags->is_or = 1; + + break; + } +} + +/* Subroutine of rtx_needs_barrier; this function determines whether the + source of a given SET rtx found in X needs a barrier. FLAGS and PRED + are as in rtx_needs_barrier. COND is an rtx that holds the condition + for this insn. */ + +static int +set_src_needs_barrier (x, flags, pred, cond) + rtx x; + struct reg_flags flags; + int pred; + rtx cond; +{ + int need_barrier = 0; + rtx dst; + rtx src = SET_SRC (x); + + if (GET_CODE (src) == CALL) + /* We don't need to worry about the result registers that + get written by subroutine call. */ + return rtx_needs_barrier (src, flags, pred); + else if (SET_DEST (x) == pc_rtx) + { + /* X is a conditional branch. */ + /* ??? This seems redundant, as the caller sets this bit for + all JUMP_INSNs. */ + flags.is_branch = 1; + return rtx_needs_barrier (src, flags, pred); + } + + need_barrier = rtx_needs_barrier (src, flags, pred); + + /* This instruction unconditionally uses a predicate register. 
*/ + if (cond) + need_barrier |= rws_access_reg (cond, flags, 0); + + dst = SET_DEST (x); + if (GET_CODE (dst) == ZERO_EXTRACT) + { + need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); + dst = XEXP (dst, 0); + } + return need_barrier; +} + +/* Handle an access to rtx X of type FLAGS using predicate register PRED. + Return 1 is this access creates a dependency with an earlier instruction + in the same group. */ + +static int +rtx_needs_barrier (x, flags, pred) + rtx x; + struct reg_flags flags; + int pred; +{ + int i, j; + int is_complemented = 0; + int need_barrier = 0; + const char *format_ptr; + struct reg_flags new_flags; + rtx cond = 0; + + if (! x) + return 0; + + new_flags = flags; + + switch (GET_CODE (x)) + { + case SET: + update_set_flags (x, &new_flags, &pred, &cond); + need_barrier = set_src_needs_barrier (x, new_flags, pred, cond); + if (GET_CODE (SET_SRC (x)) != CALL) + { + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred); + } + break; + + case CALL: + new_flags.is_write = 0; + need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); + + /* Avoid multiple register writes, in case this is a pattern with + multiple CALL rtx. This avoids an abort in rws_access_reg. */ + if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count) + { + new_flags.is_write = 1; + need_barrier |= rws_access_regno (REG_RP, new_flags, pred); + need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred); + need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); + } + break; + + case COND_EXEC: + /* X is a predicated instruction. 
*/ + + cond = COND_EXEC_TEST (x); + if (pred) + abort (); + need_barrier = rtx_needs_barrier (cond, flags, 0); + + if (GET_CODE (cond) == EQ) + is_complemented = 1; + cond = XEXP (cond, 0); + if (GET_CODE (cond) != REG + && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) + abort (); + pred = REGNO (cond); + if (is_complemented) + ++pred; + + need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); + return need_barrier; + + case CLOBBER: + case USE: + /* Clobber & use are for earlier compiler-phases only. */ + break; + + case ASM_OPERANDS: + case ASM_INPUT: + /* We always emit stop bits for traditional asms. We emit stop bits + for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ + if (GET_CODE (x) != ASM_OPERANDS + || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) + { + /* Avoid writing the register multiple times if we have multiple + asm outputs. This avoids an abort in rws_access_reg. */ + if (! rws_insn[REG_VOLATILE].write_count) + { + new_flags.is_write = 1; + rws_access_regno (REG_VOLATILE, new_flags, pred); + } + return 1; + } + + /* For all ASM_OPERANDS, we must traverse the vector of input operands. + We can not just fall through here since then we would be confused + by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate + traditional asms unlike their normal usage. 
*/ + + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) + if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) + need_barrier = 1; + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; --i) + { + rtx pat = XVECEXP (x, 0, i); + if (GET_CODE (pat) == SET) + { + update_set_flags (pat, &new_flags, &pred, &cond); + need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond); + } + else if (GET_CODE (pat) == USE + || GET_CODE (pat) == CALL + || GET_CODE (pat) == ASM_OPERANDS) + need_barrier |= rtx_needs_barrier (pat, flags, pred); + else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN) + abort (); + } + for (i = XVECLEN (x, 0) - 1; i >= 0; --i) + { + rtx pat = XVECEXP (x, 0, i); + if (GET_CODE (pat) == SET) + { + if (GET_CODE (SET_SRC (pat)) != CALL) + { + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags, + pred); + } + } + else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN) + need_barrier |= rtx_needs_barrier (pat, flags, pred); + } + break; + + case SUBREG: + x = SUBREG_REG (x); + /* FALLTHRU */ + case REG: + if (REGNO (x) == AR_UNAT_REGNUM) + { + for (i = 0; i < 64; ++i) + need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred); + } + else + need_barrier = rws_access_reg (x, flags, pred); + break; + + case MEM: + /* Find the regs used in memory address computation. */ + new_flags.is_write = 0; + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + break; + + case CONST_INT: case CONST_DOUBLE: + case SYMBOL_REF: case LABEL_REF: case CONST: + break; + + /* Operators with side-effects. 
*/ + case POST_INC: case POST_DEC: + if (GET_CODE (XEXP (x, 0)) != REG) + abort (); + + new_flags.is_write = 0; + need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); + new_flags.is_write = 1; + need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); + break; + + case POST_MODIFY: + if (GET_CODE (XEXP (x, 0)) != REG) + abort (); + + new_flags.is_write = 0; + need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); + new_flags.is_write = 1; + need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); + break; + + /* Handle common unary and binary ops for efficiency. */ + case COMPARE: case PLUS: case MINUS: case MULT: case DIV: + case MOD: case UDIV: case UMOD: case AND: case IOR: + case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: + case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: + case NE: case EQ: case GE: case GT: case LE: + case LT: case GEU: case GTU: case LEU: case LTU: + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); + break; + + case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: + case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: + case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: + case SQRT: case FFS: + need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + case 1: /* st8.spill */ + case 2: /* ld8.fill */ + { + HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1)); + HOST_WIDE_INT bit = (offset >> 3) & 63; + + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + new_flags.is_write = (XINT (x, 1) == 1); + need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit, + new_flags, pred); + break; + } + + case 3: /* stf.spill */ + case 4: /* ldf.spill */ + case 8: /* popcnt */ + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + break; + + 
case 7: /* pred_rel_mutex */ + case 9: /* pic call */ + case 12: /* mf */ + case 19: /* fetchadd_acq */ + case 20: /* mov = ar.bsp */ + case 21: /* flushrs */ + case 22: /* bundle selector */ + case 23: /* cycle display */ + break; + + case 24: /* addp4 */ + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + break; + + case 5: /* recip_approx */ + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); + break; + + case 13: /* cmpxchg_acq */ + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); + need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); + break; + + default: + abort (); + } + break; + + case UNSPEC_VOLATILE: + switch (XINT (x, 1)) + { + case 0: /* alloc */ + /* Alloc must always be the first instruction of a group. + We force this by always returning true. */ + /* ??? We might get better scheduling if we explicitly check for + input/local/output register dependencies, and modify the + scheduler so that alloc is always reordered to the start of + the current group. We could then eliminate all of the + first_instruction code. 
*/ + rws_access_regno (AR_PFS_REGNUM, flags, pred); + + new_flags.is_write = 1; + rws_access_regno (REG_AR_CFM, new_flags, pred); + return 1; + + case 1: /* blockage */ + case 2: /* insn group barrier */ + return 0; + + case 5: /* set_bsp */ + need_barrier = 1; + break; + + case 7: /* pred.rel.mutex */ + case 8: /* safe_across_calls all */ + case 9: /* safe_across_calls normal */ + return 0; + + default: + abort (); + } + break; + + case RETURN: + new_flags.is_write = 0; + need_barrier = rws_access_regno (REG_RP, flags, pred); + need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred); + + new_flags.is_write = 1; + need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); + need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case '0': /* unused field */ + case 'i': /* integer */ + case 'n': /* note */ + case 'w': /* wide integer */ + case 's': /* pointer to string */ + case 'S': /* optional pointer to string */ + break; + + case 'e': + if (rtx_needs_barrier (XEXP (x, i), flags, pred)) + need_barrier = 1; + break; + + case 'E': + for (j = XVECLEN (x, i) - 1; j >= 0; --j) + if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) + need_barrier = 1; + break; + + default: + abort (); + } + break; + } + return need_barrier; +} + +/* Clear out the state for group_barrier_needed_p at the start of a + sequence of insns. */ + +static void +init_insn_group_barriers () +{ + memset (rws_sum, 0, sizeof (rws_sum)); + first_instruction = 1; +} + +/* Given the current state, recorded by previous calls to this function, + determine whether a group barrier (a stop bit) is necessary before INSN. + Return nonzero if so. 
 */

static int
group_barrier_needed_p (insn)
     rtx insn;
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      /* Control can reach a label from several places, so the per-insn
	 write state can no longer be trusted: clear it and request a
	 stop bit unconditionally.  */
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	/* We play dependency tricks with the epilogue in order
	   to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	/* The pattern we use for br.cloop confuses the code above.
	   The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	/* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      /* Reset the per-insn write tracking, then record PAT's register
	 accesses against the accumulated group state.  */
      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      abort ();
    }

  /* The first insn after a state reset never needs a stop bit.  */
  if (first_instruction)
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}

/* Like group_barrier_needed_p, but do not clobber the current state.  */

static int
safe_group_barrier_needed_p (insn)
     rtx insn;
{
  /* Snapshot the global register-write state and the first-instruction
     flag, run the (state-mutating) query, then restore both so the
     caller observes no side effects.  */
  struct reg_write_state rws_saved[NUM_REGS];
  int saved_first_instruction;
  int t;

  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
  saved_first_instruction = first_instruction;

  t = group_barrier_needed_p (insn);

  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
  first_instruction = saved_first_instruction;

  return t;
}

/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
   as necessary to eliminate dependencies.  This function assumes that
   a final instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only inserts
   new ones at basic block boundaries, since these are invisible to the
   scheduler.
 */

static void
emit_insn_group_barriers (dump, insns)
     FILE *dump;
     rtx insns;
{
  rtx insn;
  /* The most recent block boundary (label or basic-block note) behind
     which a stop bit may still be inserted, or 0 if none is pending.  */
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == NOTE
	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == INSN
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == 2)
	{
	  /* unspec_volatile 2 is an explicit insn group barrier (see
	     rtx_needs_barrier); the tracking state restarts after it.  */
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed_p (insn))
	    {
	      if (last_label)
		{
		  if (dump)
		    fprintf (dump, "Emitting stop before label %d\n",
			     INSN_UID (last_label));
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
		  /* Rescan from the label so the new barrier is taken
		     into account for the insns that follow it.  */
		  insn = last_label;

		  init_insn_group_barriers ();
		  last_label = 0;
		}
	    }
	}
    }
}

/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (dump, insns)
     FILE *dump ATTRIBUTE_UNUSED;
     rtx insns;
{
  rtx insn;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
	{
	  /* Make sure the code falling into this barrier ends with a
	     stop bit; skip back over a jump table if necessary.  */
	  rtx last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (GET_CODE (last) == JUMP_INSN
	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed_p (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      /* Called again purely for its side effect: re-record INSN's
		 register accesses in the freshly cleared state.  */
	      group_barrier_needed_p (insn);
	    }
	}
    }
}

static int errata_find_address_regs PARAMS ((rtx *, void *));
static void errata_emit_nops PARAMS ((rtx));
static void fixup_errata PARAMS ((void));

/* This structure is used to track some details about the previous insn
   groups so we can determine if it may be necessary to insert NOPs to
   work around hardware errata.  */
static struct group
{
  /* Predicate registers written in this group.  */
  HARD_REG_SET p_reg_set;
  /* General registers conditionally written in this group.  */
  HARD_REG_SET gr_reg_conditionally_set;
} last_group[2];

/* Index into the last_group array.  */
static int group_idx;

/* Called through for_each_rtx; determines if a hard register that was
   conditionally set in the previous group is used as an address register.
   It ensures that for_each_rtx returns 1 in that case.  */
static int
errata_find_address_regs (xp, data)
     rtx *xp;
     void *data ATTRIBUTE_UNUSED;
{
  rtx x = *xp;
  if (GET_CODE (x) != MEM)
    return 0;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_MODIFY)
    x = XEXP (x, 0);
  if (GET_CODE (x) == REG)
    {
      struct group *prev_group = last_group + (group_idx ^ 1);
      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
			     REGNO (x)))
	return 1;
      /* A plain register address: nothing inside it left to scan.  */
      return -1;
    }
  return 0;
}

/* Called for each insn; this function keeps track of the state in
   last_group and emits additional NOPs if necessary to work around
   an Itanium A/B step erratum.
 */
static void
errata_emit_nops (insn)
     rtx insn;
{
  struct group *this_group = last_group + group_idx;
  struct group *prev_group = last_group + (group_idx ^ 1);
  rtx pat = PATTERN (insn);
  /* For a predicated insn, COND is its test and REAL_PAT the guarded
     body; otherwise COND is 0 and REAL_PAT is the pattern itself.  */
  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
  enum attr_type type;
  rtx set = real_pat;

  if (GET_CODE (real_pat) == USE
      || GET_CODE (real_pat) == CLOBBER
      || GET_CODE (real_pat) == ASM_INPUT
      || GET_CODE (real_pat) == ADDR_VEC
      || GET_CODE (real_pat) == ADDR_DIFF_VEC
      || asm_noperands (PATTERN (insn)) >= 0)
    return;

  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
     parts of it.  */

  if (GET_CODE (set) == PARALLEL)
    {
      int i;
      /* Treat a PARALLEL as a single set only if everything after the
	 first element is a USE or CLOBBER.  */
      set = XVECEXP (real_pat, 0, 0);
      for (i = 1; i < XVECLEN (real_pat, 0); i++)
	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
	  {
	    set = 0;
	    break;
	  }
    }

  if (set && GET_CODE (set) != SET)
    set = 0;

  type = get_attr_type (insn);

  /* Record predicate registers written by F-type insns in this group.  */
  if (type == TYPE_F
      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));

  /* Record general registers written under a predicate that was itself
     set by an F-type insn in the previous group.  */
  if ((type == TYPE_M || type == TYPE_A) && cond && set
      && REG_P (SET_DEST (set))
      && GET_CODE (SET_SRC (set)) != PLUS
      && GET_CODE (SET_SRC (set)) != MINUS
      && (GET_CODE (SET_SRC (set)) != ASHIFT
	  || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
      && (GET_CODE (SET_SRC (set)) != MEM
	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
    {
      /* The condition must be a comparison on a predicate register.  */
      if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
	  || ! REG_P (XEXP (cond, 0)))
	abort ();

      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
    }
  /* If this insn uses such a conditionally-set register in an address,
     isolate it from the offending write with a nop between stop bits,
     and restart the group tracking.  */
  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
    {
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      emit_insn_before (gen_nop (), insn);
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      group_idx = 0;
      memset (last_group, 0, sizeof last_group);
    }
}

/* Emit extra nops if they are required to work around hardware errata.  */

static void
fixup_errata ()
{
  rtx insn;

  /* The workaround is only needed when targeting B-step silicon.  */
  if (! TARGET_B_STEP)
    return;

  group_idx = 0;
  memset (last_group, 0, sizeof last_group);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!INSN_P (insn))
	continue;

      /* A stop bit (TYPE_S) ends the current group: flip to the other
	 tracking slot and clear it for the new group.  */
      if (ia64_safe_type (insn) == TYPE_S)
	{
	  group_idx ^= 1;
	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
	}
      else
	errata_emit_nops (insn);
    }
}

/* Instruction scheduling support.  */
/* Describe one bundle.  */

struct bundle
{
  /* Zero if there's no possibility of a stop in this bundle other than
     at the end, otherwise the position of the optional stop bit.  */
  int possible_stop;
  /* The types of the three slots.  */
  enum attr_type t[3];
  /* The pseudo op to be emitted into the assembler output.  */
  const char *name;
};

#define NR_BUNDLES 10

/* A list of all available bundles.
 */

static const struct bundle bundle[NR_BUNDLES] =
{
  /* { possible_stop, { slot types }, assembler pseudo-op }  */
  { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
  { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
  { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
  { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
#if NR_BUNDLES == 10
  { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
  { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
#endif
  { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
  { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
  { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
  /* .mfi needs to occur earlier than .mlx, so that we only generate it if
     it matches an L type insn.  Otherwise we'll try to generate L type
     nops.  */
  { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
};

/* Describe a packet of instructions.  Packets consist of two bundles that
   are visible to the hardware in one scheduling window.  */

struct ia64_packet
{
  /* The two bundles making up this packet.  */
  const struct bundle *t1, *t2;
  /* Precomputed value of the first split issue in this packet if a cycle
     starts at its beginning.  */
  int first_split;
  /* For convenience, the insn types are replicated here so we don't have
     to go through T1 and T2 all the time.  */
  enum attr_type t[6];
};

/* An array containing all possible packets.  */
#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
static struct ia64_packet packets[NR_PACKETS];

/* Map attr_type to a string with the name.  */

static const char *const type_names[] =
{
  "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
};

/* Nonzero if we should insert stop bits into the schedule.
 */
int ia64_final_schedule = 0;

static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
static rtx ia64_single_set PARAMS ((rtx));
static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
static void ia64_emit_insn_before PARAMS ((rtx, rtx));
static void maybe_rotate PARAMS ((FILE *));
static void finish_last_head PARAMS ((FILE *, int));
static void rotate_one_bundle PARAMS ((FILE *));
static void rotate_two_bundles PARAMS ((FILE *));
static void nop_cycles_until PARAMS ((int, FILE *));
static void cycle_end_fill_slots PARAMS ((FILE *));
static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
static int get_split PARAMS ((const struct ia64_packet *, int));
static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
				   const struct ia64_packet *, int));
static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
				      rtx *, enum attr_type *, int));
static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
static void dump_current_packet PARAMS ((FILE *));
static void schedule_stop PARAMS ((FILE *));
static rtx gen_nop_type PARAMS ((enum attr_type));
static void ia64_emit_nops PARAMS ((void));

/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (b)
     int b;
{
  return bundle[b].name;
}

/* Compute the slot which will cause a split issue in packet P if the
   current cycle begins at slot BEGIN.  Returns 6 if the whole packet
   can issue without a split.  */

static int
itanium_split_issue (p, begin)
     const struct ia64_packet *p;
     int begin;
{
  /* Count of insns of each type issued so far this cycle.  NOTE(review):
     the array is dimensioned by TYPE_S, assuming it is the largest type
     value indexed here -- confirm against the attr_type enum.  */
  int type_count[TYPE_S];
  int i;
  int split = 6;

  if (begin < 3)
    {
      /* Always split before and after MMF.  */
      if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
	return 3;
      if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
	return 3;
      /* Always split after MBB and BBB.  */
      if (p->t[1] == TYPE_B)
	return 3;
      /* Split after first bundle in MIB BBB combination.  */
      if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
	return 3;
    }

  memset (type_count, 0, sizeof type_count);
  for (i = begin; i < split; i++)
    {
      enum attr_type t0 = p->t[i];
      /* An MLX bundle reserves the same units as an MFI bundle.  */
      enum attr_type t = (t0 == TYPE_L ? TYPE_F
			  : t0 == TYPE_X ? TYPE_I
			  : t0);

      /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
	 2 integer per cycle.  */
      int max = (t == TYPE_B ? 3 : 2);
      if (type_count[t] == max)
	return i;

      type_count[t]++;
    }
  return split;
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate ()
{
  return 6;
}

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (insn)
     rtx insn;
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;
  ret = single_set_2 (insn, x);
  if (ret == NULL && GET_CODE (x) == PARALLEL)
    {
      /* Special case here prologue_allocate_stack and
	 epilogue_deallocate_stack.  Although it is not a classical
	 single set, the second set is there just to protect it
	 from moving past FP-relative stack accesses.  */
      /* The protecting set has the form (set R R): its destination and
	 source are the same register, which is what the last test below
	 checks for.  */
      if (XVECLEN (x, 0) == 2
	  && GET_CODE (XVECEXP (x, 0, 0)) == SET
	  && GET_CODE (XVECEXP (x, 0, 1)) == SET
	  && GET_CODE (SET_DEST (XVECEXP (x, 0, 1))) == REG
	  && SET_DEST (XVECEXP (x, 0, 1)) == SET_SRC (XVECEXP (x, 0, 1))
	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
	ret = XVECEXP (x, 0, 0);
    }
  return ret;
}

/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.
 */

static int
ia64_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type dep_type;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;
  rtx dep_set, set, src, addr;

  if (GET_CODE (PATTERN (insn)) == CLOBBER
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (dep_insn)) == CLOBBER
      || GET_CODE (PATTERN (dep_insn)) == USE
      /* @@@ Not accurate for indirect calls.  */
      || GET_CODE (insn) == CALL_INSN
      || ia64_safe_type (insn) == TYPE_S)
    return 0;

  /* Only true (data) dependencies get a nonzero cost.  */
  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
      || REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return 0;

  dep_type = ia64_safe_type (dep_insn);
  dep_class = ia64_safe_itanium_class (dep_insn);
  insn_class = ia64_safe_itanium_class (insn);

  /* Compares that feed a conditional branch can execute in the same
     cycle.  */
  dep_set = ia64_single_set (dep_insn);
  set = ia64_single_set (insn);

  if (dep_type != TYPE_F
      && dep_set
      && GET_CODE (SET_DEST (dep_set)) == REG
      && PR_REG (REGNO (SET_DEST (dep_set)))
      && GET_CODE (insn) == JUMP_INSN)
    return 0;

  if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
    {
      /* ??? Can't find any information in the documentation about whether
	 a sequence
	   st [rx] = ra
	   ld rb = [ry]
	 splits issue.  Assume it doesn't.  */
      return 0;
    }

  /* Find the memory address (if any) that INSN computes or uses, looking
     through SUBREGs, ZERO_EXTENDs and UNSPEC wrappers.  */
  src = set ? SET_SRC (set) : 0;
  addr = 0;
  if (set)
    {
      if (GET_CODE (SET_DEST (set)) == MEM)
	addr = XEXP (SET_DEST (set), 0);
      else if (GET_CODE (SET_DEST (set)) == SUBREG
	       && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
	addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
      else
	{
	  addr = src;
	  if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
	    addr = XVECEXP (addr, 0, 0);
	  while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
	    addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == MEM)
	    addr = XEXP (addr, 0);
	  else
	    addr = 0;
	}
    }

  if (addr && GET_CODE (addr) == POST_MODIFY)
    addr = XEXP (addr, 0);

  /* Note: from here on SET refers to DEP_INSN's single set, no longer
     to INSN's.  */
  set = ia64_single_set (dep_insn);

  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_LD
	  || insn_class == ITANIUM_CLASS_ST))
    {
      if (! addr || ! set)
	abort ();
      /* This isn't completely correct - an IALU that feeds an address has
	 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
	 otherwise.  Unfortunately there's no good way to describe this.  */
      if (reg_overlap_mentioned_p (SET_DEST (set), addr))
	return cost + 1;
    }
  /* IALU/ILOG/LD result feeding a multimedia op: 3 cycles.  */
  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_MMMUL
	  || insn_class == ITANIUM_CLASS_MMSHF
	  || insn_class == ITANIUM_CLASS_MMSHFI))
    return 3;
  /* FMAC result feeding FP misc/convert/multiply: 7 cycles.  */
  if (dep_class == ITANIUM_CLASS_FMAC
      && (insn_class == ITANIUM_CLASS_FMISC
	  || insn_class == ITANIUM_CLASS_FCVTFX
	  || insn_class == ITANIUM_CLASS_XMPY))
    return 7;
  /* FP result feeding an FP store: 8 cycles.  */
  if ((dep_class == ITANIUM_CLASS_FMAC
       || dep_class == ITANIUM_CLASS_FMISC
       || dep_class == ITANIUM_CLASS_FCVTFX
       || dep_class == ITANIUM_CLASS_XMPY)
      && insn_class == ITANIUM_CLASS_STF)
    return 8;
  /* Multimedia result feeding a memory or integer op: 4 cycles.  */
  if ((dep_class == ITANIUM_CLASS_MMMUL
       || dep_class == ITANIUM_CLASS_MMSHF
       || dep_class == ITANIUM_CLASS_MMSHFI)
      && (insn_class == ITANIUM_CLASS_LD
	  || insn_class == ITANIUM_CLASS_ST
	  || insn_class == ITANIUM_CLASS_IALU
	  || insn_class == ITANIUM_CLASS_ILOG
	  || insn_class == ITANIUM_CLASS_ISHF))
    return 4;

  return cost;
}

/* Describe the current state of the Itanium pipeline.  */
static struct
{
  /* The first slot that is used in the current cycle.  */
  int first_slot;
  /* The next slot to fill.  */
  int cur;
  /* The packet we have selected for the current issue window.  */
  const struct ia64_packet *packet;
  /* The position of the split issue that occurs due to issue width
     limitations (6 if there's no split issue).  */
  int split;
  /* Record data about the insns scheduled so far in the same issue
     window.  The elements up to but not including FIRST_SLOT belong
     to the previous cycle, the ones starting with FIRST_SLOT belong
     to the current cycle.  */
  enum attr_type types[6];
  rtx insns[6];
  int stopbit[6];
  /* Nonzero if we decided to schedule a stop bit.  */
  int last_was_stop;
} sched_data;

/* Temporary arrays; they have enough elements to hold all insns that
   can be ready at the same time while scheduling of the current block.
   SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
static rtx *sched_ready;
static enum attr_type *sched_types;

/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
   of packet P.  */

static int
insn_matches_slot (p, itype, slot, insn)
     const struct ia64_packet *p;
     enum attr_type itype;
     int slot;
     rtx insn;
{
  enum attr_itanium_requires_unit0 u0;
  enum attr_type stype = p->t[slot];

  if (insn)
    {
      /* An insn that requires unit 0 cannot follow an earlier insn of
	 the same unit type within this cycle -- the earlier insn would
	 already occupy unit 0.  F/L and I/X are treated as the same
	 unit for this purpose.  */
      u0 = ia64_safe_itanium_requires_unit0 (insn);
      if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
	{
	  int i;
	  for (i = sched_data.first_slot; i < slot; i++)
	    if (p->t[i] == stype
		|| (stype == TYPE_F && p->t[i] == TYPE_L)
		|| (stype == TYPE_I && p->t[i] == TYPE_X))
	      return 0;
	}
      if (GET_CODE (insn) == CALL_INSN)
	{
	  /* Reject calls in multiway branch packets.  We want to limit
	     the number of multiway branches we generate (since the branch
	     predictor is limited), and this seems to work fairly well.
	     (If we didn't do this, we'd have to add another test here to
	     force calls into the third slot of the bundle.)  */
	  if (slot < 3)
	    {
	      if (p->t[1] == TYPE_B)
		return 0;
	    }
	  else
	    {
	      if (p->t[4] == TYPE_B)
		return 0;
	    }
	}
    }

  /* TYPE_A (ALU) insns can issue in either an M or an I slot.  */
  if (itype == stype)
    return 1;
  if (itype == TYPE_A)
    return stype == TYPE_M || stype == TYPE_I;
  return 0;
}

/* Like emit_insn_before, but skip cycle_display insns.  This makes the
   assembly output a bit prettier.  */

static void
ia64_emit_insn_before (insn, before)
     rtx insn, before;
{
  rtx prev = PREV_INSN (before);
  /* NOTE(review): UNSPEC code 23 is presumably the cycle_display
     pattern -- confirm against the ia64 machine description.  */
  if (prev && GET_CODE (prev) == INSN
      && GET_CODE (PATTERN (prev)) == UNSPEC
      && XINT (PATTERN (prev), 1) == 23)
    before = prev;
  emit_insn_before (insn, before);
}

#if 0
/* Generate a nop insn of the given type.  Note we never generate L type
   nops.
   */

/* NOTE(review): this copy is #if 0'd out; a live, identical
   gen_nop_type definition appears further down in this file.  */
static rtx
gen_nop_type (t)
     enum attr_type t;
{
  switch (t)
    {
    case TYPE_M:
      return gen_nop_m ();
    case TYPE_I:
      return gen_nop_i ();
    case TYPE_B:
      return gen_nop_b ();
    case TYPE_F:
      return gen_nop_f ();
    case TYPE_X:
      return gen_nop_x ();
    default:
      abort ();
    }
}
#endif

/* When rotating a bundle out of the issue window, insert a bundle selector
   insn in front of it.  DUMP is the scheduling dump file or NULL.  START
   is either 0 or 3, depending on whether we want to emit a bundle selector
   for the first bundle or the second bundle in the current issue window.

   The selector insns are emitted this late because the selected packet can
   be changed until parts of it get rotated out.  */

static void
finish_last_head (dump, start)
     FILE *dump;
     int start;
{
  const struct ia64_packet *p = sched_data.packet;
  const struct bundle *b = start == 0 ? p->t1 : p->t2;
  /* Pointer arithmetic: index of B within the global bundle[] table.  */
  int bundle_type = b - bundle;
  rtx insn;
  int i;

  if (! ia64_final_schedule)
    return;

  /* Find the first real insn in this bundle; abort if the bundle is
     entirely empty (all three slots 0).  */
  for (i = start; sched_data.insns[i] == 0; i++)
    if (i == start + 3)
      abort ();
  insn = sched_data.insns[i];

  if (dump)
    fprintf (dump, "// Emitting template before %d: %s\n",
	     INSN_UID (insn), b->name);

  ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
}

/* We can't schedule more insns this cycle.  Fix up the scheduling state
   and advance FIRST_SLOT and CUR.
   We have to distribute the insns that are currently found between
   FIRST_SLOT and CUR into the slots of the packet we have selected.  So
   far, they are stored successively in the fields starting at FIRST_SLOT;
   now they must be moved to the correct slots.
   DUMP is the current scheduling dump file, or NULL.
   */

static void
cycle_end_fill_slots (dump)
     FILE *dump;
{
  const struct ia64_packet *packet = sched_data.packet;
  int slot, i;
  enum attr_type tmp_types[6];
  rtx tmp_insns[6];

  /* Work on copies; sched_data.types/insns are rewritten in place.  */
  memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
  memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));

  for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
    {
      enum attr_type t = tmp_types[i];
      if (t != ia64_safe_type (tmp_insns[i]))
	abort ();
      /* Skip forward over slots this insn can't occupy, marking each
	 skipped slot as an (implicit) nop of the packet's slot type.  */
      while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
	{
	  if (slot > sched_data.split)
	    abort ();
	  if (dump)
	    fprintf (dump, "// Packet needs %s, have %s\n",
		     type_names[packet->t[slot]], type_names[t]);
	  sched_data.types[slot] = packet->t[slot];
	  sched_data.insns[slot] = 0;
	  sched_data.stopbit[slot] = 0;

	  /* ??? TYPE_L instructions always fill up two slots, but we don't
	     support TYPE_L nops.  */
	  if (packet->t[slot] == TYPE_L)
	    abort ();

	  slot++;
	}

      /* Do _not_ use T here.  If T == TYPE_A, then we'd risk changing the
	 actual slot type later.  */
      sched_data.types[slot] = packet->t[slot];
      sched_data.insns[slot] = tmp_insns[i];
      sched_data.stopbit[slot] = 0;
      slot++;

      /* TYPE_L instructions always fill up two slots.  */
      if (t == TYPE_L)
	slot++;
    }

  /* This isn't right - there's no need to pad out until the forced split;
     the CPU will automatically split if an insn isn't ready.  */
#if 0
  while (slot < sched_data.split)
    {
      sched_data.types[slot] = packet->t[slot];
      sched_data.insns[slot] = 0;
      sched_data.stopbit[slot] = 0;
      slot++;
    }
#endif

  sched_data.first_slot = sched_data.cur = slot;
}

/* Bundle rotations, as described in the Itanium optimization manual.
   We can rotate either one or both bundles out of the issue window.
   DUMP is the current scheduling dump file, or NULL.
   */

static void
rotate_one_bundle (dump)
     FILE *dump;
{
  if (dump)
    fprintf (dump, "// Rotating one bundle.\n");

  /* Emit the selector for the bundle being rotated out, then shift the
     remaining slots down by one bundle (3 slots).  */
  finish_last_head (dump, 0);
  if (sched_data.cur > 3)
    {
      sched_data.cur -= 3;
      sched_data.first_slot -= 3;
      memmove (sched_data.types,
	       sched_data.types + 3,
	       sched_data.cur * sizeof *sched_data.types);
      memmove (sched_data.stopbit,
	       sched_data.stopbit + 3,
	       sched_data.cur * sizeof *sched_data.stopbit);
      memmove (sched_data.insns,
	       sched_data.insns + 3,
	       sched_data.cur * sizeof *sched_data.insns);
    }
  else
    {
      sched_data.cur = 0;
      sched_data.first_slot = 0;
    }
}

static void
rotate_two_bundles (dump)
     FILE *dump;
{
  if (dump)
    fprintf (dump, "// Rotating two bundles.\n");

  if (sched_data.cur == 0)
    return;

  /* Emit selectors for both bundles (the second only if it has any
     slots filled) and empty the issue window.  */
  finish_last_head (dump, 0);
  if (sched_data.cur > 3)
    finish_last_head (dump, 3);
  sched_data.cur = 0;
  sched_data.first_slot = 0;
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (dump, sched_verbose, max_ready)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int max_ready;
{
  static int initialized = 0;

  /* One-time setup: build the NR_PACKETS = NR_BUNDLES * NR_BUNDLES
     packet table (every ordered pair of bundles) and cache each
     packet's first split-issue position.  */
  if (! initialized)
    {
      int b1, b2, i;

      initialized = 1;

      for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
	{
	  const struct bundle *t1 = bundle + b1;
	  for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
	    {
	      const struct bundle *t2 = bundle + b2;

	      packets[i].t1 = t1;
	      packets[i].t2 = t2;
	    }
	}
      for (i = 0; i < NR_PACKETS; i++)
	{
	  int j;
	  for (j = 0; j < 3; j++)
	    packets[i].t[j] = packets[i].t1->t[j];
	  for (j = 0; j < 3; j++)
	    packets[i].t[j + 3] = packets[i].t2->t[j];
	  packets[i].first_split = itanium_split_issue (packets + i, 0);
	}

    }

  init_insn_group_barriers ();

  memset (&sched_data, 0, sizeof sched_data);
  /* Freed in ia64_sched_finish.  */
  sched_types = (enum attr_type *) xmalloc (max_ready
					    * sizeof (enum attr_type));
  sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
}

/* See if the packet P can match the insns we have already scheduled.  Return
   nonzero if so.  In *PSLOT, we store the first slot that is available for
   more instructions if we choose this packet.
   SPLIT holds the last slot we can use, there's a split issue after it so
   scheduling beyond it would cause us to use more than one cycle.  */

static int
packet_matches_p (p, split, pslot)
     const struct ia64_packet *p;
     int split;
     int *pslot;
{
  int filled = sched_data.cur;
  int first = sched_data.first_slot;
  int i, slot;

  /* First, check if the first of the two bundles must be a specific one (due
     to stop bits).  */
  if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
    return 0;
  if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
    return 0;

  /* Slots before FIRST belong to the previous cycle and must match
     their positions exactly.  */
  for (i = 0; i < first; i++)
    if (! insn_matches_slot (p, sched_data.types[i], i,
			     sched_data.insns[i]))
      return 0;
  /* The current cycle's insns may slide forward into later slots, but
     not past the split.  */
  for (i = slot = first; i < filled; i++)
    {
      while (slot < split)
	{
	  if (insn_matches_slot (p, sched_data.types[i], slot,
				 sched_data.insns[i]))
	    break;
	  slot++;
	}
      if (slot == split)
	return 0;
      slot++;
    }

  if (pslot)
    *pslot = slot;
  return 1;
}

/* A frontend for itanium_split_issue.  For a packet P and a slot
   number FIRST that describes the start of the current clock cycle,
   return the slot number of the first split issue.  This function
   uses the cached number found in P if possible.  */

static int
get_split (p, first)
     const struct ia64_packet *p;
     int first;
{
  if (first == 0)
    return p->first_split;
  return itanium_split_issue (p, first);
}

/* Given N_READY insns in the array READY, whose types are found in the
   corresponding array TYPES, return the insn that is best suited to be
   scheduled in slot SLOT of packet P.  */

static int
find_best_insn (ready, types, n_ready, p, slot)
     rtx *ready;
     enum attr_type *types;
     int n_ready;
     const struct ia64_packet *p;
     int slot;
{
  int best = -1;
  int best_pri = 0;
  /* Scan from the back of the ready list (highest priority first).  */
  while (n_ready-- > 0)
    {
      rtx insn = ready[n_ready];
      if (! insn)
	continue;
      if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
	break;
      /* If we have equally good insns, one of which has a stricter
	 slot requirement, prefer the one with the stricter requirement.  */
      if (best >= 0 && types[n_ready] == TYPE_A)
	continue;
      if (insn_matches_slot (p, types[n_ready], slot, insn))
	{
	  best = n_ready;
	  best_pri = INSN_PRIORITY (ready[best]);

	  /* If there's no way we could get a stricter requirement, stop
	     looking now.  */
	  if (types[n_ready] != TYPE_A
	      && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
	    break;
	  /* NOTE(review): this unconditional break makes the conditional
	     break just above dead code -- the loop always stops at the
	     first match, so the "stricter requirement" preference never
	     actually triggers.  Possibly intentional; verify against
	     upstream GCC history before changing.  */
	  break;
	}
    }
  return best;
}

/* Select the best packet to use given the current scheduler state and the
   current ready list.
   READY is an array holding N_READY ready insns; TYPES is a corresponding
   array that holds their types.  Store the best packet in *PPACKET and the
   number of insns that can be scheduled in the current cycle in *PBEST.  */

static void
find_best_packet (pbest, ppacket, ready, types, n_ready)
     int *pbest;
     const struct ia64_packet **ppacket;
     rtx *ready;
     enum attr_type *types;
     int n_ready;
{
  int first = sched_data.first_slot;
  int best = 0;
  int lowest_end = 6;
  const struct ia64_packet *best_packet = NULL;
  int i;

  /* Try every packet; greedily fill its slots from the ready list and
     count how many insns fit (WIN).  Ties are broken by the packet that
     finishes in the earliest slot.  */
  for (i = 0; i < NR_PACKETS; i++)
    {
      const struct ia64_packet *p = packets + i;
      int slot;
      int split = get_split (p, first);
      int win = 0;
      int first_slot, last_slot;
      int b_nops = 0;

      if (! packet_matches_p (p, split, &first_slot))
	continue;

      /* Use a scratch copy of the ready list; slots are consumed
	 destructively during the trial fill.  */
      memcpy (sched_ready, ready, n_ready * sizeof (rtx));

      win = 0;
      last_slot = 6;
      for (slot = first_slot; slot < split; slot++)
	{
	  int insn_nr;

	  /* Disallow a degenerate case where the first bundle doesn't
	     contain anything but NOPs!  */
	  if (first_slot == 0 && win == 0 && slot == 3)
	    {
	      win = -1;
	      break;
	    }

	  insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
	  if (insn_nr >= 0)
	    {
	      sched_ready[insn_nr] = 0;
	      last_slot = slot;
	      win++;
	    }
	  else if (p->t[slot] == TYPE_B)
	    b_nops++;
	}
      /* We must disallow MBB/BBB packets if any of their B slots would be
	 filled with nops.  */
      if (last_slot < 3)
	{
	  if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
	    win = -1;
	}
      else
	{
	  if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
	    win = -1;
	}

      if (win > best
	  || (win == best && last_slot < lowest_end))
	{
	  best = win;
	  lowest_end = last_slot;
	  best_packet = p;
	}
    }
  *pbest = best;
  *ppacket = best_packet;
}

/* Reorder the ready list so that the insns that can be issued in this cycle
   are found in the correct order at the end of the list.
   DUMP is the scheduling dump file, or NULL.  READY points to the start,
   E_READY to the end of the ready list.  MAY_FAIL determines what should be
   done if no insns can be scheduled in this cycle: if it is zero, we abort,
   otherwise we return 0.
   Return 1 if any insns can be scheduled in this cycle.  */

static int
itanium_reorder (dump, ready, e_ready, may_fail)
     FILE *dump;
     rtx *ready;
     rtx *e_ready;
     int may_fail;
{
  const struct ia64_packet *best_packet;
  int n_ready = e_ready - ready;
  int first = sched_data.first_slot;
  int i, best, best_split, filled;

  for (i = 0; i < n_ready; i++)
    sched_types[i] = ia64_safe_type (ready[i]);

  find_best_packet (&best, &best_packet, ready, sched_types, n_ready);

  if (best == 0)
    {
      if (may_fail)
	return 0;
      abort ();
    }

  if (dump)
    {
      /* NOTE(review): passing NULL for a %s argument is undefined
	 behavior per the C standard (glibc happens to print "(null)");
	 best_packet->t2 can apparently be NULL here.  */
      fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
	       best_packet->t1->name,
	       best_packet->t2 ? best_packet->t2->name : NULL, best);
    }

  best_split = itanium_split_issue (best_packet, first);
  packet_matches_p (best_packet, best_split, &filled);

  /* Move the insns chosen for this cycle to the end of the ready list,
     in issue order (the scheduler picks from the back).  */
  for (i = filled; i < best_split; i++)
    {
      int insn_nr;

      insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
      if (insn_nr >= 0)
	{
	  rtx insn = ready[insn_nr];
	  memmove (ready + insn_nr, ready + insn_nr + 1,
		   (n_ready - insn_nr - 1) * sizeof (rtx));
	  memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
		   (n_ready - insn_nr - 1) * sizeof (enum attr_type));
	  ready[--n_ready] = insn;
	}
    }

  sched_data.packet = best_packet;
  sched_data.split = best_split;
  return 1;
}

/* Dump information about the current scheduling state to file DUMP.
   */

static void
dump_current_packet (dump)
     FILE *dump;
{
  int i;
  fprintf (dump, "// %d slots filled:", sched_data.cur);
  /* Slots before FIRST_SLOT belong to the previous cycle.  */
  for (i = 0; i < sched_data.first_slot; i++)
    {
      rtx insn = sched_data.insns[i];
      fprintf (dump, " %s", type_names[sched_data.types[i]]);
      if (insn)
	fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
      if (sched_data.stopbit[i])
	fprintf (dump, " ;;");
    }
  fprintf (dump, " :::");
  for (i = sched_data.first_slot; i < sched_data.cur; i++)
    {
      rtx insn = sched_data.insns[i];
      enum attr_type t = ia64_safe_type (insn);
      fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
    }
  fprintf (dump, "\n");
}

/* Schedule a stop bit.  DUMP is the current scheduling dump file, or
   NULL.  */

static void
schedule_stop (dump)
     FILE *dump;
{
  const struct ia64_packet *best = sched_data.packet;
  int i;
  int best_stop = 6;

  if (dump)
    fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);

  if (sched_data.cur == 0)
    {
      if (dump)
	fprintf (dump, "// At start of bundle, so nothing to do.\n");

      /* NOTE(review): dump deliberately (?) not forwarded here.  */
      rotate_two_bundles (NULL);
      return;
    }

  /* Find the packet whose possible-stop position is closest after the
     already-scheduled insns.  */
  for (i = -1; i < NR_PACKETS; i++)
    {
      /* This is a slight hack to give the current packet the first chance.
	 This is done to avoid e.g. switching from MIB to MBB bundles.  */
      const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
      int split = get_split (p, sched_data.first_slot);
      const struct bundle *compare;
      int next, stoppos;

      if (! packet_matches_p (p, split, &next))
	continue;

      compare = next > 3 ? p->t2 : p->t1;

      stoppos = 3;
      if (compare->possible_stop)
	stoppos = compare->possible_stop;
      if (next > 3)
	stoppos += 3;

      /* If the bundle-internal stop is unusable, fall back to the
	 bundle-end stop position.  */
      if (stoppos < next || stoppos >= best_stop)
	{
	  if (compare->possible_stop == 0)
	    continue;
	  stoppos = (next > 3 ? 6 : 3);
	}
      if (stoppos < next || stoppos >= best_stop)
	continue;

      if (dump)
	fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
		 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
		 stoppos);

      best_stop = stoppos;
      best = p;
    }

  sched_data.packet = best;
  cycle_end_fill_slots (dump);
  /* Pad with empty slots up to the stop position and mark the stop.  */
  while (sched_data.cur < best_stop)
    {
      sched_data.types[sched_data.cur] = best->t[sched_data.cur];
      sched_data.insns[sched_data.cur] = 0;
      sched_data.stopbit[sched_data.cur] = 0;
      sched_data.cur++;
    }
  sched_data.stopbit[sched_data.cur - 1] = 1;
  sched_data.first_slot = best_stop;

  if (dump)
    dump_current_packet (dump);
}

/* If necessary, perform one or two rotations on the scheduling state.
   This should only be called if we are starting a new cycle.  */

static void
maybe_rotate (dump)
     FILE *dump;
{
  if (sched_data.cur == 6)
    rotate_two_bundles (dump);
  else if (sched_data.cur >= 3)
    rotate_one_bundle (dump);
  sched_data.first_slot = sched_data.cur;
}

/* The clock cycle when ia64_sched_reorder was last called.  */
static int prev_cycle;

/* The first insn scheduled in the previous cycle.  This is the saved
   value of sched_data.first_slot.  */
static int prev_first;

/* The last insn that has been scheduled.  At the start of a new cycle
   we know that we can emit new insns after it; the main scheduling code
   has already emitted a cycle_display insn after it and is using that
   as its current last insn.  */
static rtx last_issued;

/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR.  Used to
   pad out the delay between MM (shifts, etc.) and integer operations.  */

static void
nop_cycles_until (clock_var, dump)
     int clock_var;
     FILE *dump;
{
  int prev_clock = prev_cycle;
  int cycles_left = clock_var - prev_clock;

  /* Finish the previous cycle; pad it out with NOPs.  */
  if (sched_data.cur == 3)
    {
      rtx t = gen_insn_group_barrier (GEN_INT (3));
      last_issued = emit_insn_after (t, last_issued);
      maybe_rotate (dump);
    }
  else if (sched_data.cur > 0)
    {
      int need_stop = 0;
      int split = itanium_split_issue (sched_data.packet, prev_first);

      if (sched_data.cur < 3 && split > 3)
	{
	  split = 3;
	  need_stop = 1;
	}

      if (split > sched_data.cur)
	{
	  int i;
	  for (i = sched_data.cur; i < split; i++)
	    {
	      rtx t;

	      t = gen_nop_type (sched_data.packet->t[i]);
	      last_issued = emit_insn_after (t, last_issued);
	      /* NOTE(review): the nop emitted above uses t[i], but the
		 recorded type uses t[sched_data.cur]; this looks like it
		 should be t[i] -- verify against later GCC revisions.  */
	      sched_data.types[i] = sched_data.packet->t[sched_data.cur];
	      sched_data.insns[i] = last_issued;
	      sched_data.stopbit[i] = 0;
	    }
	  sched_data.cur = split;
	}

      if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
	  && cycles_left > 1)
	{
	  int i;
	  for (i = sched_data.cur; i < 6; i++)
	    {
	      rtx t;

	      t = gen_nop_type (sched_data.packet->t[i]);
	      last_issued = emit_insn_after (t, last_issued);
	      /* NOTE(review): same t[sched_data.cur]/t[i] mismatch as
		 above.  */
	      sched_data.types[i] = sched_data.packet->t[sched_data.cur];
	      sched_data.insns[i] = last_issued;
	      sched_data.stopbit[i] = 0;
	    }
	  sched_data.cur = 6;
	  cycles_left--;
	  need_stop = 1;
	}

      if (need_stop || sched_data.cur == 6)
	{
	  rtx t = gen_insn_group_barrier (GEN_INT (3));
	  last_issued = emit_insn_after (t, last_issued);
	}
      maybe_rotate (dump);
    }

  /* Emit whole empty cycles: MII bundle of nops with a group barrier.  */
  cycles_left--;
  while (cycles_left > 0)
    {
      rtx t = gen_bundle_selector (GEN_INT (0));
      last_issued = emit_insn_after (t, last_issued);
      t = gen_nop_type (TYPE_M);
      last_issued = emit_insn_after (t, last_issued);
      t = gen_nop_type (TYPE_I);
      last_issued = emit_insn_after (t, last_issued);
      if (cycles_left > 1)
	{
	  t = gen_insn_group_barrier (GEN_INT (2));
	  last_issued = emit_insn_after (t, last_issued);
	  cycles_left--;
	}
      t = gen_nop_type (TYPE_I);
      last_issued = emit_insn_after (t, last_issued);
      t = gen_insn_group_barrier (GEN_INT (3));
      last_issued = emit_insn_after (t, last_issued);
      cycles_left--;
    }
}

/* We are about
   to being issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
			     reorder_type, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *pn_ready;
     int reorder_type, clock_var;
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;

  if (sched_verbose)
    {
      fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
      dump_current_packet (dump);
    }

  /* If a ready integer op depends on an MM op (shift/multiply), pad out
     the intervening cycles with NOPs so the MM result is available.  */
  if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
    {
      for (insnp = ready; insnp < e_ready; insnp++)
	{
	  rtx insn = *insnp;
	  enum attr_itanium_class t = ia64_safe_itanium_class (insn);
	  if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
	      || t == ITANIUM_CLASS_ILOG
	      || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
	    {
	      rtx link;
	      for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
		if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
		    && REG_NOTE_KIND (link) != REG_DEP_ANTI)
		  {
		    rtx other = XEXP (link, 0);
		    enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
		    if (t0 == ITANIUM_CLASS_MMSHF
			|| t0 == ITANIUM_CLASS_MMMUL)
		      {
			nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
			goto out;
		      }
		  }
	    }
	}
    }
 out:

  prev_first = sched_data.first_slot;
  prev_cycle = clock_var;

  if (reorder_type == 0)
    maybe_rotate (sched_verbose ? dump : NULL);

  /* First, move all USEs, CLOBBERs and other crud out of the way.  */
  n_asms = 0;
  /* NOTE(review): the inner `if (insnp < e_ready)' is always true given
     the loop condition -- redundant but harmless.  */
  for (insnp = ready; insnp < e_ready; insnp++)
    if (insnp < e_ready)
      {
	rtx insn = *insnp;
	enum attr_type t = ia64_safe_type (insn);
	if (t == TYPE_UNKNOWN)
	  {
	    if (GET_CODE (PATTERN (insn)) == ASM_INPUT
		|| asm_noperands (PATTERN (insn)) >= 0)
	      {
		/* Asms collect at the front of the ready list.  */
		rtx lowest = ready[n_asms];
		ready[n_asms] = insn;
		*insnp = lowest;
		n_asms++;
	      }
	    else
	      {
		/* Other unknown insns (USE/CLOBBER etc.) go to the very
		   end so they are issued immediately.  */
		rtx highest = ready[n_ready - 1];
		ready[n_ready - 1] = insn;
		*insnp = highest;
		if (ia64_final_schedule && group_barrier_needed_p (insn))
		  {
		    schedule_stop (sched_verbose ? dump : NULL);
		    sched_data.last_was_stop = 1;
		    maybe_rotate (sched_verbose ? dump : NULL);
		  }

		return 1;
	      }
	  }
      }
  if (n_asms < n_ready)
    {
      /* Some normal insns to process.  Skip the asms.  */
      ready += n_asms;
      n_ready -= n_asms;
    }
  else if (n_ready > 0)
    {
      /* Only asm insns left.  */
      if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
	{
	  schedule_stop (sched_verbose ? dump : NULL);
	  sched_data.last_was_stop = 1;
	  maybe_rotate (sched_verbose ? dump : NULL);
	}
      cycle_end_fill_slots (sched_verbose ? dump : NULL);
      return 1;
    }

  if (ia64_final_schedule)
    {
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed_p (*insnp))
	  nr_need_stop++;

      /* Schedule a stop bit if
	 - all insns require a stop bit, or
	 - we are starting a new cycle and _any_ insns require a stop bit.
	 The reason for the latter is that if our schedule is accurate, then
	 the additional stop won't decrease performance at this point (since
	 there's a split issue at this point anyway), but it gives us more
	 freedom when scheduling the currently ready insns.  */
      if ((reorder_type == 0 && nr_need_stop)
	  || (reorder_type == 1 && n_ready == nr_need_stop))
	{
	  schedule_stop (sched_verbose ? dump : NULL);
	  sched_data.last_was_stop = 1;
	  maybe_rotate (sched_verbose ? dump : NULL);
	  if (reorder_type == 1)
	    return 0;
	}
      else
	{
	  int deleted = 0;
	  insnp = e_ready;
	  /* Move down everything that needs a stop bit, preserving relative
	     order.  */
	  while (insnp-- > ready + deleted)
	    while (insnp >= ready + deleted)
	      {
		rtx insn = *insnp;
		if (! safe_group_barrier_needed_p (insn))
		  break;
		memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
		*ready = insn;
		deleted++;
	      }
	  n_ready -= deleted;
	  ready += deleted;
	  if (deleted != nr_need_stop)
	    abort ();
	}
    }

  return itanium_reorder (sched_verbose ? dump : NULL,
			  ready, e_ready, reorder_type == 1);
}

static int
ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
     FILE *dump;
     int sched_verbose;
     rtx *ready;
     int *pn_ready;
     int clock_var;
{
  return ia64_internal_sched_reorder (dump, sched_verbose, ready,
				      pn_ready, 0, clock_var);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *pn_ready;
     int clock_var;
{
  if (sched_data.last_was_stop)
    return 0;

  /* Detect one special case and try to optimize it.
     If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
     then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
  if (sched_data.first_slot == 1
      && sched_data.stopbit[0]
      && ((sched_data.cur == 4
	   && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
	   && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
	   && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
	  || (sched_data.cur == 3
	      && (sched_data.types[1] == TYPE_M
		  || sched_data.types[1] == TYPE_A)
	      && (sched_data.types[2] != TYPE_M
		  && sched_data.types[2] != TYPE_I
		  && sched_data.types[2] != TYPE_A))))

    {
      int i, best;
      rtx stop = sched_data.insns[1];

      /* Search backward for the stop bit that must be there.  */
      while (1)
	{
	  int insn_code;

	  stop = PREV_INSN (stop);
	  if (GET_CODE (stop) != INSN)
	    abort ();
	  insn_code = recog_memoized (stop);

	  /* Ignore cycle displays and .pred.rel.mutex.  */
	  if (insn_code == CODE_FOR_cycle_display
	      || insn_code == CODE_FOR_pred_rel_mutex
	      || insn_code == CODE_FOR_prologue_use)
	    continue;

	  if (insn_code == CODE_FOR_insn_group_barrier)
	    break;
	  abort ();
	}

      /* Adjust the stop bit's slot selector.  */
      if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
	abort ();
      XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);

      sched_data.stopbit[0] = 0;
      sched_data.stopbit[2] = 1;

      /* Shift insns from slots 1..3 to slots 3..5, leaving the first
	 bundle (slots 0..2) for the MFB template.  */
      sched_data.types[5] = sched_data.types[3];
      sched_data.types[4] = sched_data.types[2];
      sched_data.types[3] = sched_data.types[1];
      sched_data.insns[5] = sched_data.insns[3];
      sched_data.insns[4] = sched_data.insns[2];
      sched_data.insns[3] = sched_data.insns[1];
      sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
      sched_data.cur += 2;
      sched_data.first_slot = 3;
      for (i = 0; i < NR_PACKETS; i++)
	{
	  const struct ia64_packet *p = packets + i;
	  if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
	    {
	      sched_data.packet = p;
	      break;
	    }
	}
      rotate_one_bundle (sched_verbose ? dump : NULL);

      /* Re-select the best packet for the remaining slots.  */
      best = 6;
      for (i = 0; i < NR_PACKETS; i++)
	{
	  const struct ia64_packet *p = packets + i;
	  int split = get_split (p, sched_data.first_slot);
	  int next;

	  /* Disallow multiway branches here.  */
	  if (p->t[1] == TYPE_B)
	    continue;

	  if (packet_matches_p (p, split, &next) && next < best)
	    {
	      best = next;
	      sched_data.packet = p;
	      sched_data.split = split;
	    }
	}
      if (best == 6)
	abort ();
    }

  if (*pn_ready > 0)
    {
      int more = ia64_internal_sched_reorder (dump, sched_verbose,
					      ready, pn_ready, 1,
					      clock_var);
      if (more)
	return more;
      /* Did we schedule a stop?  If so, finish this cycle.  */
      if (sched_data.cur == sched_data.first_slot)
	return 0;
    }

  if (sched_verbose)
    fprintf (dump, "// Can't issue more this cycle; updating type array.\n");

  cycle_end_fill_slots (sched_verbose ? dump : NULL);
  if (sched_verbose)
    dump_current_packet (dump);
  return 0;
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more ATTRIBUTE_UNUSED;
{
  enum attr_type t = ia64_safe_type (insn);

  last_issued = insn;

  if (sched_data.last_was_stop)
    {
      /* NOTE(review): the inner `int t' shadows the outer
	 `enum attr_type t' declared above -- confusing but the outer
	 value is unused in this branch.  */
      int t = sched_data.first_slot;
      if (t == 0)
	t = 3;
      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
      init_insn_group_barriers ();
      sched_data.last_was_stop = 0;
    }

  if (t == TYPE_UNKNOWN)
    {
      if (sched_verbose)
	fprintf (dump, "// Ignoring type %s\n", type_names[t]);
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	{
	  /* This must be some kind of asm.  Clear the scheduling state.  */
	  rotate_two_bundles (sched_verbose ? dump : NULL);
	  if (ia64_final_schedule)
	    group_barrier_needed_p (insn);
	}
      return 1;
    }

  /* This is _not_ just a sanity check.
     group_barrier_needed_p will update
     important state info.  Don't delete this test.  */
  if (ia64_final_schedule
      && group_barrier_needed_p (insn))
    abort ();

  /* Record the issued insn in the current issue window.  */
  sched_data.stopbit[sched_data.cur] = 0;
  sched_data.insns[sched_data.cur] = insn;
  sched_data.types[sched_data.cur] = t;

  sched_data.cur++;
  if (sched_verbose)
    fprintf (dump, "// Scheduling insn %d of type %s\n",
	     INSN_UID (insn), type_names[t]);

  /* Calls always end an instruction group.  */
  if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
    {
      schedule_stop (sched_verbose ? dump : NULL);
      sched_data.last_was_stop = 1;
    }

  return 1;
}

/* Free data allocated by ia64_sched_init.  */

static void
ia64_sched_finish (dump, sched_verbose)
     FILE *dump;
     int sched_verbose;
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  rotate_two_bundles (NULL);
  free (sched_types);
  free (sched_ready);
}

/* Emit a cycle_display marker after LAST during the final schedule so
   the dump/assembly shows clock boundaries; a no-op otherwise.  */

static rtx
ia64_cycle_display (clock, last)
     int clock;
     rtx last;
{
  if (ia64_final_schedule)
    return emit_insn_after (gen_cycle_display (GEN_INT (clock)), last);
  else
    return last;
}

/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info ()
{
  int i;

  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      int r;
      rtx head = bb->head;

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
	continue;
      if (GET_CODE (NEXT_INSN (head)) == NOTE
	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
	head = NEXT_INSN (head);

      /* Step by 2: predicates come in complementary pairs.  */
      for (r = PR_REG (0); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
	  {
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == bb->end)
	      bb->end = n;
	    head = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      rtx insn = bb->head;

      while (1)
	{
	  if (GET_CODE (insn) == CALL_INSN
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (bb->head == insn)
		bb->head = b;
	      if (bb->end == insn)
		bb->end = a;
	    }

	  if (insn == bb->end)
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}

/* Generate a NOP instruction of type T.  We will never generate L type
   nops.  */

static rtx
gen_nop_type (t)
     enum attr_type t;
{
  switch (t)
    {
    case TYPE_M:
      return gen_nop_m ();
    case TYPE_I:
      return gen_nop_i ();
    case TYPE_B:
      return gen_nop_b ();
    case TYPE_F:
      return gen_nop_f ();
    case TYPE_X:
      return gen_nop_x ();
    default:
      abort ();
    }
}

/* After the last scheduling pass, fill in NOPs.  It's easier to do this
   here than while scheduling.  */

static void
ia64_emit_nops ()
{
  rtx insn;
  const struct bundle *b = 0;
  int bundle_pos = 0;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx pat;
      enum attr_type t;
      pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
	continue;
      /* NOTE(review): UNSPEC 22 is presumably the bundle_selector
	 pattern (its operand indexes the bundle[] table below) --
	 confirm against the machine description.  */
      if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
	  || GET_CODE (insn) == CODE_LABEL)
	{
	  /* Pad out the previous bundle before starting a new one (or
	     abandoning bundling at a label).  */
	  if (b)
	    while (bundle_pos < 3)
	      {
		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
		bundle_pos++;
	      }
	  if (GET_CODE (insn) != CODE_LABEL)
	    b = bundle + INTVAL (XVECEXP (pat, 0, 0));
	  else
	    b = 0;
	  bundle_pos = 0;
	  continue;
	}
      else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
	{
	  /* Insn group barrier: its operand gives the slot up to which
	     the current bundle must be padded.  */
	  int t = INTVAL (XVECEXP (pat, 0, 0));
	  if (b)
	    while (bundle_pos < t)
	      {
		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
		bundle_pos++;
	      }
	  continue;
	}

      if (bundle_pos == 3)
	b = 0;

      if (b && INSN_P (insn))
	{
	  t = ia64_safe_type (insn);
	  if (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
	    {
	      while (bundle_pos < 3)
		{
		  emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
		  bundle_pos++;
		}
	      continue;
	    }

	  if (t == TYPE_UNKNOWN)
	    continue;
	  /* Emit nops until we reach a slot this insn can occupy.  */
	  while (bundle_pos < 3)
	    {
	      if (t == b->t[bundle_pos]
		  || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
				      || b->t[bundle_pos] == TYPE_I)))
		break;

	      emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
	      bundle_pos++;
	    }
	  if (bundle_pos < 3)
	    bundle_pos++;
	}
    }
}

/* Perform machine dependent operations on the rtl chain INSNS.  */

void
ia64_reorg (insns)
     rtx insns;
{
  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns_noflow ();

  /* Make sure the CFG and global_live_at_start are correct
     for emit_predicate_relation_info.
*/
  find_basic_blocks (insns, max_reg_num (), NULL);
  life_analysis (insns, NULL, PROP_DEATH_NOTES);

  if (ia64_flag_schedule_insns2)
    {
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;
      schedule_ebbs (rtl_dump_file);
      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);

      /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
	 place as they were during scheduling.  */
      emit_insn_group_barriers (rtl_dump_file, insns);
      ia64_emit_nops ();
    }
  else
    emit_all_insn_group_barriers (rtl_dump_file, insns);

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      /* NOTE(review): UNSPEC_VOLATILE code 2 is the insn group barrier
	 (same constant as in ia64_emit_nops) -- confirm in ia64.md.  */
      if (GET_CODE (insn) == INSN
	  && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	  && XINT (PATTERN (insn), 1) == 2)
	{
	  saw_stop = 1;
	  insn = prev_active_insn (insn);
	}
      if (GET_CODE (insn) == CALL_INSN)
	{
	  /* Append a break.f (between group barriers) so the call is no
	     longer the last insn in the function.  */
	  if (! saw_stop)
	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	  emit_insn (gen_break_f ());
	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	}
    }

  fixup_errata ();
  emit_predicate_relation_info ();
}

/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (regno)
     int regno;
{
  switch (regno)
    {
    case R_GR (1):
      /* When a function makes a call through a function descriptor, we
	 will write a (potentially) new value to "gp".  After returning
	 from such a call, we need to make sure the function restores the
	 original gp-value, even if the function itself does not use the
	 gp anymore.
This makes the code bigger, but should make the + code faster because there is one less load. This also includes incomplete + types which can't go in sdata/sbss. */ + +/* ??? See select_section. We must put short own readonly variables in + sdata/sbss instead of the more natural rodata, because we can't perform + the DECL_READONLY_SECTION test here. */ + +extern struct obstack * saveable_obstack; + +void +ia64_encode_section_info (decl) + tree decl; +{ + const char *symbol_str; + + if (TREE_CODE (decl) == FUNCTION_DECL) + { + SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1; + return; + } + + /* Careful not to prod global register variables. */ + if (TREE_CODE (decl) != VAR_DECL + || GET_CODE (DECL_RTL (decl)) != MEM + || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF) + return; + + symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0); + + /* We assume that -fpic is used only to create a shared library (dso). + With -fpic, no global data can ever be sdata. + Without -fpic, global common uninitialized data can never be sdata, since + it can unify with a real definition in a dso. */ + /* ??? Actually, we can put globals in sdata, as long as we don't use gprel + to access them. The linker may then be able to do linker relaxation to + optimize references to them. Currently sdata implies use of gprel. */ + /* We need the DECL_EXTERNAL check for C++. static class data members get + both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are + statically allocated, but the space is allocated somewhere else. Such + decls can not be own data. */ + if (! TARGET_NO_SDATA + && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl) + && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl)) + && ! (TREE_PUBLIC (decl) + && (flag_pic + || (DECL_COMMON (decl) + && (DECL_INITIAL (decl) == 0 + || DECL_INITIAL (decl) == error_mark_node)))) + /* Either the variable must be declared without a section attribute, + or the section must be sdata or sbss. */ + && (DECL_SECTION_NAME (decl) == 0 + || ! 
strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".sdata") + || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".sbss"))) + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl)); + + /* If the variable has already been defined in the output file, then it + is too late to put it in sdata if it wasn't put there in the first + place. The test is here rather than above, because if it is already + in sdata, then it can stay there. */ + + if (TREE_ASM_WRITTEN (decl)) + ; + + /* If this is an incomplete type with size 0, then we can't put it in + sdata because it might be too big when completed. */ + else if (size > 0 + && size <= (HOST_WIDE_INT) ia64_section_threshold + && symbol_str[0] != SDATA_NAME_FLAG_CHAR) + { + size_t len = strlen (symbol_str); + char *newstr = alloca (len + 1); + const char *string; + + *newstr = SDATA_NAME_FLAG_CHAR; + memcpy (newstr + 1, symbol_str, len + 1); + + string = ggc_alloc_string (newstr, len + 1); + XSTR (XEXP (DECL_RTL (decl), 0), 0) = string; + } + } + /* This decl is marked as being in small data/bss but it shouldn't + be; one likely explanation for this is that the decl has been + moved into a different section from the one it was in when + ENCODE_SECTION_INFO was first called. Remove the '@'. */ + else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR) + { + XSTR (XEXP (DECL_RTL (decl), 0), 0) + = ggc_strdup (symbol_str + 1); + } +} + +/* Output assembly directives for prologue regions. */ + +/* The current basic block number. */ + +static int block_num; + +/* True if we need a copy_state command at the start of the next block. */ + +static int need_copy_state; + +/* The function emits unwind directives for the start of an epilogue. */ + +static void +process_epilogue () +{ + /* If this isn't the last block of the function, then we need to label the + current state, and copy it back in at the start of the next block. 
*/

  if (block_num != n_basic_blocks - 1)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = 1;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}

/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.
   Returns nonzero if the SET was recognized and a directive emitted.  */

static int
process_set (asm_out_file, pat)
     FILE *asm_out_file;
     rtx pat;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  /* NOTE(review): UNSPEC_VOLATILE code 0 is assumed to be the alloc
     insn -- confirm against ia64.md.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == 0
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
	 shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
	abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
	       ia64_dbx_register_number (dest_regno));
      return 1;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);
	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
	    {
	      /* SP -= frame size: prologue; SP += frame size: epilogue.  */
	      if (INTVAL (op1) < 0)
		{
		  fputs ("\t.fframe ", asm_out_file);
		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
			   -INTVAL (op1));
		  fputc ('\n', asm_out_file);
		}
	      else
		process_epilogue ();
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (src) == REG
	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
	process_epilogue ();
      else
	abort ();

      return 1;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
	{
	case BR_REG (0):
	  /* Saving return address pointer.  */
	  if (dest_regno != current_frame_info.reg_save_b0)
	    abort ();
	  fprintf (asm_out_file, "\t.save rp, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case PR_REG (0):
	  if (dest_regno != current_frame_info.reg_save_pr)
	    abort ();
	  fprintf (asm_out_file, "\t.save pr, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_UNAT_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_unat)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_LC_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_lc)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case STACK_POINTER_REGNUM:
	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
	      || ! frame_pointer_needed)
	    abort ();
	  fprintf (asm_out_file, "\t.vframe r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	default:
	  /* Everything else should indicate being stored to memory.  */
	  abort ();
	}
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
	{
	  base = XEXP (dest, 0);
	  off = 0;
	}
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
	{
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
	}
      else
	abort ();

      /* .savepsp offsets are relative to psp (hence the negation);
	 .savesp offsets are relative to sp.  */
      if (base == hard_frame_pointer_rtx)
	{
	  saveop = ".savepsp";
	  off = - off;
	}
      else if (base == stack_pointer_rtx)
	saveop = ".savesp";
      else
	abort ();

      src_regno = REGNO (src);
      switch (src_regno)
	{
	case BR_REG (0):
	  if (current_frame_info.reg_save_b0 != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
	  return 1;

	case PR_REG (0):
	  if (current_frame_info.reg_save_pr != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
	  return 1;

	case AR_LC_REGNUM:
	  if (current_frame_info.reg_save_ar_lc != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
	  return 1;

	case AR_PFS_REGNUM:
	  if (current_frame_info.reg_save_ar_pfs != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
	  return 1;

	case AR_UNAT_REGNUM:
	  if (current_frame_info.reg_save_ar_unat != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
	  return 1;

	case GR_REG (4):
	case GR_REG (5):
	case GR_REG (6):
	case GR_REG (7):
	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
		   1 << (src_regno - GR_REG (4)));
	  return 1;

	case BR_REG (1):
	case BR_REG (2):
	case BR_REG (3):
	case BR_REG (4):
	case BR_REG (5):
	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
		   1 << (src_regno - BR_REG (1)));
	  return 1;

	case FR_REG (2):
	case FR_REG (3):
	case FR_REG (4):
	case FR_REG (5):
	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
		   1 << (src_regno - FR_REG (2)));
	  return 1;

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  /* NOTE(review): the FR_REG (12) bias looks intentional: if the
	     frmask's low bits map f2-f5, f16 would land on bit 4 = 16 - 12.
	     Verify against the unwind directive specification.  */
	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		   1 << (src_regno - FR_REG (12)));
	  return 1;

	default:
	  return 0;
	}
    }

  return 0;
}


/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */
void
process_for_unwind_directive (asm_out_file, insn)
     FILE *asm_out_file;
     rtx insn;
{
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx pat;

      if (GET_CODE (insn) == NOTE
	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  block_num = NOTE_BASIC_BLOCK (insn)->index;

	  /* Restore unwind state from immediately before the epilogue.  */
	  if (need_copy_state)
	    {
	      fprintf (asm_out_file, "\t.body\n");
	      fprintf (asm_out_file, "\t.copy_state 1\n");
	      need_copy_state = 0;
	    }
	}

      if (! RTX_FRAME_RELATED_P (insn))
	return;

      /* Prefer the REG_FRAME_RELATED_EXPR note over the raw pattern.  */
      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
	pat = XEXP (pat, 0);
      else
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	{
	case SET:
	  process_set (asm_out_file, pat);
	  break;

	case PARALLEL:
	  {
	    int par_index;
	    int limit = XVECLEN (pat, 0);
	    for (par_index = 0; par_index < limit; par_index++)
	      {
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x);
	      }
	    break;
	  }

	default:
	  abort ();
	}
    }
}


/* Register the ia64 builtin functions (atomic __sync_* operations and
   register-stack intrinsics) with the front end.  */

void
ia64_init_builtins ()
{
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);
  tree endlink = void_list_node;

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));

  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_integer_type_node,
						 tree_cons (NULL_TREE,
							    long_integer_type_node,
							    endlink))));
  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node, endlink)));

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE, long_integer_type_node,
						 endlink)));

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
						      endlink));

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
						      endlink));

/* Shorthand: register NAME with signature TYPE as machine builtin CODE.  */
#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
	       IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
	       IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
	       IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, endlink),
	       IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, endlink),
	       IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_NAND_AND_FETCH_DI);

#undef def_builtin
}

/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.
*/
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      /* The hardware fetchadd handles this increment directly; no
	 compare-and-swap loop is needed.  */
      if (mode == SImode)
	insn = gen_fetchadd_acq_si (ret, mem, value);
      else
	insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  /* Loop until the compare-and-swap observed no intervening write.  */
  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
}

/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp + value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
}

/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  /* For the bool_ form TARGET receives the comparison result below, so
     only the val_ form may compute directly into it.  */
  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
	target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}

/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}

/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.
*/

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  /* The volatile store of zero is the release; no explicit fence.  */
  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}

/* Expand one of the machine-specific builtins registered in
   ia64_init_builtins.  The first switch picks the operand mode from the
   function code; the second dispatches to the specific expander.  */

rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      /* NAND is signalled to the expander via one_cmpl_optab.  */
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}

/* For the HP-UX IA64 aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (mode, type)
     enum machine_mode mode;
     tree type;
{
  /* Exception to normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
     hardwired to be true.  */

  return((mode == BLKmode
	  ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	     && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
	  : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
	 ? downward : upward);
}
diff --git a/contrib/gcc/config/ia64/ia64.h b/contrib/gcc/config/ia64/ia64.h
new file mode 100644
index 0000000..1900717
--- /dev/null
+++ b/contrib/gcc/config/ia64/ia64.h
@@ -0,0 +1,2530 @@
/* Definitions of target machine GNU compiler.  IA-64 version.
   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   Contributed by James E.
Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* ??? Look at ABI group documents for list of preprocessor macros and
   other features required for ABI compliance.  */

/* ??? Functions containing a non-local goto target save many registers.  Why?
   See for instance execute/920428-2.c.  */

/* ??? Add support for short data/bss sections.  */


/* Run-time target specifications */

#define CPP_CPU_SPEC "\
  -Acpu=ia64 -Amachine=ia64 \
  %{!ansi:%{!std=c*:%{!std=i*:-Dia64}}} -D__ia64 -D__ia64__"

#define CC1_SPEC "%(cc1_cpu) "

/* This declaration should be present.  */
extern int target_flags;

/* This series of macros is to allow compiler command arguments to enable or
   disable the use of optional features of the target machine.  */

#define MASK_BIG_ENDIAN	0x00000001	/* Generate big endian code.  */

#define MASK_GNU_AS	0x00000002	/* Generate code for GNU as.  */

#define MASK_GNU_LD	0x00000004	/* Generate code for GNU ld.  */

#define MASK_NO_PIC	0x00000008	/* Generate code without GP reg.  */

#define MASK_VOL_ASM_STOP 0x00000010	/* Emit stop bits for vol ext asm.  */

#define MASK_ILP32      0x00000020      /* Generate ILP32 code.  */

#define MASK_B_STEP	0x00000040	/* Emit code for Itanium B step.  */

#define MASK_REG_NAMES	0x00000080	/* Use in/loc/out register names.  */

#define MASK_NO_SDATA   0x00000100	/* Disable sdata/scommon/sbss.  */

#define MASK_CONST_GP	0x00000200	/* treat gp as program-wide constant */

#define MASK_AUTO_PIC	0x00000400	/* generate automatically PIC */

#define MASK_INLINE_DIV_LAT 0x00000800	/* inline div, min latency.  */

#define MASK_INLINE_DIV_THR 0x00001000	/* inline div, max throughput.  */

/* NOTE(review): MASK_DWARF2_ASM deliberately sits far above the other
   bits (0x40000000); the gap is presumably reserved -- confirm before
   allocating new MASK_* values.  */
#define MASK_DWARF2_ASM 0x40000000	/* test dwarf2 line info via gas.  */

#define TARGET_BIG_ENDIAN	(target_flags & MASK_BIG_ENDIAN)

#define TARGET_GNU_AS		(target_flags & MASK_GNU_AS)

#define TARGET_GNU_LD		(target_flags & MASK_GNU_LD)

#define TARGET_NO_PIC		(target_flags & MASK_NO_PIC)

#define TARGET_VOL_ASM_STOP	(target_flags & MASK_VOL_ASM_STOP)

#define TARGET_ILP32		(target_flags & MASK_ILP32)

#define TARGET_B_STEP		(target_flags & MASK_B_STEP)

#define TARGET_REG_NAMES	(target_flags & MASK_REG_NAMES)

#define TARGET_NO_SDATA		(target_flags & MASK_NO_SDATA)

#define TARGET_CONST_GP		(target_flags & MASK_CONST_GP)

#define TARGET_AUTO_PIC		(target_flags & MASK_AUTO_PIC)

#define TARGET_INLINE_DIV_LAT	(target_flags & MASK_INLINE_DIV_LAT)

#define TARGET_INLINE_DIV_THR	(target_flags & MASK_INLINE_DIV_THR)

#define TARGET_INLINE_DIV \
  (target_flags & (MASK_INLINE_DIV_LAT | MASK_INLINE_DIV_THR))

#define TARGET_DWARF2_ASM	(target_flags & MASK_DWARF2_ASM)

/* This macro defines names of command options to set and clear bits in
   `target_flags'.  Its definition is an initializer with a subgrouping for
   each command option.  */

#define TARGET_SWITCHES							\
{									\
  { "big-endian",	MASK_BIG_ENDIAN,				\
      N_("Generate big endian code") },					\
  { "little-endian",	-MASK_BIG_ENDIAN,				\
      N_("Generate little endian code") },				\
  { "gnu-as",		MASK_GNU_AS,					\
      N_("Generate code for GNU as") },					\
  { "no-gnu-as",	-MASK_GNU_AS,					\
      N_("Generate code for Intel as") },				\
  { "gnu-ld",		MASK_GNU_LD,					\
      N_("Generate code for GNU ld") },					\
  { "no-gnu-ld",	-MASK_GNU_LD,					\
      N_("Generate code for Intel ld") },				\
  { "no-pic",		MASK_NO_PIC,					\
      N_("Generate code without GP reg") },				\
  { "volatile-asm-stop", MASK_VOL_ASM_STOP,				\
      N_("Emit stop bits before and after volatile extended asms") },	\
  { "no-volatile-asm-stop", -MASK_VOL_ASM_STOP,				\
      N_("Don't emit stop bits before and after volatile extended asms") }, \
  { "b-step",		MASK_B_STEP,					\
      N_("Emit code for Itanium (TM) processor B step")},		\
  { "register-names",	MASK_REG_NAMES,					\
      N_("Use in/loc/out register names")},				\
  { "no-sdata",		MASK_NO_SDATA,					\
      N_("Disable use of sdata/scommon/sbss")},				\
  { "sdata",		-MASK_NO_SDATA,					\
      N_("Enable use of sdata/scommon/sbss")},				\
  { "constant-gp",	MASK_CONST_GP,					\
      N_("gp is constant (but save/restore gp on indirect calls)") },	\
  { "auto-pic",		MASK_AUTO_PIC,					\
      N_("Generate self-relocatable code") },				\
  { "inline-divide-min-latency", MASK_INLINE_DIV_LAT,			\
      N_("Generate inline division, optimize for latency") },		\
  { "inline-divide-max-throughput", MASK_INLINE_DIV_THR,		\
      N_("Generate inline division, optimize for throughput") },	\
  { "dwarf2-asm",	MASK_DWARF2_ASM,				\
      N_("Enable Dwarf 2 line debug info via GNU as")},			\
  { "no-dwarf2-asm",	-MASK_DWARF2_ASM,				\
      N_("Disable Dwarf 2 line debug info via GNU as")},		\
  SUBTARGET_SWITCHES							\
  { "",			TARGET_DEFAULT | TARGET_CPU_DEFAULT,		\
      NULL }								\
}

/* Default target_flags if no switches are specified */

#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT MASK_DWARF2_ASM
#endif

#ifndef TARGET_CPU_DEFAULT
#define TARGET_CPU_DEFAULT 0
#endif

+#ifndef SUBTARGET_SWITCHES +#define SUBTARGET_SWITCHES +#endif + +/* This macro is similar to `TARGET_SWITCHES' but defines names of command + options that have values. Its definition is an initializer with a + subgrouping for each command option. */ + +extern const char *ia64_fixed_range_string; +#define TARGET_OPTIONS \ +{ \ + { "fixed-range=", &ia64_fixed_range_string, \ + N_("Specify range of registers to make fixed")}, \ +} + +/* Sometimes certain combinations of command options do not make sense on a + particular target machine. You can define a macro `OVERRIDE_OPTIONS' to + take account of this. This macro, if defined, is executed once just after + all the command options have been parsed. */ + +#define OVERRIDE_OPTIONS ia64_override_options () + +/* Some machines may desire to change what optimizations are performed for + various optimization levels. This macro, if defined, is executed once just + after the optimization level is determined and before the remainder of the + command options have been parsed. Values set in this macro are used as the + default values for the other command line options. */ + +/* #define OPTIMIZATION_OPTIONS(LEVEL,SIZE) */ + +/* Driver configuration */ + +/* A C string constant that tells the GNU CC driver program options to pass to + CPP. It can also specify how to translate options you give to GNU CC into + options for GNU CC to pass to the CPP. */ + +/* ??? __LONG_MAX__ depends on LP64/ILP32 switch. */ +/* ??? An alternative is to modify glimits.h to check for __LP64__ instead + of checked for CPU specific defines. We could also get rid of all LONG_MAX + defines in other tm.h files. */ +#define CPP_SPEC \ + "%{mcpu=itanium:-D__itanium__} %{mbig-endian:-D__BIG_ENDIAN__} \ + -D__LONG_MAX__=9223372036854775807L" + +/* This is always "long" so it doesn't "change" in ILP32 vs. LP64. */ +/* #define NO_BUILTIN_SIZE_TYPE */ + +/* This is always "long" so it doesn't "change" in ILP32 vs. LP64. 
*/ +/* #define NO_BUILTIN_PTRDIFF_TYPE */ + +/* A C string constant that tells the GNU CC driver program options to pass to + `cc1'. It can also specify how to translate options you give to GNU CC into + options for GNU CC to pass to the `cc1'. */ + +#undef CC1_SPEC +#define CC1_SPEC "%{G*}" + +/* A C string constant that tells the GNU CC driver program options to pass to + `cc1plus'. It can also specify how to translate options you give to GNU CC + into options for GNU CC to pass to the `cc1plus'. */ + +/* #define CC1PLUS_SPEC "" */ + +/* Storage Layout */ + +/* Define this macro to have the value 1 if the most significant bit in a byte + has the lowest number; otherwise define it to have the value zero. */ + +#define BITS_BIG_ENDIAN 0 + +#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +/* Define this macro to have the value 1 if, in a multiword object, the most + significant word has the lowest number. */ + +#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +#if defined(__BIG_ENDIAN__) +#define LIBGCC2_WORDS_BIG_ENDIAN 1 +#else +#define LIBGCC2_WORDS_BIG_ENDIAN 0 +#endif + +#define BITS_PER_UNIT 8 + +#define BITS_PER_WORD 64 + +#define UNITS_PER_WORD 8 + +#define POINTER_SIZE (TARGET_ILP32 ? 32 : 64) + +/* A C expression whose value is zero if pointers that need to be extended + from being `POINTER_SIZE' bits wide to `Pmode' are sign-extended and one if + they are zero-extended and negative one if there is an ptr_extend operation. + + You need not define this macro if the `POINTER_SIZE' is equal to the width + of `Pmode'. */ +/* Need this for 32 bit pointers, see hpux.h for setting it. */ +/* #define POINTERS_EXTEND_UNSIGNED */ + +/* A macro to update MODE and UNSIGNEDP when an object whose type is TYPE and + which has the specified mode and signedness is to be stored in a register. + This macro is only called when TYPE is a scalar type. 
*/ +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ +do \ + { \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + (MODE) = SImode; \ + } \ +while (0) + +/* ??? ABI doesn't allow us to define this. */ +/* #define PROMOTE_FUNCTION_ARGS */ + +/* ??? ABI doesn't allow us to define this. */ +/* #define PROMOTE_FUNCTION_RETURN */ + +#define PARM_BOUNDARY 64 + +/* Define this macro if you wish to preserve a certain alignment for the stack + pointer. The definition is a C expression for the desired alignment + (measured in bits). */ + +#define STACK_BOUNDARY 128 + +/* Align frames on double word boundaries */ +#ifndef IA64_STACK_ALIGN +#define IA64_STACK_ALIGN(LOC) (((LOC) + 15) & ~15) +#endif + +#define FUNCTION_BOUNDARY 128 + +/* Optional x86 80-bit float, quad-precision 128-bit float, and quad-word + 128 bit integers all require 128 bit alignment. */ +#define BIGGEST_ALIGNMENT 128 + +/* If defined, a C expression to compute the alignment for a static variable. + TYPE is the data type, and ALIGN is the alignment that the object + would ordinarily have. The value of this macro is used instead of that + alignment to align the object. */ + +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +/* If defined, a C expression to compute the alignment given to a constant that + is being placed in memory. CONSTANT is the constant and ALIGN is the + alignment that the object would ordinarily have. The value of this macro is + used instead of that alignment to align the object. */ + +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +#define STRICT_ALIGNMENT 1 + +/* Define this if you wish to imitate the way many other C compilers handle + alignment of bitfields and the structures that contain them. 
+ The behavior is that the type written for a bitfield (`int', `short', or + other integer type) imposes an alignment for the entire structure, as if the + structure really did contain an ordinary field of that type. In addition, + the bitfield is placed within the structure so that it would fit within such + a field, not crossing a boundary for it. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* An integer expression for the size in bits of the largest integer machine + mode that should actually be used. */ + +/* Allow pairs of registers to be used, which is the intent of the default. */ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) + +/* A code distinguishing the floating point format of the target machine. */ +#define TARGET_FLOAT_FORMAT IEEE_FLOAT_FORMAT + +/* By default, the C++ compiler will use function addresses in the + vtable entries. Setting this non-zero tells the compiler to use + function descriptors instead. The value of this macro says how + many words wide the descriptor is (normally 2). It is assumed + that the address of a function descriptor may be treated as a + pointer to a function. */ +#define TARGET_VTABLE_USES_DESCRIPTORS 2 + +/* Layout of Source Language Data Types */ + +#define INT_TYPE_SIZE 32 + +#define SHORT_TYPE_SIZE 16 + +#define LONG_TYPE_SIZE (TARGET_ILP32 ? 32 : 64) + +#define MAX_LONG_TYPE_SIZE 64 + +#define LONG_LONG_TYPE_SIZE 64 + +#define CHAR_TYPE_SIZE 8 + +#define FLOAT_TYPE_SIZE 32 + +#define DOUBLE_TYPE_SIZE 64 + +#define LONG_DOUBLE_TYPE_SIZE 128 + +/* Tell real.c that this is the 80-bit Intel extended float format + packaged in a 128-bit entity. */ + +#define INTEL_EXTENDED_IEEE_FORMAT 1 + +#define DEFAULT_SIGNED_CHAR 1 + +/* A C expression for a string describing the name of the data type to use for + size values. The typedef name `size_t' is defined using the contents of the + string. */ +/* ??? Needs to be defined for P64 code. 
*/ +/* #define SIZE_TYPE */ + +/* A C expression for a string describing the name of the data type to use for + the result of subtracting two pointers. The typedef name `ptrdiff_t' is + defined using the contents of the string. See `SIZE_TYPE' above for more + information. */ +/* ??? Needs to be defined for P64 code. */ +/* #define PTRDIFF_TYPE */ + +/* A C expression for a string describing the name of the data type to use for + wide characters. The typedef name `wchar_t' is defined using the contents + of the string. See `SIZE_TYPE' above for more information. */ +/* #define WCHAR_TYPE */ + +/* A C expression for the size in bits of the data type for wide characters. + This is used in `cpp', which cannot make use of `WCHAR_TYPE'. */ +/* #define WCHAR_TYPE_SIZE */ + + +/* Register Basics */ + +/* Number of hardware registers known to the compiler. + We have 128 general registers, 128 floating point registers, + 64 predicate registers, 8 branch registers, one frame pointer, + and several "application" registers. */ + +#define FIRST_PSEUDO_REGISTER 335 + +/* Ranges for the various kinds of registers. 
*/ +#define ADDL_REGNO_P(REGNO) ((unsigned HOST_WIDE_INT) (REGNO) <= 3) +#define GR_REGNO_P(REGNO) ((unsigned HOST_WIDE_INT) (REGNO) <= 127) +#define FR_REGNO_P(REGNO) ((REGNO) >= 128 && (REGNO) <= 255) +#define PR_REGNO_P(REGNO) ((REGNO) >= 256 && (REGNO) <= 319) +#define BR_REGNO_P(REGNO) ((REGNO) >= 320 && (REGNO) <= 327) +#define GENERAL_REGNO_P(REGNO) \ + (GR_REGNO_P (REGNO) \ + || (REGNO) == FRAME_POINTER_REGNUM \ + || (REGNO) == RETURN_ADDRESS_POINTER_REGNUM) + +#define GR_REG(REGNO) ((REGNO) + 0) +#define FR_REG(REGNO) ((REGNO) + 128) +#define PR_REG(REGNO) ((REGNO) + 256) +#define BR_REG(REGNO) ((REGNO) + 320) +#define OUT_REG(REGNO) ((REGNO) + 120) +#define IN_REG(REGNO) ((REGNO) + 112) +#define LOC_REG(REGNO) ((REGNO) + 32) + +#define AR_CCV_REGNUM 330 +#define AR_UNAT_REGNUM 331 +#define AR_PFS_REGNUM 332 +#define AR_LC_REGNUM 333 +#define AR_EC_REGNUM 334 + +#define IN_REGNO_P(REGNO) ((REGNO) >= IN_REG (0) && (REGNO) <= IN_REG (7)) +#define LOC_REGNO_P(REGNO) ((REGNO) >= LOC_REG (0) && (REGNO) <= LOC_REG (79)) +#define OUT_REGNO_P(REGNO) ((REGNO) >= OUT_REG (0) && (REGNO) <= OUT_REG (7)) + +#define AR_M_REGNO_P(REGNO) ((REGNO) == AR_CCV_REGNUM \ + || (REGNO) == AR_UNAT_REGNUM) +#define AR_I_REGNO_P(REGNO) ((REGNO) >= AR_PFS_REGNUM \ + && (REGNO) < FIRST_PSEUDO_REGISTER) +#define AR_REGNO_P(REGNO) ((REGNO) >= AR_CCV_REGNUM \ + && (REGNO) < FIRST_PSEUDO_REGISTER) + + +/* ??? Don't really need two sets of macros. I like this one better because + it is less typing. */ +#define R_GR(REGNO) GR_REG (REGNO) +#define R_FR(REGNO) FR_REG (REGNO) +#define R_PR(REGNO) PR_REG (REGNO) +#define R_BR(REGNO) BR_REG (REGNO) + +/* An initializer that says which registers are used for fixed purposes all + throughout the compiled code and are therefore not available for general + allocation. 
+ + r0: constant 0 + r1: global pointer (gp) + r12: stack pointer (sp) + r13: thread pointer (tp) + f0: constant 0.0 + f1: constant 1.0 + p0: constant true + fp: eliminable frame pointer */ + +/* The last 16 stacked regs are reserved for the 8 input and 8 output + registers. */ + +#define FIXED_REGISTERS \ +{ /* General registers. */ \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Floating-point registers. */ \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Predicate registers. */ \ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Branch registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + /*FP RA CCV UNAT PFS LC EC */ \ + 1, 1, 1, 1, 1, 0, 1 \ + } + +/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered + (in general) by function calls as well as for fixed registers. This + macro therefore identifies the registers that are not available for + general allocation of values that must live across function calls. */ + +#define CALL_USED_REGISTERS \ +{ /* General registers. 
*/ \ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Floating-point registers. */ \ + 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Predicate registers. */ \ + 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Branch registers. */ \ + 1, 0, 0, 0, 0, 0, 1, 1, \ + /*FP RA CCV UNAT PFS LC EC */ \ + 1, 1, 1, 1, 1, 0, 1 \ +} + +/* Like `CALL_USED_REGISTERS' but used to overcome a historical + problem which makes CALL_USED_REGISTERS *always* include + all the FIXED_REGISTERS. Until this problem has been + resolved this macro can be used to overcome this situation. + In particular, block_propagate() requires this list + be acurate, or we can remove registers which should be live. + This macro is used in regs_invalidated_by_call. */ + +#define CALL_REALLY_USED_REGISTERS \ +{ /* General registers. 
*/ \ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Floating-point registers. */ \ + 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Predicate registers. */ \ + 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Branch registers. */ \ + 1, 0, 0, 0, 0, 0, 1, 1, \ + /*FP RA CCV UNAT PFS LC EC */ \ + 0, 0, 1, 0, 1, 0, 0 \ +} + + +/* Define this macro if the target machine has register windows. This C + expression returns the register number as seen by the called function + corresponding to the register number OUT as seen by the calling function. + Return OUT if register number OUT is not an outbound register. */ + +#define INCOMING_REGNO(OUT) \ + ((unsigned) ((OUT) - OUT_REG (0)) < 8 ? IN_REG ((OUT) - OUT_REG (0)) : (OUT)) + +/* Define this macro if the target machine has register windows. This C + expression returns the register number as seen by the calling function + corresponding to the register number IN as seen by the called function. + Return IN if register number IN is not an inbound register. */ + +#define OUTGOING_REGNO(IN) \ + ((unsigned) ((IN) - IN_REG (0)) < 8 ? 
OUT_REG ((IN) - IN_REG (0)) : (IN)) + +/* Define this macro if the target machine has register windows. This + C expression returns true if the register is call-saved but is in the + register window. */ + +#define LOCAL_REGNO(REGNO) \ + (IN_REGNO_P (REGNO) || LOC_REGNO_P (REGNO)) + +/* Add any extra modes needed to represent the condition code. + + CCImode is used to mark a single predicate register instead + of a register pair. This is currently only used in reg_raw_mode + so that flow doesn't do something stupid. */ + +#define EXTRA_CC_MODES CC(CCImode, "CCI") + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. Must be defined if + EXTRA_CC_MODES is defined. */ + +#define SELECT_CC_MODE(OP,X,Y) CCmode + +/* Order of allocation of registers */ + +/* If defined, an initializer for a vector of integers, containing the numbers + of hard registers in the order in which GNU CC should prefer to use them + (from most preferred to least). + + If this macro is not defined, registers are used lowest numbered first (all + else being equal). + + One use of this macro is on machines where the highest numbered registers + must always be saved and the save-multiple-registers instruction supports + only sequences of consecutive registers. On such machines, define + `REG_ALLOC_ORDER' to be an initializer that lists the highest numbered + allocatable register first. */ + +/* ??? Should the GR return value registers come before or after the rest + of the caller-save GRs? */ + +#define REG_ALLOC_ORDER \ +{ \ + /* Caller-saved general registers. */ \ + R_GR (14), R_GR (15), R_GR (16), R_GR (17), \ + R_GR (18), R_GR (19), R_GR (20), R_GR (21), R_GR (22), R_GR (23), \ + R_GR (24), R_GR (25), R_GR (26), R_GR (27), R_GR (28), R_GR (29), \ + R_GR (30), R_GR (31), \ + /* Output registers. 
*/ \ + R_GR (120), R_GR (121), R_GR (122), R_GR (123), R_GR (124), R_GR (125), \ + R_GR (126), R_GR (127), \ + /* Caller-saved general registers, also used for return values. */ \ + R_GR (8), R_GR (9), R_GR (10), R_GR (11), \ + /* addl caller-saved general registers. */ \ + R_GR (2), R_GR (3), \ + /* Caller-saved FP registers. */ \ + R_FR (6), R_FR (7), \ + /* Caller-saved FP registers, used for parameters and return values. */ \ + R_FR (8), R_FR (9), R_FR (10), R_FR (11), \ + R_FR (12), R_FR (13), R_FR (14), R_FR (15), \ + /* Rotating caller-saved FP registers. */ \ + R_FR (32), R_FR (33), R_FR (34), R_FR (35), \ + R_FR (36), R_FR (37), R_FR (38), R_FR (39), R_FR (40), R_FR (41), \ + R_FR (42), R_FR (43), R_FR (44), R_FR (45), R_FR (46), R_FR (47), \ + R_FR (48), R_FR (49), R_FR (50), R_FR (51), R_FR (52), R_FR (53), \ + R_FR (54), R_FR (55), R_FR (56), R_FR (57), R_FR (58), R_FR (59), \ + R_FR (60), R_FR (61), R_FR (62), R_FR (63), R_FR (64), R_FR (65), \ + R_FR (66), R_FR (67), R_FR (68), R_FR (69), R_FR (70), R_FR (71), \ + R_FR (72), R_FR (73), R_FR (74), R_FR (75), R_FR (76), R_FR (77), \ + R_FR (78), R_FR (79), R_FR (80), R_FR (81), R_FR (82), R_FR (83), \ + R_FR (84), R_FR (85), R_FR (86), R_FR (87), R_FR (88), R_FR (89), \ + R_FR (90), R_FR (91), R_FR (92), R_FR (93), R_FR (94), R_FR (95), \ + R_FR (96), R_FR (97), R_FR (98), R_FR (99), R_FR (100), R_FR (101), \ + R_FR (102), R_FR (103), R_FR (104), R_FR (105), R_FR (106), R_FR (107), \ + R_FR (108), R_FR (109), R_FR (110), R_FR (111), R_FR (112), R_FR (113), \ + R_FR (114), R_FR (115), R_FR (116), R_FR (117), R_FR (118), R_FR (119), \ + R_FR (120), R_FR (121), R_FR (122), R_FR (123), R_FR (124), R_FR (125), \ + R_FR (126), R_FR (127), \ + /* Caller-saved predicate registers. */ \ + R_PR (6), R_PR (7), R_PR (8), R_PR (9), R_PR (10), R_PR (11), \ + R_PR (12), R_PR (13), R_PR (14), R_PR (15), \ + /* Rotating caller-saved predicate registers. 
*/ \ + R_PR (16), R_PR (17), \ + R_PR (18), R_PR (19), R_PR (20), R_PR (21), R_PR (22), R_PR (23), \ + R_PR (24), R_PR (25), R_PR (26), R_PR (27), R_PR (28), R_PR (29), \ + R_PR (30), R_PR (31), R_PR (32), R_PR (33), R_PR (34), R_PR (35), \ + R_PR (36), R_PR (37), R_PR (38), R_PR (39), R_PR (40), R_PR (41), \ + R_PR (42), R_PR (43), R_PR (44), R_PR (45), R_PR (46), R_PR (47), \ + R_PR (48), R_PR (49), R_PR (50), R_PR (51), R_PR (52), R_PR (53), \ + R_PR (54), R_PR (55), R_PR (56), R_PR (57), R_PR (58), R_PR (59), \ + R_PR (60), R_PR (61), R_PR (62), R_PR (63), \ + /* Caller-saved branch registers. */ \ + R_BR (6), R_BR (7), \ + \ + /* Stacked callee-saved general registers. */ \ + R_GR (32), R_GR (33), R_GR (34), R_GR (35), \ + R_GR (36), R_GR (37), R_GR (38), R_GR (39), R_GR (40), R_GR (41), \ + R_GR (42), R_GR (43), R_GR (44), R_GR (45), R_GR (46), R_GR (47), \ + R_GR (48), R_GR (49), R_GR (50), R_GR (51), R_GR (52), R_GR (53), \ + R_GR (54), R_GR (55), R_GR (56), R_GR (57), R_GR (58), R_GR (59), \ + R_GR (60), R_GR (61), R_GR (62), R_GR (63), R_GR (64), R_GR (65), \ + R_GR (66), R_GR (67), R_GR (68), R_GR (69), R_GR (70), R_GR (71), \ + R_GR (72), R_GR (73), R_GR (74), R_GR (75), R_GR (76), R_GR (77), \ + R_GR (78), R_GR (79), R_GR (80), R_GR (81), R_GR (82), R_GR (83), \ + R_GR (84), R_GR (85), R_GR (86), R_GR (87), R_GR (88), R_GR (89), \ + R_GR (90), R_GR (91), R_GR (92), R_GR (93), R_GR (94), R_GR (95), \ + R_GR (96), R_GR (97), R_GR (98), R_GR (99), R_GR (100), R_GR (101), \ + R_GR (102), R_GR (103), R_GR (104), R_GR (105), R_GR (106), R_GR (107), \ + R_GR (108), \ + /* Input registers. */ \ + R_GR (112), R_GR (113), R_GR (114), R_GR (115), R_GR (116), R_GR (117), \ + R_GR (118), R_GR (119), \ + /* Callee-saved general registers. */ \ + R_GR (4), R_GR (5), R_GR (6), R_GR (7), \ + /* Callee-saved FP registers. 
*/ \ + R_FR (2), R_FR (3), R_FR (4), R_FR (5), R_FR (16), R_FR (17), \ + R_FR (18), R_FR (19), R_FR (20), R_FR (21), R_FR (22), R_FR (23), \ + R_FR (24), R_FR (25), R_FR (26), R_FR (27), R_FR (28), R_FR (29), \ + R_FR (30), R_FR (31), \ + /* Callee-saved predicate registers. */ \ + R_PR (1), R_PR (2), R_PR (3), R_PR (4), R_PR (5), \ + /* Callee-saved branch registers. */ \ + R_BR (1), R_BR (2), R_BR (3), R_BR (4), R_BR (5), \ + \ + /* ??? Stacked registers reserved for fp, rp, and ar.pfs. */ \ + R_GR (109), R_GR (110), R_GR (111), \ + \ + /* Special general registers. */ \ + R_GR (0), R_GR (1), R_GR (12), R_GR (13), \ + /* Special FP registers. */ \ + R_FR (0), R_FR (1), \ + /* Special predicate registers. */ \ + R_PR (0), \ + /* Special branch registers. */ \ + R_BR (0), \ + /* Other fixed registers. */ \ + FRAME_POINTER_REGNUM, RETURN_ADDRESS_POINTER_REGNUM, \ + AR_CCV_REGNUM, AR_UNAT_REGNUM, AR_PFS_REGNUM, AR_LC_REGNUM, \ + AR_EC_REGNUM \ +} + +/* How Values Fit in Registers */ + +/* A C expression for the number of consecutive hard registers, starting at + register number REGNO, required to hold a value of mode MODE. */ + +/* ??? We say that BImode PR values require two registers. This allows us to + easily store the normal and inverted values. We use CCImode to indicate + a single predicate register. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((REGNO) == PR_REG (0) && (MODE) == DImode ? 64 \ + : PR_REGNO_P (REGNO) && (MODE) == BImode ? 2 \ + : PR_REGNO_P (REGNO) && (MODE) == CCImode ? 1 \ + : FR_REGNO_P (REGNO) && (MODE) == TFmode && INTEL_EXTENDED_IEEE_FORMAT ? 1 \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* A C expression that is nonzero if it is permissible to store a value of mode + MODE in hard register number REGNO (or in several registers starting with + that one). */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (FR_REGNO_P (REGNO) ? 
\ + GET_MODE_CLASS (MODE) != MODE_CC && \ + (MODE) != TImode && \ + (MODE) != BImode && \ + ((MODE) != TFmode || INTEL_EXTENDED_IEEE_FORMAT) \ + : PR_REGNO_P (REGNO) ? \ + (MODE) == BImode || GET_MODE_CLASS (MODE) == MODE_CC \ + : GR_REGNO_P (REGNO) ? (MODE) != CCImode && (MODE) != TFmode \ + : AR_REGNO_P (REGNO) ? (MODE) == DImode \ + : BR_REGNO_P (REGNO) ? (MODE) == DImode \ + : 0) + +/* A C expression that is nonzero if it is desirable to choose register + allocation so as to avoid move instructions between a value of mode MODE1 + and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, MODE2)' are + ever different for any R, then `MODES_TIEABLE_P (MODE1, MODE2)' must be + zero. */ +/* Don't tie integer and FP modes, as that causes us to get integer registers + allocated for FP instructions. TFmode only supported in FP registers so + we can't tie it with any other modes. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2) \ + && (((MODE1) == TFmode) == ((MODE2) == TFmode)) \ + && (((MODE1) == BImode) == ((MODE2) == BImode))) + +/* Handling Leaf Functions */ + +/* A C initializer for a vector, indexed by hard register number, which + contains 1 for a register that is allowable in a candidate for leaf function + treatment. */ +/* ??? This might be useful. */ +/* #define LEAF_REGISTERS */ + +/* A C expression whose value is the register number to which REGNO should be + renumbered, when a function is treated as a leaf function. */ +/* ??? This might be useful. */ +/* #define LEAF_REG_REMAP(REGNO) */ + + +/* Register Classes */ + +/* An enumeral type that must be defined with all the register class names as + enumeral values. `NO_REGS' must be first. `ALL_REGS' must be the last + register class, followed by one more enumeral value, `LIM_REG_CLASSES', + which is not a register class but rather tells how many classes there + are. */ +/* ??? 
When compiling without optimization, it is possible for the only use of + a pseudo to be a parameter load from the stack with a REG_EQUIV note. + Regclass handles this case specially and does not assign any costs to the + pseudo. The pseudo then ends up using the last class before ALL_REGS. + Thus we must not let either PR_REGS or BR_REGS be the last class. The + testcase for this is gcc.c-torture/execute/va-arg-7.c. */ +enum reg_class +{ + NO_REGS, + PR_REGS, + BR_REGS, + AR_M_REGS, + AR_I_REGS, + ADDL_REGS, + GR_REGS, + FR_REGS, + GR_AND_BR_REGS, + GR_AND_FR_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define GENERAL_REGS GR_REGS + +/* The number of distinct register classes. */ +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +/* An initializer containing the names of the register classes as C string + constants. These names are used in writing some of the debugging dumps. */ +#define REG_CLASS_NAMES \ +{ "NO_REGS", "PR_REGS", "BR_REGS", "AR_M_REGS", "AR_I_REGS", \ + "ADDL_REGS", "GR_REGS", "FR_REGS", \ + "GR_AND_BR_REGS", "GR_AND_FR_REGS", "ALL_REGS" } + +/* An initializer containing the contents of the register classes, as integers + which are bit masks. The Nth integer specifies the contents of class N. + The way the integer MASK is interpreted is that register R is in the class + if `MASK & (1 << R)' is 1. */ +#define REG_CLASS_CONTENTS \ +{ \ + /* NO_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x0000 }, \ + /* PR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0x0000 }, \ + /* BR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00FF }, \ + /* AR_M_REGS. 
*/ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x0C00 }, \ + /* AR_I_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x7000 }, \ + /* ADDL_REGS. */ \ + { 0x0000000F, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x0000 }, \ + /* GR_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x0300 }, \ + /* FR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x0000 }, \ + /* GR_AND_BR_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x03FF }, \ + /* GR_AND_FR_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x0300 }, \ + /* ALL_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0x7FFF }, \ +} + +/* A C expression whose value is a register class containing hard register + REGNO. In general there is more than one such class; choose a class which + is "minimal", meaning that no smaller class also contains the register. */ +/* The NO_REGS case is primarily for the benefit of rws_access_reg, which + may call here with private (invalid) register numbers, such as + REG_VOLATILE. */ +#define REGNO_REG_CLASS(REGNO) \ +(ADDL_REGNO_P (REGNO) ? ADDL_REGS \ + : GENERAL_REGNO_P (REGNO) ? GR_REGS \ + : FR_REGNO_P (REGNO) ? FR_REGS \ + : PR_REGNO_P (REGNO) ? PR_REGS \ + : BR_REGNO_P (REGNO) ? BR_REGS \ + : AR_M_REGNO_P (REGNO) ? AR_M_REGS \ + : AR_I_REGNO_P (REGNO) ? 
AR_I_REGS \ + : NO_REGS) + +/* A macro whose definition is the name of the class to which a valid base + register must belong. A base register is one used in an address which is + the register value plus a displacement. */ +#define BASE_REG_CLASS GENERAL_REGS + +/* A macro whose definition is the name of the class to which a valid index + register must belong. An index register is one used in an address where its + value is either multiplied by a scale factor or added to another register + (as well as added to a displacement). This is needed for POST_MODIFY. */ +#define INDEX_REG_CLASS GENERAL_REGS + +/* A C expression which defines the machine-dependent operand constraint + letters for register classes. If CHAR is such a letter, the value should be + the register class corresponding to it. Otherwise, the value should be + `NO_REGS'. The register letter `r', corresponding to class `GENERAL_REGS', + will not be passed to this macro; you do not need to handle it. */ + +#define REG_CLASS_FROM_LETTER(CHAR) \ +((CHAR) == 'f' ? FR_REGS \ + : (CHAR) == 'a' ? ADDL_REGS \ + : (CHAR) == 'b' ? BR_REGS \ + : (CHAR) == 'c' ? PR_REGS \ + : (CHAR) == 'd' ? AR_M_REGS \ + : (CHAR) == 'e' ? AR_I_REGS \ + : NO_REGS) + +/* A C expression which is nonzero if register number NUM is suitable for use + as a base register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard reg. */ +#define REGNO_OK_FOR_BASE_P(REGNO) \ + (GENERAL_REGNO_P (REGNO) || GENERAL_REGNO_P (reg_renumber[REGNO])) + +/* A C expression which is nonzero if register number NUM is suitable for use + as an index register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard reg. + This is needed for POST_MODIFY. 
*/ +#define REGNO_OK_FOR_INDEX_P(NUM) REGNO_OK_FOR_BASE_P (NUM) + +/* A C expression that places additional restrictions on the register class to + use when it is necessary to copy value X into a register in class CLASS. + The value is a register class; perhaps CLASS, or perhaps another, smaller + class. */ + +/* Don't allow volatile mem reloads into floating point registers. This + is defined to force reload to choose the r/m case instead of the f/f case + when reloading (set (reg fX) (mem/v)). + + Do not reload expressions into AR regs. */ + +#define PREFERRED_RELOAD_CLASS(X, CLASS) \ + (CLASS == FR_REGS && GET_CODE (X) == MEM && MEM_VOLATILE_P (X) ? NO_REGS \ + : CLASS == FR_REGS && GET_CODE (X) == CONST_DOUBLE ? NO_REGS \ + : GET_RTX_CLASS (GET_CODE (X)) != 'o' \ + && (CLASS == AR_M_REGS || CLASS == AR_I_REGS) ? NO_REGS \ + : CLASS) + +/* You should define this macro to indicate to the reload phase that it may + need to allocate at least one register for a reload in addition to the + register to contain the data. Specifically, if copying X to a register + CLASS in MODE requires an intermediate register, you should define this + to return the largest register class all of whose registers can be used + as intermediate registers or scratch registers. */ + +#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \ + ia64_secondary_reload_class (CLASS, MODE, X) + +/* Certain machines have the property that some registers cannot be copied to + some other registers without using memory. Define this macro on those + machines to be a C expression that is non-zero if objects of mode M in + registers of CLASS1 can only be copied to registers of class CLASS2 by + storing a register of CLASS1 into memory and loading that memory location + into a register of CLASS2. */ + +#if 0 +/* ??? May need this, but since we've disallowed TFmode in GR_REGS, + I'm not quite sure how it could be invoked. 
The normal problems + with unions should be solved with the addressof fiddling done by + movtf and friends. */ +#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \ + ((MODE) == TFmode && (((CLASS1) == GR_REGS && (CLASS2) == FR_REGS) \ + || ((CLASS1) == FR_REGS && (CLASS2) == GR_REGS))) +#endif + +/* A C expression for the maximum number of consecutive registers of + class CLASS needed to hold a value of mode MODE. + This is closely related to the macro `HARD_REGNO_NREGS'. */ + +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((MODE) == BImode && (CLASS) == PR_REGS ? 2 \ + : ((CLASS) == FR_REGS && (MODE) == TFmode) ? 1 \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* If defined, gives a class of registers that cannot be used as the + operand of a SUBREG that changes the mode of the object illegally. */ + +#define CLASS_CANNOT_CHANGE_MODE FR_REGS + +/* Defines illegal mode changes for CLASS_CANNOT_CHANGE_MODE. + In FP regs, we can't change FP values to integer values and vice + versa, but we can change e.g. DImode to SImode. */ + +#define CLASS_CANNOT_CHANGE_MODE_P(FROM,TO) \ + (GET_MODE_CLASS (FROM) != GET_MODE_CLASS (TO)) + +/* A C expression that defines the machine-dependent operand constraint + letters (`I', `J', `K', .. 'P') that specify particular ranges of + integer values. */ + +/* 14 bit signed immediate for arithmetic instructions. */ +#define CONST_OK_FOR_I(VALUE) \ + ((unsigned HOST_WIDE_INT)(VALUE) + 0x2000 < 0x4000) +/* 22 bit signed immediate for arith instructions with r0/r1/r2/r3 source. */ +#define CONST_OK_FOR_J(VALUE) \ + ((unsigned HOST_WIDE_INT)(VALUE) + 0x200000 < 0x400000) +/* 8 bit signed immediate for logical instructions. */ +#define CONST_OK_FOR_K(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) + 0x80 < 0x100) +/* 8 bit adjusted signed immediate for compare pseudo-ops. */ +#define CONST_OK_FOR_L(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) + 0x7F < 0x100) +/* 6 bit unsigned immediate for shift counts. 
*/ +#define CONST_OK_FOR_M(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) < 0x40) +/* 9 bit signed immediate for load/store post-increments. */ +#define CONST_OK_FOR_N(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) + 0x100 < 0x200) +/* 0 for r0. Used by Linux kernel, do not change. */ +#define CONST_OK_FOR_O(VALUE) ((VALUE) == 0) +/* 0 or -1 for dep instruction. */ +#define CONST_OK_FOR_P(VALUE) ((VALUE) == 0 || (VALUE) == -1) + +#define CONST_OK_FOR_LETTER_P(VALUE, C) \ +((C) == 'I' ? CONST_OK_FOR_I (VALUE) \ + : (C) == 'J' ? CONST_OK_FOR_J (VALUE) \ + : (C) == 'K' ? CONST_OK_FOR_K (VALUE) \ + : (C) == 'L' ? CONST_OK_FOR_L (VALUE) \ + : (C) == 'M' ? CONST_OK_FOR_M (VALUE) \ + : (C) == 'N' ? CONST_OK_FOR_N (VALUE) \ + : (C) == 'O' ? CONST_OK_FOR_O (VALUE) \ + : (C) == 'P' ? CONST_OK_FOR_P (VALUE) \ + : 0) + +/* A C expression that defines the machine-dependent operand constraint letters + (`G', `H') that specify particular ranges of `const_double' values. */ + +/* 0.0 and 1.0 for fr0 and fr1. */ +#define CONST_DOUBLE_OK_FOR_G(VALUE) \ + ((VALUE) == CONST0_RTX (GET_MODE (VALUE)) \ + || (VALUE) == CONST1_RTX (GET_MODE (VALUE))) + +#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ + ((C) == 'G' ? CONST_DOUBLE_OK_FOR_G (VALUE) : 0) + +/* A C expression that defines the optional machine-dependent constraint + letters (`Q', `R', `S', `T', `U') that can be used to segregate specific + types of operands, usually memory references, for the target machine. */ + +/* Non-volatile memory for FP_REG loads/stores. */ +#define CONSTRAINT_OK_FOR_Q(VALUE) \ + (memory_operand((VALUE), VOIDmode) && ! MEM_VOLATILE_P (VALUE)) +/* 1..4 for shladd arguments. */ +#define CONSTRAINT_OK_FOR_R(VALUE) \ + (GET_CODE (VALUE) == CONST_INT && INTVAL (VALUE) >= 1 && INTVAL (VALUE) <= 4) +/* Non-post-inc memory for asms and other unsavory creatures. 
*/ +#define CONSTRAINT_OK_FOR_S(VALUE) \ + (GET_CODE (VALUE) == MEM \ + && GET_RTX_CLASS (GET_CODE (XEXP ((VALUE), 0))) != 'a' \ + && (reload_in_progress || memory_operand ((VALUE), VOIDmode))) + +#define EXTRA_CONSTRAINT(VALUE, C) \ + ((C) == 'Q' ? CONSTRAINT_OK_FOR_Q (VALUE) \ + : (C) == 'R' ? CONSTRAINT_OK_FOR_R (VALUE) \ + : (C) == 'S' ? CONSTRAINT_OK_FOR_S (VALUE) \ + : 0) + +/* Basic Stack Layout */ + +/* Define this macro if pushing a word onto the stack moves the stack pointer + to a smaller address. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Define this macro if the addresses of local variable slots are at negative + offsets from the frame pointer. */ +/* #define FRAME_GROWS_DOWNWARD */ + +/* Offset from the frame pointer to the first local variable slot to + be allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* Offset from the stack pointer register to the first location at which + outgoing arguments are placed. If not specified, the default value of zero + is used. This is the proper value for most machines. */ +/* IA64 has a 16 byte scratch area that is at the bottom of the stack. */ +#define STACK_POINTER_OFFSET 16 + +/* Offset from the argument pointer register to the first argument's address. + On some machines it may depend on the data type of the function. */ +#define FIRST_PARM_OFFSET(FUNDECL) 0 + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame, after the + prologue. */ + +/* ??? Frames other than zero would likely require interpreting the frame + unwind info, so we don't try to support them. We would also need to define + DYNAMIC_CHAIN_ADDRESS and SETUP_FRAME_ADDRESS (for the reg stack flush). */ + +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + ((COUNT) == 0 ? return_address_pointer_rtx : const0_rtx) + +/* A C expression whose value is RTL representing the location of the incoming + return address at the beginning of any function, before the prologue. 
This + RTL is either a `REG', indicating that the return value is saved in `REG', + or a `MEM' representing a location in the stack. This enables DWARF2 + unwind info for C++ EH. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (VOIDmode, BR_REG (0)) + +/* ??? This is not defined because of three problems. + 1) dwarf2out.c assumes that DWARF_FRAME_RETURN_COLUMN fits in one byte. + The default value is FIRST_PSEUDO_REGISTER which doesn't. This can be + worked around by setting PC_REGNUM to FR_REG (0) which is an otherwise + unused register number. + 2) dwarf2out_frame_debug core dumps while processing prologue insns. We + need to refine which insns have RTX_FRAME_RELATED_P set and which don't. + 3) It isn't possible to turn off EH frame info by defining DWARF2_UNWIND_INFO + to zero, despite what the documentation implies, because it is tested in + a few places with #ifdef instead of #if. */ +#undef INCOMING_RETURN_ADDR_RTX + +/* A C expression whose value is an integer giving the offset, in bytes, from + the value of the stack pointer register to the top of the stack frame at the + beginning of any function, before the prologue. The top of the frame is + defined to be the value of the stack pointer in the previous frame, just + before the call instruction. */ +#define INCOMING_FRAME_SP_OFFSET 0 + + +/* Registers That Address the Stack Frame. */ + +/* The register number of the stack pointer register, which must also be a + fixed register according to `FIXED_REGISTERS'. On most machines, the + hardware determines which register this is. */ + +#define STACK_POINTER_REGNUM 12 + +/* The register number of the frame pointer register, which is used to access + automatic variables in the stack frame. On some machines, the hardware + determines which register this is. On other machines, you can choose any + register you wish for this purpose. */ + +#define FRAME_POINTER_REGNUM 328 + +/* Base register for access to local variables of the function. 
*/ +#define HARD_FRAME_POINTER_REGNUM LOC_REG (79) + +/* The register number of the arg pointer register, which is used to access the + function's argument list. */ +/* r0 won't otherwise be used, so put the always eliminated argument pointer + in it. */ +#define ARG_POINTER_REGNUM R_GR(0) + +/* The register number for the return address register. For IA-64, this + is not actually a pointer as the name suggests, but that's a name that + gen_rtx_REG already takes care to keep unique. We modify + return_address_pointer_rtx in ia64_expand_prologue to reference the + final output regnum. */ +#define RETURN_ADDRESS_POINTER_REGNUM 329 + +/* Register numbers used for passing a function's static chain pointer. */ +/* ??? The ABI sez the static chain should be passed as a normal parameter. */ +#define STATIC_CHAIN_REGNUM 15 + +/* Eliminating the Frame Pointer and the Arg Pointer */ + +/* A C expression which is nonzero if a function must have and use a frame + pointer. This expression is evaluated in the reload pass. If its value is + nonzero the function will have a frame pointer. */ +#define FRAME_POINTER_REQUIRED 0 + +/* Show we can debug even without a frame pointer. */ +#define CAN_DEBUG_WITHOUT_FP + +/* If defined, this macro specifies a table of register pairs used to eliminate + unneeded registers that point into the stack frame. */ + +#define ELIMINABLE_REGS \ +{ \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + {RETURN_ADDRESS_POINTER_REGNUM, BR_REG (0)}, \ +} + +/* A C expression that returns non-zero if the compiler is allowed to try to + replace register number FROM with register number TO. The frame pointer + is automatically handled. */ + +#define CAN_ELIMINATE(FROM, TO) \ + (TO == BR_REG (0) ? current_function_is_leaf : 1) + +/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. 
It + specifies the initial difference between the specified pair of + registers. This macro must be defined if `ELIMINABLE_REGS' is + defined. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = ia64_initial_elimination_offset ((FROM), (TO))) + +/* Passing Function Arguments on the Stack */ + +/* Define this macro if an argument declared in a prototype as an integral type + smaller than `int' should actually be passed as an `int'. In addition to + avoiding errors in certain cases of mismatch, it also makes for better code + on certain machines. */ +/* ??? Investigate. */ +/* #define PROMOTE_PROTOTYPES */ + +/* If defined, the maximum amount of space required for outgoing arguments will + be computed and placed into the variable + `current_function_outgoing_args_size'. */ + +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* A C expression that should indicate the number of bytes of its own arguments + that a function pops on returning, or 0 if the function pops no arguments + and the caller must therefore pop them all after the function returns. */ + +#define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, STACK_SIZE) 0 + + +/* Function Arguments in Registers */ + +#define MAX_ARGUMENT_SLOTS 8 +#define MAX_INT_RETURN_SLOTS 4 +#define GR_ARG_FIRST IN_REG (0) +#define GR_RET_FIRST GR_REG (8) +#define GR_RET_LAST GR_REG (11) +#define FR_ARG_FIRST FR_REG (8) +#define FR_RET_FIRST FR_REG (8) +#define FR_RET_LAST FR_REG (15) +#define AR_ARG_FIRST OUT_REG (0) + +/* A C expression that controls whether a function argument is passed in a + register, and which register. */ + +#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg (&CUM, MODE, TYPE, NAMED, 0) + +/* Define this macro if the target machine has "register windows", so that the + register in which a function sees an arguments is not necessarily the same + as the one in which the caller passed the argument. 
*/ + +#define FUNCTION_INCOMING_ARG(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg (&CUM, MODE, TYPE, NAMED, 1) + +/* A C expression for the number of words, at the beginning of an argument, + must be put in registers. The value must be zero for arguments that are + passed entirely in registers or that are entirely pushed on the stack. */ + +#define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg_partial_nregs (&CUM, MODE, TYPE, NAMED) + +/* A C expression that indicates when an argument must be passed by reference. + If nonzero for an argument, a copy of that argument is made in memory and a + pointer to the argument is passed instead of the argument itself. The + pointer is passed in whatever way is appropriate for passing a pointer to + that type. */ + +#define FUNCTION_ARG_PASS_BY_REFERENCE(CUM, MODE, TYPE, NAMED) 0 + +/* A C type for declaring a variable that is used as the first argument of + `FUNCTION_ARG' and other related values. For some target machines, the type + `int' suffices and can hold the number of bytes of argument so far. */ + +typedef struct ia64_args +{ + int words; /* # words of arguments so far */ + int fp_regs; /* # FR registers used so far */ + int prototype; /* whether function prototyped */ +} CUMULATIVE_ARGS; + +/* A C statement (sans semicolon) for initializing the variable CUM for the + state at the beginning of the argument list. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT) \ +do { \ + (CUM).words = 0; \ + (CUM).fp_regs = 0; \ + (CUM).prototype = ((FNTYPE) && TYPE_ARG_TYPES (FNTYPE)) || (LIBNAME); \ +} while (0) + +/* Like `INIT_CUMULATIVE_ARGS' but overrides it for the purposes of finding the + arguments for the function being compiled. If this macro is undefined, + `INIT_CUMULATIVE_ARGS' is used instead. */ + +/* We set prototype to true so that we never try to return a PARALLEL from + function_arg. 
*/ +#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \ +do { \ + (CUM).words = 0; \ + (CUM).fp_regs = 0; \ + (CUM).prototype = 1; \ +} while (0) + +/* A C statement (sans semicolon) to update the summarizer variable CUM to + advance past an argument in the argument list. The values MODE, TYPE and + NAMED describe that argument. Once this is done, the variable CUM is + suitable for analyzing the *following* argument with `FUNCTION_ARG'. */ + +#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg_advance (&CUM, MODE, TYPE, NAMED) + +/* If defined, a C expression that gives the alignment boundary, in bits, of an + argument with the specified mode and type. */ + +/* Arguments with alignment larger than 8 bytes start at the next even + boundary. See ia64_function_arg. */ + +#define FUNCTION_ARG_BOUNDARY(MODE, TYPE) \ + (((TYPE) ? (TYPE_ALIGN (TYPE) > 8 * BITS_PER_UNIT) \ + : (((((MODE) == BLKmode \ + ? int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) \ + + UNITS_PER_WORD - 1) / UNITS_PER_WORD) > 1)) \ + ? 128 : PARM_BOUNDARY) + +/* A C expression that is nonzero if REGNO is the number of a hard register in + which function arguments are sometimes passed. This does *not* include + implicit arguments such as the static chain and the structure-value address. + On many machines, no registers can be used for this purpose since all + function arguments are pushed on the stack. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ +(((REGNO) >= GR_ARG_FIRST && (REGNO) < (GR_ARG_FIRST + MAX_ARGUMENT_SLOTS)) \ + || ((REGNO) >= FR_ARG_FIRST && (REGNO) < (FR_ARG_FIRST + MAX_ARGUMENT_SLOTS))) + +/* Implement `va_start' for varargs and stdarg. */ +#define EXPAND_BUILTIN_VA_START(stdarg, valist, nextarg) \ + ia64_va_start (stdarg, valist, nextarg) + +/* Implement `va_arg'. 
*/ +#define EXPAND_BUILTIN_VA_ARG(valist, type) \ + ia64_va_arg (valist, type) + +/* How Scalar Function Values are Returned */ + +/* A C expression to create an RTX representing the place where a function + returns a value of data type VALTYPE. */ + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + ia64_function_value (VALTYPE, FUNC) + +/* A C expression to create an RTX representing the place where a library + function returns a value of mode MODE. */ + +#define LIBCALL_VALUE(MODE) \ + gen_rtx_REG (MODE, \ + (((GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) && \ + ((MODE) != TFmode || INTEL_EXTENDED_IEEE_FORMAT)) \ + ? FR_RET_FIRST : GR_RET_FIRST)) + +/* A C expression that is nonzero if REGNO is the number of a hard register in + which the values of called function may come back. */ + +#define FUNCTION_VALUE_REGNO_P(REGNO) \ + (((REGNO) >= GR_RET_FIRST && (REGNO) <= GR_RET_LAST) \ + || ((REGNO) >= FR_RET_FIRST && (REGNO) <= FR_RET_LAST)) + + +/* How Large Values are Returned */ + +/* A nonzero value says to return the function value in memory, just as large + structures are always returned. */ + +#define RETURN_IN_MEMORY(TYPE) \ + ia64_return_in_memory (TYPE) + +/* If you define this macro to be 0, then the conventions used for structure + and union return values are decided by the `RETURN_IN_MEMORY' macro. */ + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* If the structure value address is passed in a register, then + `STRUCT_VALUE_REGNUM' should be the number of that register. */ + +#define STRUCT_VALUE_REGNUM GR_REG (8) + + +/* Caller-Saves Register Allocation */ + +/* A C expression to determine whether it is worthwhile to consider placing a + pseudo-register in a call-clobbered hard register and saving and restoring + it around each function call. The expression should be 1 when this is worth + doing, and 0 otherwise. + + If you don't define this macro, a default is used which is good on most + machines: `4 * CALLS < REFS'. 
*/ +/* ??? Investigate. */ +/* #define CALLER_SAVE_PROFITABLE(REFS, CALLS) */ + + +/* Function Entry and Exit */ + +/* Define this macro as a C expression that is nonzero if the return + instruction or the function epilogue ignores the value of the stack pointer; + in other words, if it is safe to delete an instruction to adjust the stack + pointer before a return from the function. */ + +#define EXIT_IGNORE_STACK 1 + +/* Define this macro as a C expression that is nonzero for registers + used by the epilogue or the `return' pattern. */ + +#define EPILOGUE_USES(REGNO) ia64_epilogue_uses (REGNO) + +/* Output at beginning of assembler file. */ + +#define ASM_FILE_START(FILE) \ + emit_safe_across_calls (FILE) + +/* A C compound statement that outputs the assembler code for a thunk function, + used to implement C++ virtual function calls with multiple inheritance. */ + +#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \ +do { \ + if (CONST_OK_FOR_I (DELTA)) \ + { \ + fprintf (FILE, "\tadds r32 = "); \ + fprintf (FILE, HOST_WIDE_INT_PRINT_DEC, (DELTA)); \ + fprintf (FILE, ", r32\n"); \ + } \ + else \ + { \ + if (CONST_OK_FOR_J (DELTA)) \ + { \ + fprintf (FILE, "\taddl r2 = "); \ + fprintf (FILE, HOST_WIDE_INT_PRINT_DEC, (DELTA)); \ + fprintf (FILE, ", r0\n"); \ + } \ + else \ + { \ + fprintf (FILE, "\tmovl r2 = "); \ + fprintf (FILE, HOST_WIDE_INT_PRINT_DEC, (DELTA)); \ + fprintf (FILE, "\n"); \ + } \ + fprintf (FILE, "\t;;\n"); \ + fprintf (FILE, "\tadd r32 = r2, r32\n"); \ + } \ + fprintf (FILE, "\tbr "); \ + assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* Output part N of a function descriptor for DECL. For ia64, both + words are emitted with a single relocation, so ignore N > 0. 
*/ +#define ASM_OUTPUT_FDESC(FILE, DECL, PART) \ +do { \ + if ((PART) == 0) \ + { \ + fputs ("\tdata16.ua @iplt(", FILE); \ + assemble_name (FILE, XSTR (XEXP (DECL_RTL (DECL), 0), 0)); \ + fputs (")\n", FILE); \ + } \ +} while (0) + +/* Generating Code for Profiling. */ + +/* A C statement or compound statement to output to FILE some assembler code to + call the profiling subroutine `mcount'. */ + +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(FILE, LABELNO) \ +do { \ + char buf[20]; \ + ASM_GENERATE_INTERNAL_LABEL (buf, "LP", LABELNO); \ + fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", FILE); \ + if (TARGET_AUTO_PIC) \ + fputs ("\tmovl out3 = @gprel(", FILE); \ + else \ + fputs ("\taddl out3 = @ltoff(", FILE); \ + assemble_name (FILE, buf); \ + if (TARGET_AUTO_PIC) \ + fputs (");;\n", FILE); \ + else \ + fputs ("), r1;;\n", FILE); \ + fputs ("\tmov out1 = r1\n", FILE); \ + fputs ("\tmov out2 = b0\n", FILE); \ + fputs ("\tbr.call.sptk.many b0 = _mcount;;\n", FILE); \ +} while (0) + +/* Implementing the Varargs Macros. */ + +/* Define this macro to store the anonymous register arguments into the stack + so that all the arguments appear to have been passed consecutively on the + stack. */ + +#define SETUP_INCOMING_VARARGS(ARGS_SO_FAR, MODE, TYPE, PRETEND_ARGS_SIZE, SECOND_TIME) \ + ia64_setup_incoming_varargs (ARGS_SO_FAR, MODE, TYPE, & PRETEND_ARGS_SIZE, SECOND_TIME) + +/* Define this macro if the location where a function argument is passed + depends on whether or not it is a named argument. */ + +#define STRICT_ARGUMENT_NAMING 1 + + +/* Trampolines for Nested Functions. */ + +/* We need 32 bytes, so we can save the sp, ar.rnat, ar.bsp, and ar.pfs of + the function containing a non-local goto target. */ + +#define STACK_SAVEAREA_MODE(LEVEL) \ + ((LEVEL) == SAVE_NONLOCAL ? OImode : Pmode) + +/* Output assembler code for a block containing the constant parts of + a trampoline, leaving space for the variable parts. 
+ + The trampoline should set the static chain pointer to value placed + into the trampoline and should branch to the specified routine. + To make the normal indirect-subroutine calling convention work, + the trampoline must look like a function descriptor; the first + word being the target address and the second being the target's + global pointer. + + We abuse the concept of a global pointer by arranging for it + to point to the data we need to load. The complete trampoline + has the following form: + + +-------------------+ \ + TRAMP: | __ia64_trampoline | | + +-------------------+ > fake function descriptor + | TRAMP+16 | | + +-------------------+ / + | target descriptor | + +-------------------+ + | static link | + +-------------------+ +*/ + +/* A C expression for the size in bytes of the trampoline, as an integer. */ + +#define TRAMPOLINE_SIZE 32 + +/* Alignment required for trampolines, in bits. */ + +#define TRAMPOLINE_ALIGNMENT 64 + +/* A C statement to initialize the variable parts of a trampoline. */ + +#define INITIALIZE_TRAMPOLINE(ADDR, FNADDR, STATIC_CHAIN) \ + ia64_initialize_trampoline((ADDR), (FNADDR), (STATIC_CHAIN)) + +/* Implicit Calls to Library Routines */ + +/* Define this macro if GNU CC should generate calls to the System V (and ANSI + C) library functions `memcpy' and `memset' rather than the BSD functions + `bcopy' and `bzero'. */ + +#define TARGET_MEM_FUNCTIONS + + +/* Addressing Modes */ + +/* Define this macro if the machine supports post-increment addressing. */ + +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_POST_MODIFY_DISP 1 +#define HAVE_POST_MODIFY_REG 1 + +/* A C expression that is 1 if the RTX X is a constant which is a valid + address. */ + +#define CONSTANT_ADDRESS_P(X) 0 + +/* The max number of registers that can appear in a valid memory address. 
*/ + +#define MAX_REGS_PER_ADDRESS 2 + +/* A C compound statement with a conditional `goto LABEL;' executed if X (an + RTX) is a legitimate memory address on the target machine for a memory + operand of mode MODE. */ + +#define LEGITIMATE_ADDRESS_REG(X) \ + ((GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + || (GET_CODE (X) == SUBREG && GET_CODE (XEXP (X, 0)) == REG \ + && REG_OK_FOR_BASE_P (XEXP (X, 0)))) + +#define LEGITIMATE_ADDRESS_DISP(R, X) \ + (GET_CODE (X) == PLUS \ + && rtx_equal_p (R, XEXP (X, 0)) \ + && (LEGITIMATE_ADDRESS_REG (XEXP (X, 1)) \ + || (GET_CODE (XEXP (X, 1)) == CONST_INT \ + && INTVAL (XEXP (X, 1)) >= -256 \ + && INTVAL (XEXP (X, 1)) < 256))) + +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ +do { \ + if (LEGITIMATE_ADDRESS_REG (X)) \ + goto LABEL; \ + else if ((GET_CODE (X) == POST_INC || GET_CODE (X) == POST_DEC) \ + && LEGITIMATE_ADDRESS_REG (XEXP (X, 0)) \ + && XEXP (X, 0) != arg_pointer_rtx) \ + goto LABEL; \ + else if (GET_CODE (X) == POST_MODIFY \ + && LEGITIMATE_ADDRESS_REG (XEXP (X, 0)) \ + && XEXP (X, 0) != arg_pointer_rtx \ + && LEGITIMATE_ADDRESS_DISP (XEXP (X, 0), XEXP (X, 1))) \ + goto LABEL; \ +} while (0) + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as a base register. */ + +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) +#else +#define REG_OK_FOR_BASE_P(X) \ + (GENERAL_REGNO_P (REGNO (X)) || (REGNO (X) >= FIRST_PSEUDO_REGISTER)) +#endif + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as an index register. This is needed for POST_MODIFY. */ + +#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X) + +/* A C compound statement that attempts to replace X with a valid memory + address for an operand of mode MODE. + + This must be present, but there is nothing useful to be done here. 
*/ + +#define LEGITIMIZE_ADDRESS(X, OLDX, MODE, WIN) + +/* A C statement or compound statement with a conditional `goto LABEL;' + executed if memory address X (an RTX) can have different meanings depending + on the machine mode of the memory reference it is used for or if the address + is valid for some modes but not others. */ + +#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \ + if (GET_CODE (ADDR) == POST_DEC || GET_CODE (ADDR) == POST_INC) \ + goto LABEL; + +/* A C expression that is nonzero if X is a legitimate constant for an + immediate operand on the target machine. */ + +#define LEGITIMATE_CONSTANT_P(X) \ + (GET_CODE (X) != CONST_DOUBLE || GET_MODE (X) == VOIDmode \ + || GET_MODE (X) == DImode || CONST_DOUBLE_OK_FOR_G (X)) \ + + +/* Condition Code Status */ + +/* One some machines not all possible comparisons are defined, but you can + convert an invalid comparison into a valid one. */ +/* ??? Investigate. See the alpha definition. */ +/* #define CANONICALIZE_COMPARISON(CODE, OP0, OP1) */ + + +/* Describing Relative Costs of Operations */ + +/* A part of a C `switch' statement that describes the relative costs of + constant RTL expressions. */ + +/* ??? This is incomplete. */ + +#define CONST_COSTS(X, CODE, OUTER_CODE) \ + case CONST_INT: \ + if ((X) == const0_rtx) \ + return 0; \ + switch (OUTER_CODE) \ + { \ + case SET: \ + return CONST_OK_FOR_J (INTVAL (X)) ? 0 : COSTS_N_INSNS (1); \ + case PLUS: \ + if (CONST_OK_FOR_I (INTVAL (X))) \ + return 0; \ + if (CONST_OK_FOR_J (INTVAL (X))) \ + return 1; \ + return COSTS_N_INSNS (1); \ + default: \ + if (CONST_OK_FOR_K (INTVAL (X)) || CONST_OK_FOR_L (INTVAL (X))) \ + return 0; \ + return COSTS_N_INSNS (1); \ + } \ + case CONST_DOUBLE: \ + return COSTS_N_INSNS (1); \ + case CONST: \ + case SYMBOL_REF: \ + case LABEL_REF: \ + return COSTS_N_INSNS (3); + +/* Like `CONST_COSTS' but applies to nonconstant RTL expressions. 
*/ + +#define RTX_COSTS(X, CODE, OUTER_CODE) \ + case MULT: \ + /* For multiplies wider than HImode, we have to go to the FPU, \ + which normally involves copies. Plus there's the latency \ + of the multiply itself, and the latency of the instructions to \ + transfer integer regs to FP regs. */ \ + if (GET_MODE_SIZE (GET_MODE (X)) > 2) \ + return COSTS_N_INSNS (10); \ + return COSTS_N_INSNS (2); \ + case PLUS: \ + case MINUS: \ + case ASHIFT: \ + case ASHIFTRT: \ + case LSHIFTRT: \ + return COSTS_N_INSNS (1); \ + case DIV: \ + case UDIV: \ + case MOD: \ + case UMOD: \ + /* We make divide expensive, so that divide-by-constant will be \ + optimized to a multiply. */ \ + return COSTS_N_INSNS (60); + +/* An expression giving the cost of an addressing mode that contains ADDRESS. + If not defined, the cost is computed from the ADDRESS expression and the + `CONST_COSTS' values. */ + +#define ADDRESS_COST(ADDRESS) 0 + +/* A C expression for the cost of moving data from a register in class FROM to + one in class TO, using MODE. */ + +#define REGISTER_MOVE_COST ia64_register_move_cost + +/* A C expression for the cost of moving data of mode M between a + register and memory. */ +#define MEMORY_MOVE_COST(MODE,CLASS,IN) \ + ((CLASS) == GENERAL_REGS || (CLASS) == FR_REGS \ + || (CLASS) == GR_AND_FR_REGS ? 4 : 10) + +/* A C expression for the cost of a branch instruction. A value of 1 is the + default; other values are interpreted relative to that. Used by the + if-conversion code as max instruction count. */ +/* ??? This requires investigation. The primary effect might be how + many additional insn groups we run into, vs how good the dynamic + branch predictor is. */ + +#define BRANCH_COST 6 + +/* Define this macro as a C expression which is nonzero if accessing less than + a word of memory (i.e. a `char' or a `short') is no faster than accessing a + word of memory. 
*/ + +#define SLOW_BYTE_ACCESS 1 + +/* Define this macro if it is as good or better to call a constant function + address than to call an address kept in a register. + + Indirect function calls are more expensive that direct function calls, so + don't cse function addresses. */ + +#define NO_FUNCTION_CSE + + +/* Dividing the output into sections. */ + +/* A C expression whose value is a string containing the assembler operation + that should precede instructions and read-only data. */ + +#define TEXT_SECTION_ASM_OP "\t.text" + +/* A C expression whose value is a string containing the assembler operation to + identify the following data as writable initialized data. */ + +#define DATA_SECTION_ASM_OP "\t.data" + +/* If defined, a C expression whose value is a string containing the assembler + operation to identify the following data as uninitialized global data. */ + +#define BSS_SECTION_ASM_OP "\t.bss" + +/* Define this macro if references to a symbol must be treated differently + depending on something about the variable or function named by the symbol + (such as what section it is in). */ + +#define ENCODE_SECTION_INFO(DECL) ia64_encode_section_info (DECL) + +/* If a variable is weakened, made one only or moved into a different + section, it may be necessary to redo the section info to move the + variable out of sdata. */ + +#define REDO_SECTION_INFO_P(DECL) \ + ((TREE_CODE (DECL) == VAR_DECL) \ + && (DECL_ONE_ONLY (DECL) || DECL_WEAK (DECL) || DECL_COMMON (DECL) \ + || DECL_SECTION_NAME (DECL) != 0)) + +#define SDATA_NAME_FLAG_CHAR '@' + +#define IA64_DEFAULT_GVALUE 8 + +/* Decode SYM_NAME and store the real name part in VAR, sans the characters + that encode section info. */ + +#define STRIP_NAME_ENCODING(VAR, SYMBOL_NAME) \ +do { \ + (VAR) = (SYMBOL_NAME); \ + if ((VAR)[0] == SDATA_NAME_FLAG_CHAR) \ + (VAR)++; \ + if ((VAR)[0] == '*') \ + (VAR)++; \ +} while (0) + +/* Position Independent Code. 
*/ + +/* The register number of the register used to address a table of static data + addresses in memory. */ + +/* ??? Should modify ia64.md to use pic_offset_table_rtx instead of + gen_rtx_REG (DImode, 1). */ + +/* ??? Should we set flag_pic? Probably need to define + LEGITIMIZE_PIC_OPERAND_P to make that work. */ + +#define PIC_OFFSET_TABLE_REGNUM GR_REG (1) + +/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM' is + clobbered by calls. */ + +#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED + + +/* The Overall Framework of an Assembler File. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at the + end of the line. */ + +#define ASM_COMMENT_START "//" + +/* A C string constant for text to be output before each `asm' statement or + group of consecutive ones. */ + +/* ??? This won't work with the Intel assembler, because it does not accept + # as a comment start character. However, //APP does not work in gas, so we + can't use that either. Same problem for ASM_APP_OFF below. */ + +#define ASM_APP_ON "#APP\n" + +/* A C string constant for text to be output after each `asm' statement or + group of consecutive ones. */ + +#define ASM_APP_OFF "#NO_APP\n" + + +/* Output of Data. */ + +/* This is how to output an assembler line defining a `char' constant + to an xdata segment. */ + +#define ASM_OUTPUT_XDATA_CHAR(FILE, SECTION, VALUE) \ +do { \ + fprintf (FILE, "\t.xdata1\t\"%s\", ", SECTION); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining a `short' constant + to an xdata segment. */ + +#define ASM_OUTPUT_XDATA_SHORT(FILE, SECTION, VALUE) \ +do { \ + fprintf (FILE, "\t.xdata2\t\"%s\", ", SECTION); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining an `int' constant + to an xdata segment. 
We also handle symbol output here. */ + +/* ??? For ILP32, also need to handle function addresses here. */ + +#define ASM_OUTPUT_XDATA_INT(FILE, SECTION, VALUE) \ +do { \ + fprintf (FILE, "\t.xdata4\t\"%s\", ", SECTION); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining a `long' constant + to an xdata segment. We also handle symbol output here. */ + +#define ASM_OUTPUT_XDATA_DOUBLE_INT(FILE, SECTION, VALUE) \ +do { \ + int need_closing_paren = 0; \ + fprintf (FILE, "\t.xdata8\t\"%s\", ", SECTION); \ + if (!(TARGET_NO_PIC || TARGET_AUTO_PIC) \ + && GET_CODE (VALUE) == SYMBOL_REF) \ + { \ + fprintf (FILE, SYMBOL_REF_FLAG (VALUE) ? "@fptr(" : "@segrel("); \ + need_closing_paren = 1; \ + } \ + output_addr_const (FILE, VALUE); \ + if (need_closing_paren) \ + fprintf (FILE, ")"); \ + fprintf (FILE, "\n"); \ +} while (0) + + + +/* Output of Uninitialized Variables. */ + +/* This is all handled by svr4.h. */ + + +/* Output and Generation of Labels. */ + +/* A C statement (sans semicolon) to output to the stdio stream STREAM the + assembler definition of a label named NAME. */ + +/* See the ASM_OUTPUT_LABELREF definition in sysv4.h for an explanation of + why ia64_asm_output_label exists. */ + +extern int ia64_asm_output_label; +#define ASM_OUTPUT_LABEL(STREAM, NAME) \ +do { \ + ia64_asm_output_label = 1; \ + assemble_name (STREAM, NAME); \ + fputs (":\n", STREAM); \ + ia64_asm_output_label = 0; \ +} while (0) + +/* A C statement (sans semicolon) to output to the stdio stream STREAM some + commands that will make the label NAME global; that is, available for + reference from other files. 
*/ + +#define ASM_GLOBALIZE_LABEL(STREAM,NAME) \ +do { \ + fputs ("\t.global ", STREAM); \ + assemble_name (STREAM, NAME); \ + fputs ("\n", STREAM); \ +} while (0) + +/* A C statement (sans semicolon) to output to the stdio stream STREAM any text + necessary for declaring the name of an external symbol named NAME which is + referenced in this compilation but not defined. */ + +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + ia64_asm_output_external (FILE, DECL, NAME) + +/* A C statement to store into the string STRING a label whose name is made + from the string PREFIX and the number NUM. */ + +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ +do { \ + sprintf (LABEL, "*.%s%d", PREFIX, NUM); \ +} while (0) + +/* A C expression to assign to OUTVAR (which is a variable of type `char *') a + newly allocated string made from the string NAME and the number NUMBER, with + some suitable punctuation added. */ + +/* ??? Not sure if using a ? in the name for Intel as is safe. */ + +#define ASM_FORMAT_PRIVATE_NAME(OUTVAR, NAME, NUMBER) \ +do { \ + (OUTVAR) = (char *) alloca (strlen (NAME) + 12); \ + sprintf (OUTVAR, "%s%c%ld", (NAME), (TARGET_GNU_AS ? '.' : '?'), \ + (long)(NUMBER)); \ +} while (0) + +/* A C statement to output to the stdio stream STREAM assembler code which + defines (equates) the symbol NAME to have the value VALUE. */ + +#define ASM_OUTPUT_DEF(STREAM, NAME, VALUE) \ +do { \ + assemble_name (STREAM, NAME); \ + fputs (" = ", STREAM); \ + assemble_name (STREAM, VALUE); \ + fputc ('\n', STREAM); \ +} while (0) + + +/* Macros Controlling Initialization Routines. */ + +/* This is handled by svr4.h and sysv4.h. */ + + +/* Output of Assembler Instructions. */ + +/* A C initializer containing the assembler's names for the machine registers, + each one as a C string constant. */ + +#define REGISTER_NAMES \ +{ \ + /* General registers. 
*/ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", \ + "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", \ + "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", \ + "r30", "r31", \ + /* Local registers. */ \ + "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", \ + "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", \ + "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", \ + "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", \ + "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", \ + "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", \ + "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", \ + "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", \ + "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", \ + "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79", \ + /* Input registers. */ \ + "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7", \ + /* Output registers. */ \ + "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7", \ + /* Floating-point registers. 
*/ \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", \ + "f10", "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19", \ + "f20", "f21", "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", \ + "f30", "f31", "f32", "f33", "f34", "f35", "f36", "f37", "f38", "f39", \ + "f40", "f41", "f42", "f43", "f44", "f45", "f46", "f47", "f48", "f49", \ + "f50", "f51", "f52", "f53", "f54", "f55", "f56", "f57", "f58", "f59", \ + "f60", "f61", "f62", "f63", "f64", "f65", "f66", "f67", "f68", "f69", \ + "f70", "f71", "f72", "f73", "f74", "f75", "f76", "f77", "f78", "f79", \ + "f80", "f81", "f82", "f83", "f84", "f85", "f86", "f87", "f88", "f89", \ + "f90", "f91", "f92", "f93", "f94", "f95", "f96", "f97", "f98", "f99", \ + "f100","f101","f102","f103","f104","f105","f106","f107","f108","f109",\ + "f110","f111","f112","f113","f114","f115","f116","f117","f118","f119",\ + "f120","f121","f122","f123","f124","f125","f126","f127", \ + /* Predicate registers. */ \ + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", \ + "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", \ + "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", \ + "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", \ + "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", \ + "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", \ + "p60", "p61", "p62", "p63", \ + /* Branch registers. */ \ + "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", \ + /* Frame pointer. Return address. */ \ + "sfp", "retaddr", "ar.ccv", "ar.unat", "ar.pfs", "ar.lc", "ar.ec", \ +} + +/* If defined, a C initializer for an array of structures containing a name and + a register number. This macro defines additional names for hard registers, + thus allowing the `asm' option in declarations to refer to registers using + alternate names. 
*/ + +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + { "gp", R_GR (1) }, \ + { "sp", R_GR (12) }, \ + { "in0", IN_REG (0) }, \ + { "in1", IN_REG (1) }, \ + { "in2", IN_REG (2) }, \ + { "in3", IN_REG (3) }, \ + { "in4", IN_REG (4) }, \ + { "in5", IN_REG (5) }, \ + { "in6", IN_REG (6) }, \ + { "in7", IN_REG (7) }, \ + { "out0", OUT_REG (0) }, \ + { "out1", OUT_REG (1) }, \ + { "out2", OUT_REG (2) }, \ + { "out3", OUT_REG (3) }, \ + { "out4", OUT_REG (4) }, \ + { "out5", OUT_REG (5) }, \ + { "out6", OUT_REG (6) }, \ + { "out7", OUT_REG (7) }, \ + { "loc0", LOC_REG (0) }, \ + { "loc1", LOC_REG (1) }, \ + { "loc2", LOC_REG (2) }, \ + { "loc3", LOC_REG (3) }, \ + { "loc4", LOC_REG (4) }, \ + { "loc5", LOC_REG (5) }, \ + { "loc6", LOC_REG (6) }, \ + { "loc7", LOC_REG (7) }, \ + { "loc8", LOC_REG (8) }, \ + { "loc9", LOC_REG (9) }, \ + { "loc10", LOC_REG (10) }, \ + { "loc11", LOC_REG (11) }, \ + { "loc12", LOC_REG (12) }, \ + { "loc13", LOC_REG (13) }, \ + { "loc14", LOC_REG (14) }, \ + { "loc15", LOC_REG (15) }, \ + { "loc16", LOC_REG (16) }, \ + { "loc17", LOC_REG (17) }, \ + { "loc18", LOC_REG (18) }, \ + { "loc19", LOC_REG (19) }, \ + { "loc20", LOC_REG (20) }, \ + { "loc21", LOC_REG (21) }, \ + { "loc22", LOC_REG (22) }, \ + { "loc23", LOC_REG (23) }, \ + { "loc24", LOC_REG (24) }, \ + { "loc25", LOC_REG (25) }, \ + { "loc26", LOC_REG (26) }, \ + { "loc27", LOC_REG (27) }, \ + { "loc28", LOC_REG (28) }, \ + { "loc29", LOC_REG (29) }, \ + { "loc30", LOC_REG (30) }, \ + { "loc31", LOC_REG (31) }, \ + { "loc32", LOC_REG (32) }, \ + { "loc33", LOC_REG (33) }, \ + { "loc34", LOC_REG (34) }, \ + { "loc35", LOC_REG (35) }, \ + { "loc36", LOC_REG (36) }, \ + { "loc37", LOC_REG (37) }, \ + { "loc38", LOC_REG (38) }, \ + { "loc39", LOC_REG (39) }, \ + { "loc40", LOC_REG (40) }, \ + { "loc41", LOC_REG (41) }, \ + { "loc42", LOC_REG (42) }, \ + { "loc43", LOC_REG (43) }, \ + { "loc44", LOC_REG (44) }, \ + { "loc45", LOC_REG (45) }, \ + { "loc46", LOC_REG (46) }, \ + { "loc47", 
LOC_REG (47) }, \ + { "loc48", LOC_REG (48) }, \ + { "loc49", LOC_REG (49) }, \ + { "loc50", LOC_REG (50) }, \ + { "loc51", LOC_REG (51) }, \ + { "loc52", LOC_REG (52) }, \ + { "loc53", LOC_REG (53) }, \ + { "loc54", LOC_REG (54) }, \ + { "loc55", LOC_REG (55) }, \ + { "loc56", LOC_REG (56) }, \ + { "loc57", LOC_REG (57) }, \ + { "loc58", LOC_REG (58) }, \ + { "loc59", LOC_REG (59) }, \ + { "loc60", LOC_REG (60) }, \ + { "loc61", LOC_REG (61) }, \ + { "loc62", LOC_REG (62) }, \ + { "loc63", LOC_REG (63) }, \ + { "loc64", LOC_REG (64) }, \ + { "loc65", LOC_REG (65) }, \ + { "loc66", LOC_REG (66) }, \ + { "loc67", LOC_REG (67) }, \ + { "loc68", LOC_REG (68) }, \ + { "loc69", LOC_REG (69) }, \ + { "loc70", LOC_REG (70) }, \ + { "loc71", LOC_REG (71) }, \ + { "loc72", LOC_REG (72) }, \ + { "loc73", LOC_REG (73) }, \ + { "loc74", LOC_REG (74) }, \ + { "loc75", LOC_REG (75) }, \ + { "loc76", LOC_REG (76) }, \ + { "loc77", LOC_REG (77) }, \ + { "loc78", LOC_REG (78) }, \ + { "loc79", LOC_REG (79) }, \ +} + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand X. X is an RTL expression. */ + +#define PRINT_OPERAND(STREAM, X, CODE) \ + ia64_print_operand (STREAM, X, CODE) + +/* A C expression which evaluates to true if CODE is a valid punctuation + character for use in the `PRINT_OPERAND' macro. */ + +/* ??? Keep this around for now, as we might need it later. */ + +#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \ + ((CODE) == '+' || (CODE) == ',') + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand that is a memory reference whose address is X. X + is an RTL expression. */ + +#define PRINT_OPERAND_ADDRESS(STREAM, X) \ + ia64_print_operand_address (STREAM, X) + +/* If defined, C string expressions to be used for the `%R', `%L', `%U', and + `%I' options of `asm_fprintf' (see `final.c'). */ + +#define REGISTER_PREFIX "" +#define LOCAL_LABEL_PREFIX "." 
+#define USER_LABEL_PREFIX "" +#define IMMEDIATE_PREFIX "" + + +/* Output of dispatch tables. */ + +/* This macro should be provided on machines where the addresses in a dispatch + table are relative to the table's own address. */ + +/* ??? Depends on the pointer size. */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + fprintf (STREAM, "\tdata8 @pcrel(.L%d)\n", VALUE) + +/* This is how to output an element of a case-vector that is absolute. + (Ia64 does not use such vectors, but we must define this macro anyway.) */ + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) abort () + +/* Jump tables only need 8 byte alignment. */ + +#define ADDR_VEC_ALIGN(ADDR_VEC) 3 + + +/* Assembler Commands for Exception Regions. */ + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (((CODE) == 1 ? DW_EH_PE_textrel : DW_EH_PE_datarel) \ + | ((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_udata8) + +/* Handle special EH pointer encodings. Absolute, pc-relative, and + indirect are handled automatically. */ +#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \ + do { \ + const char *reltag = NULL; \ + if (((ENCODING) & 0xF0) == DW_EH_PE_textrel) \ + reltag = "@segrel("; \ + else if (((ENCODING) & 0xF0) == DW_EH_PE_datarel) \ + reltag = "@gprel("; \ + if (reltag) \ + { \ + fputs (integer_asm_op (SIZE, FALSE), FILE); \ + fputs (reltag, FILE); \ + assemble_name (FILE, XSTR (ADDR, 0)); \ + fputc (')', FILE); \ + goto DONE; \ + } \ + } while (0) + + +/* Assembler Commands for Alignment. */ + +/* ??? Investigate. */ + +/* The alignment (log base 2) to put in front of LABEL, which follows + a BARRIER. */ + +/* #define LABEL_ALIGN_AFTER_BARRIER(LABEL) */ + +/* The desired alignment for the location counter at the beginning + of a loop. 
*/ + +/* #define LOOP_ALIGN(LABEL) */ + +/* Define this macro if `ASM_OUTPUT_SKIP' should not be used in the text + section because it fails put zeros in the bytes that are skipped. */ + +#define ASM_NO_SKIP_IN_TEXT 1 + +/* A C statement to output to the stdio stream STREAM an assembler command to + advance the location counter to a multiple of 2 to the POWER bytes. */ + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + fprintf (STREAM, "\t.align %d\n", 1<<(POWER)) + + +/* Macros Affecting all Debug Formats. */ + +/* This is handled in svr4.h and sysv4.h. */ + + +/* Specific Options for DBX Output. */ + +/* This is handled by dbxelf.h which is included by svr4.h. */ + + +/* Open ended Hooks for DBX Output. */ + +/* Likewise. */ + + +/* File names in DBX format. */ + +/* Likewise. */ + + +/* Macros for SDB and Dwarf Output. */ + +/* Define this macro if GNU CC should produce dwarf version 2 format debugging + output in response to the `-g' option. */ + +#define DWARF2_DEBUGGING_INFO + +#define DWARF2_ASM_LINE_DEBUG_INFO (TARGET_DWARF2_ASM) + +/* Use tags for debug info labels, so that they don't break instruction + bundles. This also avoids getting spurious DV warnings from the + assembler. This is similar to ASM_OUTPUT_INTERNAL_LABEL, except that we + add brackets around the label. */ + +#define ASM_OUTPUT_DEBUG_LABEL(FILE, PREFIX, NUM) \ + fprintf (FILE, "[.%s%d:]\n", PREFIX, NUM) + +/* Use section-relative relocations for debugging offsets. Unlike other + targets that fake this by putting the section VMA at 0, IA-64 has + proper relocations for them. */ +#define ASM_OUTPUT_DWARF_OFFSET(FILE, SIZE, LABEL) \ + do { \ + fputs (integer_asm_op (SIZE, FALSE), FILE); \ + fputs ("@secrel(", FILE); \ + assemble_name (FILE, LABEL); \ + fputc (')', FILE); \ + } while (0) + +/* Emit a PC-relative relocation. 
*/ +#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \ + do { \ + fputs (integer_asm_op (SIZE, FALSE), FILE); \ + fputs ("@pcrel(", FILE); \ + assemble_name (FILE, LABEL); \ + fputc (')', FILE); \ + } while (0) + +/* Cross Compilation and Floating Point. */ + +/* Define to enable software floating point emulation. */ +#define REAL_ARITHMETIC + + +/* Register Renaming Parameters. */ + +/* A C expression that is nonzero if hard register number REGNO2 can be + considered for use as a rename register for REGNO1 */ + +#define HARD_REGNO_RENAME_OK(REGNO1,REGNO2) \ + ia64_hard_regno_rename_ok((REGNO1), (REGNO2)) + + +/* Miscellaneous Parameters. */ + +/* Define this if you have defined special-purpose predicates in the file + `MACHINE.c'. For each predicate, list all rtl codes that can be in + expressions matched by the predicate. */ + +#define PREDICATE_CODES \ +{ "call_operand", {SUBREG, REG, SYMBOL_REF}}, \ +{ "got_symbolic_operand", {SYMBOL_REF, CONST, LABEL_REF}}, \ +{ "sdata_symbolic_operand", {SYMBOL_REF, CONST}}, \ +{ "symbolic_operand", {SYMBOL_REF, CONST, LABEL_REF}}, \ +{ "function_operand", {SYMBOL_REF}}, \ +{ "setjmp_operand", {SYMBOL_REF}}, \ +{ "destination_operand", {SUBREG, REG, MEM}}, \ +{ "not_postinc_memory_operand", {MEM}}, \ +{ "move_operand", {SUBREG, REG, MEM, CONST_INT, CONST_DOUBLE, \ + CONSTANT_P_RTX, SYMBOL_REF, CONST, LABEL_REF}}, \ +{ "gr_register_operand", {SUBREG, REG}}, \ +{ "fr_register_operand", {SUBREG, REG}}, \ +{ "grfr_register_operand", {SUBREG, REG}}, \ +{ "gr_nonimmediate_operand", {SUBREG, REG, MEM}}, \ +{ "fr_nonimmediate_operand", {SUBREG, REG, MEM}}, \ +{ "grfr_nonimmediate_operand", {SUBREG, REG, MEM}}, \ +{ "gr_reg_or_0_operand", {SUBREG, REG, CONST_INT}}, \ +{ "gr_reg_or_5bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "gr_reg_or_6bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "gr_reg_or_8bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "grfr_reg_or_8bit_operand", {SUBREG, REG, 
CONST_INT, CONSTANT_P_RTX}}, \ +{ "gr_reg_or_8bit_adjusted_operand", {SUBREG, REG, CONST_INT, \ + CONSTANT_P_RTX}}, \ +{ "gr_reg_or_8bit_and_adjusted_operand", {SUBREG, REG, CONST_INT, \ + CONSTANT_P_RTX}}, \ +{ "gr_reg_or_14bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "gr_reg_or_22bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "shift_count_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "shift_32bit_count_operand", {SUBREG, REG, CONST_INT, \ + CONSTANT_P_RTX}}, \ +{ "shladd_operand", {CONST_INT}}, \ +{ "fetchadd_operand", {CONST_INT}}, \ +{ "fr_reg_or_fp01_operand", {SUBREG, REG, CONST_DOUBLE}}, \ +{ "normal_comparison_operator", {EQ, NE, GT, LE, GTU, LEU}}, \ +{ "adjusted_comparison_operator", {LT, GE, LTU, GEU}}, \ +{ "signed_inequality_operator", {GE, GT, LE, LT}}, \ +{ "predicate_operator", {NE, EQ}}, \ +{ "condop_operator", {PLUS, MINUS, IOR, XOR, AND}}, \ +{ "ar_lc_reg_operand", {REG}}, \ +{ "ar_ccv_reg_operand", {REG}}, \ +{ "ar_pfs_reg_operand", {REG}}, \ +{ "general_tfmode_operand", {SUBREG, REG, CONST_DOUBLE, MEM}}, \ +{ "destination_tfmode_operand", {SUBREG, REG, MEM}}, \ +{ "tfreg_or_fp01_operand", {REG, CONST_DOUBLE}}, + +/* An alias for a machine mode name. This is the machine mode that elements of + a jump-table should have. */ + +#define CASE_VECTOR_MODE Pmode + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. */ + +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Define this macro if operations between registers with integral mode smaller + than a word are always performed on the entire register. */ + +#define WORD_REGISTER_OPERATIONS + +/* Define this macro to be a C expression indicating when insns that read + memory in MODE, an integral mode narrower than a word, set the bits outside + of MODE to be either the sign-extension or the zero-extension of the data + read. 
*/ + +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* The maximum number of bytes that a single instruction can move quickly from + memory to memory. */ +#define MOVE_MAX 8 + +/* A C expression which is nonzero if on this machine it is safe to "convert" + an integer of INPREC bits to one of OUTPREC bits (where OUTPREC is smaller + than INPREC) by merely operating on it as if it had only OUTPREC bits. */ + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* A C expression describing the value returned by a comparison operator with + an integral mode and stored by a store-flag instruction (`sCOND') when the + condition is true. */ + +/* ??? Investigate using -1 instead of 1. */ + +#define STORE_FLAG_VALUE 1 + +/* An alias for the machine mode for pointers. */ + +/* ??? This would change if we had ILP32 support. */ + +#define Pmode DImode + +/* An alias for the machine mode used for memory references to functions being + called, in `call' RTL expressions. */ + +#define FUNCTION_MODE Pmode + +/* Define this macro to handle System V style pragmas: #pragma pack and + #pragma weak. Note, #pragma weak will only be supported if SUPPORT_WEAK is + defined. */ + +/* If this architecture supports prefetch, define this to be the number of + prefetch commands that can be executed in parallel. + + ??? This number is bogus and needs to be replaced before the value is + actually used in optimizations. */ + +#define SIMULTANEOUS_PREFETCHES 6 + +/* If this architecture supports prefetch, define this to be the size of + the cache line that is prefetched. */ + +#define PREFETCH_BLOCK 32 + +#define HANDLE_SYSV_PRAGMA + +/* In rare cases, correct code generation requires extra machine dependent + processing between the second jump optimization pass and delayed branch + scheduling. On those machines, define this macro as a C statement to act on + the code starting at INSN. 
*/ + +#define MACHINE_DEPENDENT_REORG(INSN) ia64_reorg (INSN) + +/* A C expression for the maximum number of instructions to execute via + conditional execution instructions instead of a branch. A value of + BRANCH_COST+1 is the default if the machine does not use + cc0, and 1 if it does use cc0. */ +/* ??? Investigate. */ +#define MAX_CONDITIONAL_EXECUTE 12 + +extern int ia64_final_schedule; + +#define IA64_UNWIND_INFO 1 +#define IA64_UNWIND_EMIT(f,i) process_for_unwind_directive (f,i) + +#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 15 : INVALID_REGNUM) + +/* This function contains machine specific function data. */ +struct machine_function +{ + /* The new stack pointer when unwinding from EH. */ + struct rtx_def* ia64_eh_epilogue_sp; + + /* The new bsp value when unwinding from EH. */ + struct rtx_def* ia64_eh_epilogue_bsp; + + /* The GP value save register. */ + struct rtx_def* ia64_gp_save; + + /* The number of varargs registers to save. */ + int n_varargs; +}; + + +enum ia64_builtins +{ + IA64_BUILTIN_SYNCHRONIZE, + + IA64_BUILTIN_FETCH_AND_ADD_SI, + IA64_BUILTIN_FETCH_AND_SUB_SI, + IA64_BUILTIN_FETCH_AND_OR_SI, + IA64_BUILTIN_FETCH_AND_AND_SI, + IA64_BUILTIN_FETCH_AND_XOR_SI, + IA64_BUILTIN_FETCH_AND_NAND_SI, + + IA64_BUILTIN_ADD_AND_FETCH_SI, + IA64_BUILTIN_SUB_AND_FETCH_SI, + IA64_BUILTIN_OR_AND_FETCH_SI, + IA64_BUILTIN_AND_AND_FETCH_SI, + IA64_BUILTIN_XOR_AND_FETCH_SI, + IA64_BUILTIN_NAND_AND_FETCH_SI, + + IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI, + IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI, + + IA64_BUILTIN_SYNCHRONIZE_SI, + + IA64_BUILTIN_LOCK_TEST_AND_SET_SI, + + IA64_BUILTIN_LOCK_RELEASE_SI, + + IA64_BUILTIN_FETCH_AND_ADD_DI, + IA64_BUILTIN_FETCH_AND_SUB_DI, + IA64_BUILTIN_FETCH_AND_OR_DI, + IA64_BUILTIN_FETCH_AND_AND_DI, + IA64_BUILTIN_FETCH_AND_XOR_DI, + IA64_BUILTIN_FETCH_AND_NAND_DI, + + IA64_BUILTIN_ADD_AND_FETCH_DI, + IA64_BUILTIN_SUB_AND_FETCH_DI, + IA64_BUILTIN_OR_AND_FETCH_DI, + IA64_BUILTIN_AND_AND_FETCH_DI, + IA64_BUILTIN_XOR_AND_FETCH_DI, + 
IA64_BUILTIN_NAND_AND_FETCH_DI, + + IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI, + IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI, + + IA64_BUILTIN_SYNCHRONIZE_DI, + + IA64_BUILTIN_LOCK_TEST_AND_SET_DI, + + IA64_BUILTIN_LOCK_RELEASE_DI, + + IA64_BUILTIN_BSP, + IA64_BUILTIN_FLUSHRS +}; + +/* Codes for expand_compare_and_swap and expand_swap_and_compare. */ +enum fetchop_code { + IA64_ADD_OP, IA64_SUB_OP, IA64_OR_OP, IA64_AND_OP, IA64_XOR_OP, IA64_NAND_OP +}; + +/* End of ia64.h */ diff --git a/contrib/gcc/config/ia64/ia64.md b/contrib/gcc/config/ia64/ia64.md new file mode 100644 index 0000000..c88e8b0 --- /dev/null +++ b/contrib/gcc/config/ia64/ia64.md @@ -0,0 +1,5314 @@ +;; IA-64 Machine description template +;; Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. +;; Contributed by James E. Wilson <wilson@cygnus.com> and +;; David Mosberger <davidm@hpl.hp.com>. + +;; This file is part of GNU CC. + +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; ??? register_operand accepts (subreg:DI (mem:SI X)) which forces later +;; reload. This will be fixed once scheduling support is turned on. + +;; ??? Optimize for post-increment addressing modes. + +;; ??? fselect is not supported, because there is no integer register +;; equivalent. + +;; ??? 
fp abs/min/max instructions may also work for integer values. + +;; ??? Would a predicate_reg_operand predicate be useful? The HP one is buggy, +;; it assumes the operand is a register and takes REGNO of it without checking. + +;; ??? Would a branch_reg_operand predicate be useful? The HP one is buggy, +;; it assumes the operand is a register and takes REGNO of it without checking. + +;; ??? Go through list of documented named patterns and look for more to +;; implement. + +;; ??? Go through instruction manual and look for more instructions that +;; can be emitted. + +;; ??? Add function unit scheduling info for Itanium (TM) processor. + +;; ??? Need a better way to describe alternate fp status registers. + +;; Unspec usage: +;; +;; unspec: +;; 1 gr_spill +;; 2 gr_restore +;; 3 fr_spill +;; 4 fr_restore +;; 5 recip_approx +;; 7 pred_rel_mutex +;; 8 popcnt +;; 9 pic call +;; 12 mf +;; 13 cmpxchg_acq +;; 19 fetchadd_acq +;; 20 bsp_value +;; 21 flushrs +;; 22 bundle selector +;; 23 cycle display +;; 24 addp4 +;; 25 prologue_use +;; +;; unspec_volatile: +;; 0 alloc +;; 1 blockage +;; 2 insn_group_barrier +;; 3 break +;; 5 set_bsp +;; 8 pred.safe_across_calls all +;; 9 pred.safe_across_calls normal + +;; :::::::::::::::::::: +;; :: +;; :: Attributes +;; :: +;; :::::::::::::::::::: + +;; Instruction type. This primarily determines how instructions can be +;; packed in bundles, and secondarily affects scheduling to function units. + +;; A alu, can go in I or M syllable of a bundle +;; I integer +;; M memory +;; F floating-point +;; B branch +;; L long immediate, takes two syllables +;; S stop bit + +;; ??? Should not have any pattern with type unknown. Perhaps add code to +;; check this in md_reorg? Currently use unknown for patterns which emit +;; multiple instructions, patterns which emit 0 instructions, and patterns +;; which emit instruction that can go in any slot (e.g. nop). 
+ +(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,nop_i,nop_m,nop_x,lfetch" + (const_string "unknown")) + +;; chk_s has an I and an M form; use type A for convenience. +(define_attr "type" "unknown,A,I,M,F,B,L,X,S" + (cond [(eq_attr "itanium_class" "ld,st,fld,stf,sem,nop_m") (const_string "M") + (eq_attr "itanium_class" "rse_m,syst_m,syst_m0") (const_string "M") + (eq_attr "itanium_class" "frar_m,toar_m,frfr,tofr") (const_string "M") + (eq_attr "itanium_class" "lfetch") (const_string "M") + (eq_attr "itanium_class" "chk_s,ialu,icmp,ilog") (const_string "A") + (eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy") (const_string "F") + (eq_attr "itanium_class" "fcvtfx,nop_f") (const_string "F") + (eq_attr "itanium_class" "frar_i,toar_i,frbr,tobr") (const_string "I") + (eq_attr "itanium_class" "frpr,topr,ishf,xtd,tbit") (const_string "I") + (eq_attr "itanium_class" "mmmul,mmshf,mmshfi,nop_i") (const_string "I") + (eq_attr "itanium_class" "br,scall,nop_b") (const_string "B") + (eq_attr "itanium_class" "stop_bit") (const_string "S") + (eq_attr "itanium_class" "nop_x") (const_string "X") + (eq_attr "itanium_class" "long_i") (const_string "L")] + (const_string "unknown"))) + +(define_attr "itanium_requires_unit0" "no,yes" + (cond [(eq_attr "itanium_class" "syst_m0,sem,frfr,rse_m") (const_string "yes") + (eq_attr "itanium_class" "toar_m,frar_m") (const_string "yes") + (eq_attr "itanium_class" "frbr,tobr,mmmul") (const_string "yes") + (eq_attr "itanium_class" "tbit,ishf,topr,frpr") (const_string "yes") + (eq_attr "itanium_class" "toar_i,frar_i") (const_string "yes") + (eq_attr "itanium_class" "fmisc,fcmp") (const_string "yes")] + (const_string "no"))) + +;; Predication. True iff this instruction can be predicated. 
+ +(define_attr "predicable" "no,yes" (const_string "yes")) + + +;; :::::::::::::::::::: +;; :: +;; :: Function Units +;; :: +;; :::::::::::::::::::: + +;; We define 6 "dummy" functional units. All the real work to decide which +;; insn uses which unit is done by our MD_SCHED_REORDER hooks. We only +;; have to ensure here that there are enough copies of the dummy unit so +;; that the scheduler doesn't get confused by MD_SCHED_REORDER. +;; Other than the 6 dummies for normal insns, we also add a single dummy unit +;; for stop bits. + +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "br") 0 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "scall") 0 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcmp") 2 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcvtfx") 7 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fld") 9 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmac") 5 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmisc") 5 0) + +;; There is only one insn `mov = ar.bsp' for frar_i: +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_i") 13 0) +;; There is only ony insn `mov = ar.unat' for frar_m: +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_m") 6 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frbr") 2 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frfr") 2 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frpr") 2 0) + +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ialu") 1 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "icmp") 1 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ilog") 1 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ishf") 1 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ld") 2 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "long_i") 1 0) +(define_function_unit 
"dummy" 6 1 (eq_attr "itanium_class" "mmmul") 2 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshf") 2 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshfi") 2 0) + +;; Now we have only one insn (flushrs) of such class. We assume that flushrs +;; is the 1st syllable of the bundle after stop bit. +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "rse_m") 0 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "sem") 11 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "stf") 1 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "st") 1 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m0") 1 0) +;; Now we use only one insn `mf'. Therfore latency time is set up to 0. +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m") 0 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tbit") 1 0) + +;; There is only one insn `mov ar.pfs =' for toar_i therefore we use +;; latency time equal to 0: +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_i") 0 0) +;; There are only ony 2 insns `mov ar.ccv =' and `mov ar.unat =' for toar_m: +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_m") 5 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tobr") 1 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tofr") 9 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "topr") 1 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xmpy") 7 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xtd") 1 0) + +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_m") 0 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_i") 0 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_f") 0 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_b") 0 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_x") 0 0) + 
+(define_function_unit "stop_bit" 1 1 (eq_attr "itanium_class" "stop_bit") 0 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ignore") 0 0) +(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "unknown") 0 0) + +;; :::::::::::::::::::: +;; :: +;; :: Moves +;; :: +;; :::::::::::::::::::: + +;; Set of a single predicate register. This is only used to implement +;; pr-to-pr move and complement. + +(define_insn "*movcci" + [(set (match_operand:CCI 0 "register_operand" "=c,c,c") + (match_operand:CCI 1 "nonmemory_operand" "O,n,c"))] + "" + "@ + cmp.ne %0, p0 = r0, r0 + cmp.eq %0, p0 = r0, r0 + (%1) cmp.eq.unc %0, p0 = r0, r0" + [(set_attr "itanium_class" "icmp") + (set_attr "predicable" "no")]) + +(define_insn "movbi" + [(set (match_operand:BI 0 "nonimmediate_operand" "=c,c,?c,?*r, c,*r,*r,*m,*r") + (match_operand:BI 1 "move_operand" " O,n, c, c,*r, n,*m,*r,*r"))] + "" + "@ + cmp.ne %0, %I0 = r0, r0 + cmp.eq %0, %I0 = r0, r0 + # + # + tbit.nz %0, %I0 = %1, 0 + adds %0 = %1, r0 + ld1%O1 %0 = %1%P1 + st1%Q0 %0 = %1%P0 + mov %0 = %1" + [(set_attr "itanium_class" "icmp,icmp,unknown,unknown,tbit,ialu,ld,st,ialu")]) + +(define_split + [(set (match_operand:BI 0 "register_operand" "") + (match_operand:BI 1 "register_operand" ""))] + "reload_completed + && GET_CODE (operands[0]) == REG && GR_REGNO_P (REGNO (operands[0])) + && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))" + [(cond_exec (ne (match_dup 1) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (cond_exec (eq (match_dup 1) (const_int 0)) + (set (match_dup 0) (const_int 0)))] + "") + +(define_split + [(set (match_operand:BI 0 "register_operand" "") + (match_operand:BI 1 "register_operand" ""))] + "reload_completed + && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0])) + && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))" + [(set (match_dup 2) (match_dup 4)) + (set (match_dup 3) (match_dup 5)) + (set (match_dup 0) (unspec:BI [(match_dup 0)] 7))] 
+ "operands[2] = gen_rtx_REG (CCImode, REGNO (operands[0])); + operands[3] = gen_rtx_REG (CCImode, REGNO (operands[0]) + 1); + operands[4] = gen_rtx_REG (CCImode, REGNO (operands[1])); + operands[5] = gen_rtx_REG (CCImode, REGNO (operands[1]) + 1);") + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && ! ia64_move_ok (operands[0], operands[1])) + operands[1] = force_reg (QImode, operands[1]); +}") + +(define_insn "*movqi_internal" + [(set (match_operand:QI 0 "destination_operand" "=r,r,r, m, r,*f,*f") + (match_operand:QI 1 "move_operand" "rO,J,m,rO,*f,rO,*f"))] + "ia64_move_ok (operands[0], operands[1])" + "@ + mov %0 = %r1 + addl %0 = %1, r0 + ld1%O1 %0 = %1%P1 + st1%Q0 %0 = %r1%P0 + getf.sig %0 = %1 + setf.sig %0 = %r1 + mov %0 = %1" + [(set_attr "itanium_class" "ialu,ialu,ld,st,frfr,tofr,fmisc")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && ! ia64_move_ok (operands[0], operands[1])) + operands[1] = force_reg (HImode, operands[1]); +}") + +(define_insn "*movhi_internal" + [(set (match_operand:HI 0 "destination_operand" "=r,r,r, m, r,*f,*f") + (match_operand:HI 1 "move_operand" "rO,J,m,rO,*f,rO,*f"))] + "ia64_move_ok (operands[0], operands[1])" + "@ + mov %0 = %r1 + addl %0 = %1, r0 + ld2%O1 %0 = %1%P1 + st2%Q0 %0 = %r1%P0 + getf.sig %0 = %1 + setf.sig %0 = %r1 + mov %0 = %1" + [(set_attr "itanium_class" "ialu,ialu,ld,st,frfr,tofr,fmisc")]) + +(define_expand "movsi" + [(set (match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && ! 
ia64_move_ok (operands[0], operands[1])) + operands[1] = force_reg (SImode, operands[1]); +}") + +(define_insn "*movsi_internal" + [(set (match_operand:SI 0 "destination_operand" "=r,r,r,r, m, r,*f,*f, r,*d") + (match_operand:SI 1 "move_operand" "rO,J,i,m,rO,*f,rO,*f,*d,rK"))] + "ia64_move_ok (operands[0], operands[1])" + "@ + mov %0 = %r1 + addl %0 = %1, r0 + movl %0 = %1 + ld4%O1 %0 = %1%P1 + st4%Q0 %0 = %r1%P0 + getf.sig %0 = %1 + setf.sig %0 = %r1 + mov %0 = %1 + mov %0 = %1 + mov %0 = %r1" +;; frar_m, toar_m ??? why not frar_i and toar_i + [(set_attr "itanium_class" "ialu,ialu,long_i,ld,st,frfr,tofr,fmisc,frar_m,toar_m")]) + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && ! ia64_move_ok (operands[0], operands[1])) + operands[1] = force_reg (DImode, operands[1]); + if (! TARGET_NO_PIC && symbolic_operand (operands[1], DImode)) + { + /* Before optimization starts, delay committing to any particular + type of PIC address load. If this function gets deferred, we + may acquire information that changes the value of the + sdata_symbolic_operand predicate. */ + /* But don't delay for function pointers. Loading a function address + actually loads the address of the descriptor not the function. + If we represent these as SYMBOL_REFs, then they get cse'd with + calls, and we end up with calls to the descriptor address instead of + calls to the function address. Functions are not candidates for + sdata anyways. */ + if (rtx_equal_function_value_matters + && ! (GET_CODE (operands[1]) == SYMBOL_REF + && SYMBOL_REF_FLAG (operands[1]))) + emit_insn (gen_movdi_symbolic (operands[0], operands[1], gen_reg_rtx (DImode))); + else + ia64_expand_load_address (operands[0], operands[1], NULL_RTX); + DONE; + } +}") + +;; This is used during early compilation to delay the decision on +;; how to refer to a variable as long as possible. 
This is especially +;; important between initial rtl generation and optimization for +;; deferred functions, since we may acquire additional information +;; on the variables used in the meantime. + +;; ??? This causes us to lose REG_LABEL notes, because the insn splitter +;; does not attempt to preserve any REG_NOTES on the input instruction. + +(define_insn_and_split "movdi_symbolic" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "symbolic_operand" "s")) + (clobber (match_operand:DI 2 "register_operand" "+r")) + (use (reg:DI 1))] + "" + "* abort ();" + "" + [(const_int 0)] + "ia64_expand_load_address (operands[0], operands[1], operands[2]); DONE;") + +(define_insn "*movdi_internal" + [(set (match_operand:DI 0 "destination_operand" + "=r,r,r,r, m, r,*f,*f,*f, Q, r,*b, r,*e, r,*d, r,*c") + (match_operand:DI 1 "move_operand" + "rO,J,i,m,rO,*f,rO,*f, Q,*f,*b,rO,*e,rK,*d,rK,*c,rO"))] + "ia64_move_ok (operands[0], operands[1])" + "* +{ + static const char * const alt[] = { + \"%,mov %0 = %r1\", + \"%,addl %0 = %1, r0\", + \"%,movl %0 = %1\", + \"%,ld8%O1 %0 = %1%P1\", + \"%,st8%Q0 %0 = %r1%P0\", + \"%,getf.sig %0 = %1\", + \"%,setf.sig %0 = %r1\", + \"%,mov %0 = %1\", + \"%,ldf8 %0 = %1%P1\", + \"%,stf8 %0 = %1%P0\", + \"%,mov %0 = %1\", + \"%,mov %0 = %r1\", + \"%,mov %0 = %1\", + \"%,mov %0 = %1\", + \"%,mov %0 = %1\", + \"%,mov %0 = %1\", + \"mov %0 = pr\", + \"mov pr = %1, -1\" + }; + + if (which_alternative == 2 && ! TARGET_NO_PIC + && symbolic_operand (operands[1], VOIDmode)) + abort (); + + return alt[which_alternative]; +}" + [(set_attr "itanium_class" "ialu,ialu,long_i,ld,st,frfr,tofr,fmisc,fld,stf,frbr,tobr,frar_i,toar_i,frar_m,toar_m,frpr,topr")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "symbolic_operand" ""))] + "reload_completed && ! 
TARGET_NO_PIC" + [(const_int 0)] + " +{ + ia64_expand_load_address (operands[0], operands[1], NULL_RTX); + DONE; +}") + +(define_expand "load_fptr" + [(set (match_dup 2) + (plus:DI (reg:DI 1) (match_operand:DI 1 "function_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") (match_dup 3))] + "" + " +{ + operands[2] = no_new_pseudos ? operands[0] : gen_reg_rtx (DImode); + operands[3] = gen_rtx_MEM (DImode, operands[2]); + RTX_UNCHANGING_P (operands[3]) = 1; +}") + +(define_insn "*load_fptr_internal1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (reg:DI 1) (match_operand:DI 1 "function_operand" "s")))] + "" + "addl %0 = @ltoff(@fptr(%1)), gp" + [(set_attr "itanium_class" "ialu")]) + +(define_insn "load_gprel" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (reg:DI 1) (match_operand:DI 1 "sdata_symbolic_operand" "s")))] + "" + "addl %0 = @gprel(%1), gp" + [(set_attr "itanium_class" "ialu")]) + +(define_insn "gprel64_offset" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "symbolic_operand" "") (reg:DI 1)))] + "" + "movl %0 = @gprel(%1)" + [(set_attr "itanium_class" "long_i")]) + +(define_expand "load_gprel64" + [(set (match_dup 2) + (minus:DI (match_operand:DI 1 "symbolic_operand" "") (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_dup 3) (match_dup 2)))] + "" + " +{ + operands[2] = no_new_pseudos ? 
operands[0] : gen_reg_rtx (DImode); + operands[3] = pic_offset_table_rtx; +}") + +(define_expand "load_symptr" + [(set (match_operand:DI 2 "register_operand" "") + (plus:DI (match_dup 4) (match_operand:DI 1 "got_symbolic_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") (match_dup 3))] + "" + " +{ + operands[3] = gen_rtx_MEM (DImode, operands[2]); + operands[4] = pic_offset_table_rtx; + RTX_UNCHANGING_P (operands[3]) = 1; +}") + +(define_insn "*load_symptr_internal1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (reg:DI 1) (match_operand:DI 1 "got_symbolic_operand" "s")))] + "" + "addl %0 = @ltoff(%1), gp" + [(set_attr "itanium_class" "ialu")]) + +;; With no offsettable memory references, we've got to have a scratch +;; around to play with the second word. +(define_expand "movti" + [(parallel [(set (match_operand:TI 0 "general_operand" "") + (match_operand:TI 1 "general_operand" "")) + (clobber (match_scratch:DI 2 ""))])] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && ! 
ia64_move_ok (operands[0], operands[1])) + operands[1] = force_reg (TImode, operands[1]); +}") + +(define_insn_and_split "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,r,m") + (match_operand:TI 1 "general_operand" "ri,m,r")) + (clobber (match_scratch:DI 2 "=X,&r,&r"))] + "ia64_move_ok (operands[0], operands[1])" + "#" + "reload_completed" + [(const_int 0)] + " +{ + rtx adj1, adj2, in[2], out[2], insn; + int first; + + adj1 = ia64_split_timode (in, operands[1], operands[2]); + adj2 = ia64_split_timode (out, operands[0], operands[2]); + + first = 0; + if (reg_overlap_mentioned_p (out[0], in[1])) + { + if (reg_overlap_mentioned_p (out[1], in[0])) + abort (); + first = 1; + } + + if (adj1 && adj2) + abort (); + if (adj1) + emit_insn (adj1); + if (adj2) + emit_insn (adj2); + insn = emit_insn (gen_rtx_SET (VOIDmode, out[first], in[first])); + if (GET_CODE (out[first]) == MEM + && GET_CODE (XEXP (out[first], 0)) == POST_MODIFY) + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, + XEXP (XEXP (out[first], 0), 0), + REG_NOTES (insn)); + insn = emit_insn (gen_rtx_SET (VOIDmode, out[!first], in[!first])); + if (GET_CODE (out[!first]) == MEM + && GET_CODE (XEXP (out[!first], 0)) == POST_MODIFY) + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, + XEXP (XEXP (out[!first], 0), 0), + REG_NOTES (insn)); + DONE; +}" + [(set_attr "itanium_class" "unknown") + (set_attr "predicable" "no")]) + +;; ??? SSA creates these. Can't allow memories since we don't have +;; the scratch register. Fortunately combine will know how to add +;; the clobber and scratch. 
(define_insn_and_split "*movti_internal_reg"
  [(set (match_operand:TI 0 "register_operand" "=r")
        (match_operand:TI 1 "nonmemory_operand" "ri"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  "
{
  rtx in[2], out[2];
  int first;

  ia64_split_timode (in, operands[1], NULL_RTX);
  ia64_split_timode (out, operands[0], NULL_RTX);

  first = 0;
  if (reg_overlap_mentioned_p (out[0], in[1]))
    {
      if (reg_overlap_mentioned_p (out[1], in[0]))
        abort ();
      first = 1;
    }

  emit_insn (gen_rtx_SET (VOIDmode, out[first], in[first]));
  emit_insn (gen_rtx_SET (VOIDmode, out[!first], in[!first]));
  DONE;
}"
  [(set_attr "itanium_class" "unknown")
   (set_attr "predicable" "no")])

;; Secondary-reload patterns for TImode; pick a scratch DI register that
;; does not overlap the data register pair.
(define_expand "reload_inti"
  [(parallel [(set (match_operand:TI 0 "register_operand" "=r")
                   (match_operand:TI 1 "" "m"))
              (clobber (match_operand:TI 2 "register_operand" "=&r"))])]
  ""
  "
{
  unsigned int s_regno = REGNO (operands[2]);
  if (s_regno == REGNO (operands[0]))
    s_regno += 1;
  operands[2] = gen_rtx_REG (DImode, s_regno);
}")

(define_expand "reload_outti"
  [(parallel [(set (match_operand:TI 0 "" "=m")
                   (match_operand:TI 1 "register_operand" "r"))
              (clobber (match_operand:TI 2 "register_operand" "=&r"))])]
  ""
  "
{
  unsigned int s_regno = REGNO (operands[2]);
  if (s_regno == REGNO (operands[1]))
    s_regno += 1;
  operands[2] = gen_rtx_REG (DImode, s_regno);
}")

;; Floating Point Moves
;;
;; Note - Patterns for SF mode moves are compulsory, but
;; patterns for DF are optional, as GCC can synthesise them.

(define_expand "movsf"
  [(set (match_operand:SF 0 "general_operand" "")
        (match_operand:SF 1 "general_operand" ""))]
  ""
  "
{
  if (! reload_in_progress && ! reload_completed
      && ! ia64_move_ok (operands[0], operands[1]))
    operands[1] = force_reg (SFmode, operands[1]);
}")

(define_insn "*movsf_internal"
  [(set (match_operand:SF 0 "destination_operand" "=f,f, Q,*r, f,*r,*r, m")
        (match_operand:SF 1 "general_operand" "fG,Q,fG,fG,*r,*r, m,*r"))]
  "ia64_move_ok (operands[0], operands[1])"
  "@
   mov %0 = %F1
   ldfs %0 = %1%P1
   stfs %0 = %F1%P0
   getf.s %0 = %F1
   setf.s %0 = %1
   mov %0 = %1
   ld4%O1 %0 = %1%P1
   st4%Q0 %0 = %1%P0"
  [(set_attr "itanium_class" "fmisc,fld,stf,frfr,tofr,ialu,ld,st")])

(define_expand "movdf"
  [(set (match_operand:DF 0 "general_operand" "")
        (match_operand:DF 1 "general_operand" ""))]
  ""
  "
{
  if (! reload_in_progress && ! reload_completed
      && ! ia64_move_ok (operands[0], operands[1]))
    operands[1] = force_reg (DFmode, operands[1]);
}")

(define_insn "*movdf_internal"
  [(set (match_operand:DF 0 "destination_operand" "=f,f, Q,*r, f,*r,*r, m")
        (match_operand:DF 1 "general_operand" "fG,Q,fG,fG,*r,*r, m,*r"))]
  "ia64_move_ok (operands[0], operands[1])"
  "@
   mov %0 = %F1
   ldfd %0 = %1%P1
   stfd %0 = %F1%P0
   getf.d %0 = %F1
   setf.d %0 = %1
   mov %0 = %1
   ld8%O1 %0 = %1%P1
   st8%Q0 %0 = %1%P0"
  [(set_attr "itanium_class" "fmisc,fld,stf,frfr,tofr,ialu,ld,st")])

;; With no offsettable memory references, we've got to have a scratch
;; around to play with the second word if the variable winds up in GRs.
(define_expand "movtf"
  [(set (match_operand:TF 0 "general_operand" "")
        (match_operand:TF 1 "general_operand" ""))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "
{
  /* We must support TFmode loads into general registers for stdarg/vararg
     and unprototyped calls.  We split them into DImode loads for convenience.
     We don't need TFmode stores from general regs, because a stdarg/vararg
     routine does a block store to memory of unnamed arguments.  */
  if (GET_CODE (operands[0]) == REG
      && GR_REGNO_P (REGNO (operands[0])))
    {
      /* We're hoping to transform everything that deals with TFmode
         quantities and GR registers early in the compiler.  */
      if (no_new_pseudos)
        abort ();

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
           && GET_MODE (SUBREG_REG (operands[1])) == TImode)
          || (GET_CODE (operands[1]) == REG
              && GR_REGNO_P (REGNO (operands[1]))))
        {
          emit_move_insn (gen_rtx_REG (TImode, REGNO (operands[0])),
                          SUBREG_REG (operands[1]));
          DONE;
        }

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
        {
          emit_move_insn (gen_rtx_REG (DImode, REGNO (operands[0])),
                          operand_subword (operands[1], 0, 0, TFmode));
          emit_move_insn (gen_rtx_REG (DImode, REGNO (operands[0]) + 1),
                          operand_subword (operands[1], 1, 0, TFmode));
          DONE;
        }

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], TFmode))
        operands[1] = spill_tfmode_operand (operands[1], 1);

      if (GET_CODE (operands[1]) == MEM)
        {
          rtx out[2];

          out[WORDS_BIG_ENDIAN] = gen_rtx_REG (DImode, REGNO (operands[0]));
          out[!WORDS_BIG_ENDIAN] = gen_rtx_REG (DImode, REGNO (operands[0])+1);

          emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
          emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
          DONE;
        }

      abort ();
    }

  if (! reload_in_progress && ! reload_completed)
    {
      operands[0] = spill_tfmode_operand (operands[0], 0);
      operands[1] = spill_tfmode_operand (operands[1], 0);

      if (! ia64_move_ok (operands[0], operands[1]))
        operands[1] = force_reg (TFmode, operands[1]);
    }
}")

;; ??? There's no easy way to mind volatile acquire/release semantics.

(define_insn "*movtf_internal"
  [(set (match_operand:TF 0 "destination_tfmode_operand" "=f,f, m")
        (match_operand:TF 1 "general_tfmode_operand" "fG,m,fG"))]
  "INTEL_EXTENDED_IEEE_FORMAT && ia64_move_ok (operands[0], operands[1])"
  "@
   mov %0 = %F1
   ldfe %0 = %1%P1
   stfe %0 = %F1%P0"
  [(set_attr "itanium_class" "fmisc,fld,stf")])

;; ::::::::::::::::::::
;; ::
;; :: Conversions
;; ::
;; ::::::::::::::::::::

;; Signed conversions from a smaller integer to a larger integer

(define_insn "extendqidi2"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
        (sign_extend:DI (match_operand:QI 1 "gr_register_operand" "r")))]
  ""
  "sxt1 %0 = %1"
  [(set_attr "itanium_class" "xtd")])

(define_insn "extendhidi2"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
        (sign_extend:DI (match_operand:HI 1 "gr_register_operand" "r")))]
  ""
  "sxt2 %0 = %1"
  [(set_attr "itanium_class" "xtd")])

(define_insn "extendsidi2"
  [(set (match_operand:DI 0 "grfr_register_operand" "=r,?f")
        (sign_extend:DI (match_operand:SI 1 "grfr_register_operand" "r,f")))]
  ""
  "@
   sxt4 %0 = %1
   fsxt.r %0 = %1, %1"
  [(set_attr "itanium_class" "xtd,fmisc")])

;; Unsigned conversions from a smaller integer to a larger integer

(define_insn "zero_extendqidi2"
  [(set (match_operand:DI 0 "gr_register_operand" "=r,r")
        (zero_extend:DI (match_operand:QI 1 "gr_nonimmediate_operand" "r,m")))]
  ""
  "@
   zxt1 %0 = %1
   ld1%O1 %0 = %1%P1"
  [(set_attr "itanium_class" "xtd,ld")])

(define_insn "zero_extendhidi2"
  [(set (match_operand:DI 0 "gr_register_operand" "=r,r")
        (zero_extend:DI (match_operand:HI 1 "gr_nonimmediate_operand" "r,m")))]
  ""
  "@
   zxt2 %0 = %1
   ld2%O1 %0 = %1%P1"
  [(set_attr "itanium_class" "xtd,ld")])

(define_insn "zero_extendsidi2"
  [(set (match_operand:DI 0 "grfr_register_operand" "=r,r,?f")
        (zero_extend:DI
          (match_operand:SI 1 "grfr_nonimmediate_operand" "r,m,f")))]
  ""
  "@
   zxt4 %0 = %1
   ld4%O1 %0 = %1%P1
   fmix.r %0 = f0, %1"
  [(set_attr "itanium_class" "xtd,ld,fmisc")])

;; Convert between floating point types of different sizes.

;; At first glance, it would appear that emitting fnorm for an extending
;; conversion is unnecessary.  However, the stf and getf instructions work
;; correctly only if the input is properly rounded for its type.  In
;; particular, we get the wrong result for getf.d/stfd if the input is a
;; denorm single.  Since we don't know what the next instruction will be, we
;; have to emit an fnorm.

;; ??? Optimization opportunity here.  Get rid of the insn altogether
;; when we can.  Should probably use a scheme like has been proposed
;; for ia32 in dealing with operands that match unary operators.  This
;; would let combine merge the thing into adjacent insns.  See also how the
;; mips port handles SIGN_EXTEND as operands to integer arithmetic insns via
;; se_register_operand.

(define_insn "extendsfdf2"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
        (float_extend:DF (match_operand:SF 1 "fr_register_operand" "f")))]
  ""
  "fnorm.d %0 = %1"
  [(set_attr "itanium_class" "fmac")])

(define_insn "extendsftf2"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
        (float_extend:TF (match_operand:SF 1 "fr_register_operand" "f")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fnorm %0 = %1"
  [(set_attr "itanium_class" "fmac")])

(define_insn "extenddftf2"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
        (float_extend:TF (match_operand:DF 1 "fr_register_operand" "f")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fnorm %0 = %1"
  [(set_attr "itanium_class" "fmac")])

(define_insn "truncdfsf2"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
        (float_truncate:SF (match_operand:DF 1 "fr_register_operand" "f")))]
  ""
  "fnorm.s %0 = %1"
  [(set_attr "itanium_class" "fmac")])

(define_insn "trunctfsf2"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
        (float_truncate:SF (match_operand:TF 1 "fr_register_operand" "f")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fnorm.s %0 = %1"
  [(set_attr "itanium_class" "fmac")])

(define_insn "trunctfdf2"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
        (float_truncate:DF (match_operand:TF 1 "fr_register_operand" "f")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fnorm.d %0 = %1"
  [(set_attr "itanium_class" "fmac")])

;; Convert between signed integer types and floating point.

(define_insn "floatditf2"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
        (float:TF (match_operand:DI 1 "fr_register_operand" "f")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fcvt.xf %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

;; ??? Suboptimal.  This should be split somehow.
(define_insn "floatdidf2"
  [(set (match_operand:DF 0 "register_operand" "=f")
        (float:DF (match_operand:DI 1 "register_operand" "f")))]
  "!INTEL_EXTENDED_IEEE_FORMAT"
  "fcvt.xf %0 = %1\;;;\;fnorm.d %0 = %0"
  [(set_attr "itanium_class" "fcvtfx")])

;; ??? Suboptimal.  This should be split somehow.
(define_insn "floatdisf2"
  [(set (match_operand:SF 0 "register_operand" "=f")
        (float:SF (match_operand:DI 1 "register_operand" "f")))]
  "!INTEL_EXTENDED_IEEE_FORMAT"
  "fcvt.xf %0 = %1\;;;\;fnorm.s %0 = %0"
  [(set_attr "itanium_class" "fcvtfx")])

(define_insn "fix_truncsfdi2"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
        (fix:DI (match_operand:SF 1 "fr_register_operand" "f")))]
  ""
  "fcvt.fx.trunc %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

(define_insn "fix_truncdfdi2"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
        (fix:DI (match_operand:DF 1 "fr_register_operand" "f")))]
  ""
  "fcvt.fx.trunc %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

(define_insn "fix_trunctfdi2"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
        (fix:DI (match_operand:TF 1 "fr_register_operand" "f")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fcvt.fx.trunc %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

(define_insn "fix_trunctfdi2_alts"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
        (fix:DI (match_operand:TF 1 "fr_register_operand" "f")))
   (use (match_operand:SI 2 "const_int_operand" ""))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fcvt.fx.trunc.s%2 %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

;; Convert between unsigned integer types and floating point.

(define_insn "floatunsdisf2"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
        (unsigned_float:SF (match_operand:DI 1 "fr_register_operand" "f")))]
  ""
  "fcvt.xuf.s %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

(define_insn "floatunsdidf2"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
        (unsigned_float:DF (match_operand:DI 1 "fr_register_operand" "f")))]
  ""
  "fcvt.xuf.d %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

(define_insn "floatunsditf2"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
        (unsigned_float:TF (match_operand:DI 1 "fr_register_operand" "f")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fcvt.xuf %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

(define_insn "fixuns_truncsfdi2"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
        (unsigned_fix:DI (match_operand:SF 1 "fr_register_operand" "f")))]
  ""
  "fcvt.fxu.trunc %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

(define_insn "fixuns_truncdfdi2"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
        (unsigned_fix:DI (match_operand:DF 1 "fr_register_operand" "f")))]
  ""
  "fcvt.fxu.trunc %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

(define_insn "fixuns_trunctfdi2"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
        (unsigned_fix:DI (match_operand:TF 1 "fr_register_operand" "f")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fcvt.fxu.trunc %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

(define_insn "fixuns_trunctfdi2_alts"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
        (unsigned_fix:DI (match_operand:TF 1 "fr_register_operand" "f")))
   (use (match_operand:SI 2 "const_int_operand" ""))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fcvt.fxu.trunc.s%2 %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

;; ::::::::::::::::::::
;; ::
;; :: Bit field extraction
;; ::
;; ::::::::::::::::::::

(define_insn "extv"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
        (sign_extract:DI (match_operand:DI 1 "gr_register_operand" "r")
                         (match_operand:DI 2 "const_int_operand" "n")
                         (match_operand:DI 3 "const_int_operand" "n")))]
  ""
  "extr %0 = %1, %3, %2"
  [(set_attr "itanium_class" "ishf")])

(define_insn "extzv"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
        (zero_extract:DI (match_operand:DI 1 "gr_register_operand" "r")
                         (match_operand:DI 2 "const_int_operand" "n")
                         (match_operand:DI 3 "const_int_operand" "n")))]
  ""
  "extr.u %0 = %1, %3, %2"
  [(set_attr "itanium_class" "ishf")])

;; Insert a bit field.
;; Can have 3 operands, source1 (inserter), source2 (insertee), dest.
;; Source1 can be 0 or -1.
;; Source2 can be 0.

;; ??? Actual dep instruction is more powerful than what these insv
;; patterns support.  Unfortunately, combine is unable to create patterns
;; where source2 != dest.

(define_expand "insv"
  [(set (zero_extract:DI (match_operand:DI 0 "gr_register_operand" "")
                         (match_operand:DI 1 "const_int_operand" "")
                         (match_operand:DI 2 "const_int_operand" ""))
        (match_operand:DI 3 "nonmemory_operand" ""))]
  ""
  "
{
  int width = INTVAL (operands[1]);
  int shift = INTVAL (operands[2]);

  /* If operand[3] is a constant, and isn't 0 or -1, then load it into a
     pseudo.  */
  if (! register_operand (operands[3], DImode)
      && operands[3] != const0_rtx && operands[3] != constm1_rtx)
    operands[3] = force_reg (DImode, operands[3]);

  /* If this is a single dep instruction, we have nothing to do.  */
  if (! ((register_operand (operands[3], DImode) && width <= 16)
         || operands[3] == const0_rtx || operands[3] == constm1_rtx))
    {
      /* Check for cases that can be implemented with a mix instruction.  */
      if (width == 32 && shift == 0)
        {
          /* Directly generating the mix4left instruction confuses
             optimize_bit_field in function.c.  Since this is performing
             a useful optimization, we defer generation of the complicated
             mix4left RTL to the first splitting phase.  */
          rtx tmp = gen_reg_rtx (DImode);
          emit_insn (gen_shift_mix4left (operands[0], operands[3], tmp));
          DONE;
        }
      else if (width == 32 && shift == 32)
        {
          emit_insn (gen_mix4right (operands[0], operands[3]));
          DONE;
        }

      /* We could handle remaining cases by emitting multiple dep
         instructions.

         If we need more than two dep instructions then we lose.  A 6
         insn sequence mov mask1,mov mask2,shl;;and,and;;or is better than
         mov;;dep,shr;;dep,shr;;dep.  The former can be executed in 3 cycles,
         the latter is 6 cycles on an Itanium (TM) processor, because there is
         only one function unit that can execute dep and shr immed.

         If we only need two dep instruction, then we still lose.
         mov;;dep,shr;;dep is still 4 cycles.  Even if we optimize away
         the unnecessary mov, this is still undesirable because it will be
         hard to optimize, and it creates unnecessary pressure on the I0
         function unit.  */

      FAIL;

#if 0
      /* This code may be useful for other IA-64 processors, so we leave it in
         for now.  */
      while (width > 16)
        {
          rtx tmp;

          emit_insn (gen_insv (operands[0], GEN_INT (16), GEN_INT (shift),
                               operands[3]));
          shift += 16;
          width -= 16;
          tmp = gen_reg_rtx (DImode);
          emit_insn (gen_lshrdi3 (tmp, operands[3], GEN_INT (16)));
          operands[3] = tmp;
        }
      operands[1] = GEN_INT (width);
      operands[2] = GEN_INT (shift);
#endif
    }
}")

(define_insn "*insv_internal"
  [(set (zero_extract:DI (match_operand:DI 0 "gr_register_operand" "+r")
                         (match_operand:DI 1 "const_int_operand" "n")
                         (match_operand:DI 2 "const_int_operand" "n"))
        (match_operand:DI 3 "nonmemory_operand" "rP"))]
  "(gr_register_operand (operands[3], DImode) && INTVAL (operands[1]) <= 16)
   || operands[3] == const0_rtx || operands[3] == constm1_rtx"
  "dep %0 = %3, %0, %2, %1"
  [(set_attr "itanium_class" "ishf")])

;; Combine doesn't like to create bitfield insertions into zero.
(define_insn "*depz_internal"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
        (and:DI (ashift:DI (match_operand:DI 1 "gr_register_operand" "r")
                           (match_operand:DI 2 "const_int_operand" "n"))
                (match_operand:DI 3 "const_int_operand" "n")))]
  "CONST_OK_FOR_M (INTVAL (operands[2]))
   && ia64_depz_field_mask (operands[3], operands[2]) > 0"
  "*
{
  operands[3] = GEN_INT (ia64_depz_field_mask (operands[3], operands[2]));
  return \"%,dep.z %0 = %1, %2, %3\";
}"
  [(set_attr "itanium_class" "ishf")])

(define_insn "shift_mix4left"
  [(set (zero_extract:DI (match_operand:DI 0 "gr_register_operand" "+r")
                         (const_int 32) (const_int 0))
        (match_operand:DI 1 "gr_register_operand" "r"))
   (clobber (match_operand:DI 2 "gr_register_operand" "=r"))]
  ""
  "#"
  [(set_attr "itanium_class" "unknown")])

(define_split
  [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "")
                         (const_int 32) (const_int 0))
        (match_operand:DI 1 "register_operand" ""))
   (clobber (match_operand:DI 2 "register_operand" ""))]
  "reload_completed"
  [(set (match_dup 3) (ashift:DI (match_dup 1) (const_int 32)))
+ (set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 0)) + (lshiftrt:DI (match_dup 3) (const_int 32)))] + "operands[3] = operands[2];") + +(define_split + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "") + (const_int 32) (const_int 0)) + (match_operand:DI 1 "register_operand" "")) + (clobber (match_operand:DI 2 "register_operand" ""))] + "! reload_completed" + [(set (match_dup 3) (ashift:DI (match_dup 1) (const_int 32))) + (set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 0)) + (lshiftrt:DI (match_dup 3) (const_int 32)))] + "operands[3] = operands[2];") + +(define_insn "*mix4left" + [(set (zero_extract:DI (match_operand:DI 0 "gr_register_operand" "+r") + (const_int 32) (const_int 0)) + (lshiftrt:DI (match_operand:DI 1 "gr_register_operand" "r") + (const_int 32)))] + "" + "mix4.l %0 = %0, %r1" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "mix4right" + [(set (zero_extract:DI (match_operand:DI 0 "gr_register_operand" "+r") + (const_int 32) (const_int 32)) + (match_operand:DI 1 "gr_reg_or_0_operand" "rO"))] + "" + "mix4.r %0 = %r1, %0" + [(set_attr "itanium_class" "mmshf")]) + +;; This is used by the rotrsi3 pattern. 
+ +(define_insn "*mix4right_3op" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (ior:DI (zero_extend:DI (match_operand:SI 1 "gr_register_operand" "r")) + (ashift:DI (zero_extend:DI + (match_operand:SI 2 "gr_register_operand" "r")) + (const_int 32))))] + "" + "mix4.r %0 = %2, %1" + [(set_attr "itanium_class" "mmshf")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 1 bit Integer arithmetic +;; :: +;; :::::::::::::::::::: + +(define_insn_and_split "andbi3" + [(set (match_operand:BI 0 "register_operand" "=c,c,r") + (and:BI (match_operand:BI 1 "register_operand" "%0,0,r") + (match_operand:BI 2 "register_operand" "c,r,r")))] + "" + "@ + # + tbit.nz.and.orcm %0, %I0 = %2, 0 + and %0 = %2, %1" + "reload_completed + && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0])) + && GET_CODE (operands[2]) == REG && PR_REGNO_P (REGNO (operands[2]))" + [(cond_exec (eq (match_dup 2) (const_int 0)) + (set (match_dup 0) (and:BI (ne:BI (const_int 0) (const_int 0)) + (match_dup 0))))] + "" + [(set_attr "itanium_class" "unknown,tbit,ilog")]) + +(define_insn_and_split "*andcmbi3" + [(set (match_operand:BI 0 "register_operand" "=c,c,r") + (and:BI (not:BI (match_operand:BI 1 "register_operand" "c,r,r")) + (match_operand:BI 2 "register_operand" "0,0,r")))] + "" + "@ + # + tbit.z.and.orcm %0, %I0 = %1, 0 + andcm %0 = %2, %1" + "reload_completed + && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0])) + && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))" + [(cond_exec (ne (match_dup 1) (const_int 0)) + (set (match_dup 0) (and:BI (ne:BI (const_int 0) (const_int 0)) + (match_dup 0))))] + "" + [(set_attr "itanium_class" "unknown,tbit,ilog")]) + +(define_insn_and_split "iorbi3" + [(set (match_operand:BI 0 "register_operand" "=c,c,r") + (ior:BI (match_operand:BI 1 "register_operand" "%0,0,r") + (match_operand:BI 2 "register_operand" "c,r,r")))] + "" + "@ + # + tbit.nz.or.andcm %0, %I0 = %2, 0 + or %0 = %2, %1" + "reload_completed + && GET_CODE 
(operands[0]) == REG && PR_REGNO_P (REGNO (operands[0])) + && GET_CODE (operands[2]) == REG && PR_REGNO_P (REGNO (operands[2]))" + [(cond_exec (ne (match_dup 2) (const_int 0)) + (set (match_dup 0) (ior:BI (eq:BI (const_int 0) (const_int 0)) + (match_dup 0))))] + "" + [(set_attr "itanium_class" "unknown,tbit,ilog")]) + +(define_insn_and_split "*iorcmbi3" + [(set (match_operand:BI 0 "register_operand" "=c,c") + (ior:BI (not:BI (match_operand:BI 1 "register_operand" "c,r")) + (match_operand:BI 2 "register_operand" "0,0")))] + "" + "@ + # + tbit.z.or.andcm %0, %I0 = %1, 0" + "reload_completed + && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0])) + && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))" + [(cond_exec (eq (match_dup 1) (const_int 0)) + (set (match_dup 0) (ior:BI (eq:BI (const_int 0) (const_int 0)) + (match_dup 0))))] + "" + [(set_attr "itanium_class" "unknown,tbit")]) + +(define_insn "one_cmplbi2" + [(set (match_operand:BI 0 "register_operand" "=c,r,c,&c") + (not:BI (match_operand:BI 1 "register_operand" "r,r,0,c"))) + (clobber (match_scratch:BI 2 "=X,X,c,X"))] + "" + "@ + tbit.z %0, %I0 = %1, 0 + xor %0 = 1, %1 + # + #" + [(set_attr "itanium_class" "tbit,ilog,unknown,unknown")]) + +(define_split + [(set (match_operand:BI 0 "register_operand" "") + (not:BI (match_operand:BI 1 "register_operand" ""))) + (clobber (match_scratch:BI 2 ""))] + "reload_completed + && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0])) + && rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 4) (match_dup 3)) + (set (match_dup 0) (const_int 1)) + (cond_exec (ne (match_dup 2) (const_int 0)) + (set (match_dup 0) (const_int 0))) + (set (match_dup 0) (unspec:BI [(match_dup 0)] 7))] + "operands[3] = gen_rtx_REG (CCImode, REGNO (operands[1])); + operands[4] = gen_rtx_REG (CCImode, REGNO (operands[2]));") + +(define_split + [(set (match_operand:BI 0 "register_operand" "") + (not:BI (match_operand:BI 1 "register_operand" ""))) + 
(clobber (match_scratch:BI 2 ""))] + "reload_completed + && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0])) + && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1])) + && ! rtx_equal_p (operands[0], operands[1])" + [(cond_exec (ne (match_dup 1) (const_int 0)) + (set (match_dup 0) (const_int 0))) + (cond_exec (eq (match_dup 1) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (set (match_dup 0) (unspec:BI [(match_dup 0)] 7))] + "") + +(define_insn "*cmpsi_and_0" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (match_operator:BI 4 "predicate_operator" + [(match_operand:SI 2 "gr_reg_or_0_operand" "rO") + (match_operand:SI 3 "gr_reg_or_8bit_operand" "rK")]) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp4.%C4.and.orcm %0, %I0 = %3, %r2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpsi_and_1" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (match_operator:BI 3 "signed_inequality_operator" + [(match_operand:SI 2 "gr_register_operand" "r") + (const_int 0)]) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp4.%C3.and.orcm %0, %I0 = r0, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpsi_andnot_0" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (not:BI (match_operator:BI 4 "predicate_operator" + [(match_operand:SI 2 "gr_reg_or_0_operand" "rO") + (match_operand:SI 3 "gr_reg_or_8bit_operand" "rK")])) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp4.%C4.or.andcm %I0, %0 = %3, %r2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpsi_andnot_1" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (not:BI (match_operator:BI 3 "signed_inequality_operator" + [(match_operand:SI 2 "gr_register_operand" "r") + (const_int 0)])) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp4.%C3.or.andcm %I0, %0 = r0, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpdi_and_0" + [(set (match_operand:BI 0 
"register_operand" "=c") + (and:BI (match_operator:BI 4 "predicate_operator" + [(match_operand:DI 2 "gr_register_operand" "r") + (match_operand:DI 3 "gr_reg_or_8bit_operand" "rK")]) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp.%C4.and.orcm %0, %I0 = %3, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpdi_and_1" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (match_operator:BI 3 "signed_inequality_operator" + [(match_operand:DI 2 "gr_register_operand" "r") + (const_int 0)]) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp.%C3.and.orcm %0, %I0 = r0, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpdi_andnot_0" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (not:BI (match_operator:BI 4 "predicate_operator" + [(match_operand:DI 2 "gr_register_operand" "r") + (match_operand:DI 3 "gr_reg_or_8bit_operand" "rK")])) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp.%C4.or.andcm %I0, %0 = %3, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpdi_andnot_1" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (not:BI (match_operator:BI 3 "signed_inequality_operator" + [(match_operand:DI 2 "gr_register_operand" "r") + (const_int 0)])) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp.%C3.or.andcm %I0, %0 = r0, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*tbit_and_0" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (ne:BI (and:DI (match_operand:DI 1 "gr_register_operand" "r") + (const_int 1)) + (const_int 0)) + (match_operand:BI 2 "register_operand" "0")))] + "" + "tbit.nz.and.orcm %0, %I0 = %1, 0" + [(set_attr "itanium_class" "tbit")]) + +(define_insn "*tbit_and_1" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (eq:BI (and:DI (match_operand:DI 1 "gr_register_operand" "r") + (const_int 1)) + (const_int 0)) + (match_operand:BI 2 "register_operand" "0")))] + "" + "tbit.z.and.orcm %0, %I0 = %1, 0" + 
[(set_attr "itanium_class" "tbit")]) + +(define_insn "*tbit_and_2" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (ne:BI (zero_extract:DI + (match_operand:DI 1 "gr_register_operand" "r") + (const_int 1) + (match_operand:DI 2 "const_int_operand" "n")) + (const_int 0)) + (match_operand:BI 3 "register_operand" "0")))] + "" + "tbit.nz.and.orcm %0, %I0 = %1, %2" + [(set_attr "itanium_class" "tbit")]) + +(define_insn "*tbit_and_3" + [(set (match_operand:BI 0 "register_operand" "=c") + (and:BI (eq:BI (zero_extract:DI + (match_operand:DI 1 "gr_register_operand" "r") + (const_int 1) + (match_operand:DI 2 "const_int_operand" "n")) + (const_int 0)) + (match_operand:BI 3 "register_operand" "0")))] + "" + "tbit.z.and.orcm %0, %I0 = %1, %2" + [(set_attr "itanium_class" "tbit")]) + +(define_insn "*cmpsi_or_0" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (match_operator:BI 4 "predicate_operator" + [(match_operand:SI 2 "gr_reg_or_0_operand" "rO") + (match_operand:SI 3 "gr_reg_or_8bit_operand" "rK")]) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp4.%C4.or.andcm %0, %I0 = %3, %r2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpsi_or_1" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (match_operator:BI 3 "signed_inequality_operator" + [(match_operand:SI 2 "gr_register_operand" "r") + (const_int 0)]) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp4.%C3.or.andcm %0, %I0 = r0, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpsi_orcm_0" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (not:BI (match_operator:BI 4 "predicate_operator" + [(match_operand:SI 2 "gr_reg_or_0_operand" "rO") + (match_operand:SI 3 "gr_reg_or_8bit_operand" "rK")])) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp4.%C4.and.orcm %I0, %0 = %3, %r2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpsi_orcm_1" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (not:BI 
(match_operator:BI 3 "signed_inequality_operator" + [(match_operand:SI 2 "gr_register_operand" "r") + (const_int 0)])) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp4.%C3.and.orcm %I0, %0 = r0, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpdi_or_0" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (match_operator:BI 4 "predicate_operator" + [(match_operand:DI 2 "gr_register_operand" "r") + (match_operand:DI 3 "gr_reg_or_8bit_operand" "rK")]) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp.%C4.or.andcm %0, %I0 = %3, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpdi_or_1" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (match_operator:BI 3 "signed_inequality_operator" + [(match_operand:DI 2 "gr_register_operand" "r") + (const_int 0)]) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp.%C3.or.andcm %0, %I0 = r0, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpdi_orcm_0" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (not:BI (match_operator:BI 4 "predicate_operator" + [(match_operand:DI 2 "gr_register_operand" "r") + (match_operand:DI 3 "gr_reg_or_8bit_operand" "rK")])) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp.%C4.and.orcm %I0, %0 = %3, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*cmpdi_orcm_1" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (not:BI (match_operator:BI 3 "signed_inequality_operator" + [(match_operand:DI 2 "gr_register_operand" "r") + (const_int 0)])) + (match_operand:BI 1 "register_operand" "0")))] + "" + "cmp.%C3.and.orcm %I0, %0 = r0, %2" + [(set_attr "itanium_class" "icmp")]) + +(define_insn "*tbit_or_0" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (ne:BI (and:DI (match_operand:DI 1 "gr_register_operand" "r") + (const_int 1)) + (const_int 0)) + (match_operand:BI 2 "register_operand" "0")))] + "" + "tbit.nz.or.andcm %0, %I0 = %1, 0" + [(set_attr 
"itanium_class" "tbit")]) + +(define_insn "*tbit_or_1" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (eq:BI (and:DI (match_operand:DI 1 "gr_register_operand" "r") + (const_int 1)) + (const_int 0)) + (match_operand:BI 2 "register_operand" "0")))] + "" + "tbit.z.or.andcm %0, %I0 = %1, 0" + [(set_attr "itanium_class" "tbit")]) + +(define_insn "*tbit_or_2" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (ne:BI (zero_extract:DI + (match_operand:DI 1 "gr_register_operand" "r") + (const_int 1) + (match_operand:DI 2 "const_int_operand" "n")) + (const_int 0)) + (match_operand:BI 3 "register_operand" "0")))] + "" + "tbit.nz.or.andcm %0, %I0 = %1, %2" + [(set_attr "itanium_class" "tbit")]) + +(define_insn "*tbit_or_3" + [(set (match_operand:BI 0 "register_operand" "=c") + (ior:BI (eq:BI (zero_extract:DI + (match_operand:DI 1 "gr_register_operand" "r") + (const_int 1) + (match_operand:DI 2 "const_int_operand" "n")) + (const_int 0)) + (match_operand:BI 3 "register_operand" "0")))] + "" + "tbit.z.or.andcm %0, %I0 = %1, %2" + [(set_attr "itanium_class" "tbit")]) + +;; Transform test of and/or of setcc into parallel comparisons. 
+ +(define_split + [(set (match_operand:BI 0 "register_operand" "") + (ne:BI (and:DI (ne:DI (match_operand:BI 2 "register_operand" "") + (const_int 0)) + (match_operand:DI 3 "register_operand" "")) + (const_int 0)))] + "" + [(set (match_dup 0) + (and:BI (ne:BI (and:DI (match_dup 3) (const_int 1)) (const_int 0)) + (match_dup 2)))] + "") + +(define_split + [(set (match_operand:BI 0 "register_operand" "") + (eq:BI (and:DI (ne:DI (match_operand:BI 2 "register_operand" "") + (const_int 0)) + (match_operand:DI 3 "register_operand" "")) + (const_int 0)))] + "" + [(set (match_dup 0) + (and:BI (ne:BI (and:DI (match_dup 3) (const_int 1)) (const_int 0)) + (match_dup 2))) + (parallel [(set (match_dup 0) (not:BI (match_dup 0))) + (clobber (scratch))])] + "") + +(define_split + [(set (match_operand:BI 0 "register_operand" "") + (ne:BI (ior:DI (ne:DI (match_operand:BI 2 "register_operand" "") + (const_int 0)) + (match_operand:DI 3 "register_operand" "")) + (const_int 0)))] + "" + [(set (match_dup 0) + (ior:BI (ne:BI (match_dup 3) (const_int 0)) + (match_dup 2)))] + "") + +(define_split + [(set (match_operand:BI 0 "register_operand" "") + (eq:BI (ior:DI (ne:DI (match_operand:BI 2 "register_operand" "") + (const_int 0)) + (match_operand:DI 3 "register_operand" "")) + (const_int 0)))] + "" + [(set (match_dup 0) + (ior:BI (ne:BI (match_dup 3) (const_int 0)) + (match_dup 2))) + (parallel [(set (match_dup 0) (not:BI (match_dup 0))) + (clobber (scratch))])] + "") + +;; ??? Incredibly hackish. Either need four proper patterns with all +;; the alternatives, or rely on sched1 to split the insn and hope that +;; nothing bad happens to the comparisons in the meantime. +;; +;; Alternately, adjust combine to allow 2->2 and 3->3 splits, assuming +;; that we're doing height reduction. 
+; +;(define_insn_and_split "" +; [(set (match_operand:BI 0 "register_operand" "=c") +; (and:BI (and:BI (match_operator:BI 1 "comparison_operator" +; [(match_operand 2 "" "") +; (match_operand 3 "" "")]) +; (match_operator:BI 4 "comparison_operator" +; [(match_operand 5 "" "") +; (match_operand 6 "" "")])) +; (match_dup 0)))] +; "flag_schedule_insns" +; "#" +; "" +; [(set (match_dup 0) (and:BI (match_dup 1) (match_dup 0))) +; (set (match_dup 0) (and:BI (match_dup 4) (match_dup 0)))] +; "") +; +;(define_insn_and_split "" +; [(set (match_operand:BI 0 "register_operand" "=c") +; (ior:BI (ior:BI (match_operator:BI 1 "comparison_operator" +; [(match_operand 2 "" "") +; (match_operand 3 "" "")]) +; (match_operator:BI 4 "comparison_operator" +; [(match_operand 5 "" "") +; (match_operand 6 "" "")])) +; (match_dup 0)))] +; "flag_schedule_insns" +; "#" +; "" +; [(set (match_dup 0) (ior:BI (match_dup 1) (match_dup 0))) +; (set (match_dup 0) (ior:BI (match_dup 4) (match_dup 0)))] +; "") +; +;(define_split +; [(set (match_operand:BI 0 "register_operand" "") +; (and:BI (and:BI (match_operator:BI 1 "comparison_operator" +; [(match_operand 2 "" "") +; (match_operand 3 "" "")]) +; (match_operand:BI 7 "register_operand" "")) +; (and:BI (match_operator:BI 4 "comparison_operator" +; [(match_operand 5 "" "") +; (match_operand 6 "" "")]) +; (match_operand:BI 8 "register_operand" ""))))] +; "" +; [(set (match_dup 0) (and:BI (match_dup 7) (match_dup 8))) +; (set (match_dup 0) (and:BI (and:BI (match_dup 1) (match_dup 4)) +; (match_dup 0)))] +; "") +; +;(define_split +; [(set (match_operand:BI 0 "register_operand" "") +; (ior:BI (ior:BI (match_operator:BI 1 "comparison_operator" +; [(match_operand 2 "" "") +; (match_operand 3 "" "")]) +; (match_operand:BI 7 "register_operand" "")) +; (ior:BI (match_operator:BI 4 "comparison_operator" +; [(match_operand 5 "" "") +; (match_operand 6 "" "")]) +; (match_operand:BI 8 "register_operand" ""))))] +; "" +; [(set (match_dup 0) (ior:BI (match_dup 7) 
(match_dup 8))) +; (set (match_dup 0) (ior:BI (ior:BI (match_dup 1) (match_dup 4)) +; (match_dup 0)))] +; "") + +;; Try harder to avoid predicate copies by duplicating compares. +;; Note that we'll have already split the predicate copy, which +;; is kind of a pain, but oh well. + +(define_peephole2 + [(set (match_operand:BI 0 "register_operand" "") + (match_operand:BI 1 "comparison_operator" "")) + (set (match_operand:CCI 2 "register_operand" "") + (match_operand:CCI 3 "register_operand" "")) + (set (match_operand:CCI 4 "register_operand" "") + (match_operand:CCI 5 "register_operand" "")) + (set (match_operand:BI 6 "register_operand" "") + (unspec:BI [(match_dup 6)] 7))] + "REGNO (operands[3]) == REGNO (operands[0]) + && REGNO (operands[4]) == REGNO (operands[0]) + 1 + && REGNO (operands[4]) == REGNO (operands[2]) + 1 + && REGNO (operands[6]) == REGNO (operands[2])" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 6) (match_dup 7))] + "operands[7] = copy_rtx (operands[1]);") + +;; :::::::::::::::::::: +;; :: +;; :: 16 bit Integer arithmetic +;; :: +;; :::::::::::::::::::: + +(define_insn "mulhi3" + [(set (match_operand:HI 0 "gr_register_operand" "=r") + (mult:HI (match_operand:HI 1 "gr_register_operand" "r") + (match_operand:HI 2 "gr_register_operand" "r")))] + "" + "pmpy2.r %0 = %1, %2" + [(set_attr "itanium_class" "mmmul")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 32 bit Integer arithmetic +;; :: +;; :::::::::::::::::::: + +(define_insn "addsi3" + [(set (match_operand:SI 0 "gr_register_operand" "=r,r,r") + (plus:SI (match_operand:SI 1 "gr_register_operand" "%r,r,a") + (match_operand:SI 2 "gr_reg_or_22bit_operand" "r,I,J")))] + "" + "@ + add %0 = %1, %2 + adds %0 = %2, %1 + addl %0 = %2, %1" + [(set_attr "itanium_class" "ialu")]) + +(define_insn "*addsi3_plus1" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "gr_register_operand" "r") + (match_operand:SI 2 "gr_register_operand" "r")) + (const_int 1)))] + "" 
+ "add %0 = %1, %2, 1" + [(set_attr "itanium_class" "ialu")]) + +(define_insn "*addsi3_plus1_alt" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (plus:SI (mult:SI (match_operand:SI 1 "gr_register_operand" "r") + (const_int 2)) + (const_int 1)))] + "" + "add %0 = %1, %1, 1" + [(set_attr "itanium_class" "ialu")]) + +(define_insn "*addsi3_shladd" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (plus:SI (mult:SI (match_operand:SI 1 "gr_register_operand" "r") + (match_operand:SI 2 "shladd_operand" "n")) + (match_operand:SI 3 "gr_register_operand" "r")))] + "" + "shladd %0 = %1, %S2, %3" + [(set_attr "itanium_class" "ialu")]) + +(define_insn "subsi3" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (minus:SI (match_operand:SI 1 "gr_reg_or_8bit_operand" "rK") + (match_operand:SI 2 "gr_register_operand" "r")))] + "" + "sub %0 = %1, %2" + [(set_attr "itanium_class" "ialu")]) + +(define_insn "*subsi3_minus1" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (plus:SI (not:SI (match_operand:SI 1 "gr_register_operand" "r")) + (match_operand:SI 2 "gr_register_operand" "r")))] + "" + "sub %0 = %2, %1, 1" + [(set_attr "itanium_class" "ialu")]) + +;; ??? Could add maddsi3 patterns patterned after the madddi3 patterns. 
+ +(define_insn "mulsi3" + [(set (match_operand:SI 0 "fr_register_operand" "=f") + (mult:SI (match_operand:SI 1 "grfr_register_operand" "f") + (match_operand:SI 2 "grfr_register_operand" "f")))] + "" + "xmpy.l %0 = %1, %2" + [(set_attr "itanium_class" "xmpy")]) + +(define_insn "maddsi4" + [(set (match_operand:SI 0 "fr_register_operand" "=f") + (plus:SI (mult:SI (match_operand:SI 1 "grfr_register_operand" "f") + (match_operand:SI 2 "grfr_register_operand" "f")) + (match_operand:SI 3 "grfr_register_operand" "f")))] + "" + "xma.l %0 = %1, %2, %3" + [(set_attr "itanium_class" "xmpy")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (neg:SI (match_operand:SI 1 "gr_register_operand" "r")))] + "" + "sub %0 = r0, %1" + [(set_attr "itanium_class" "ialu")]) + +(define_expand "abssi2" + [(set (match_dup 2) + (ge:BI (match_operand:SI 1 "gr_register_operand" "") (const_int 0))) + (set (match_operand:SI 0 "gr_register_operand" "") + (if_then_else:SI (eq (match_dup 2) (const_int 0)) + (neg:SI (match_dup 1)) + (match_dup 1)))] + "" + " +{ + operands[2] = gen_reg_rtx (BImode); +}") + +(define_expand "sminsi3" + [(set (match_dup 3) + (ge:BI (match_operand:SI 1 "gr_register_operand" "") + (match_operand:SI 2 "gr_register_operand" ""))) + (set (match_operand:SI 0 "gr_register_operand" "") + (if_then_else:SI (ne (match_dup 3) (const_int 0)) + (match_dup 2) (match_dup 1)))] + "" + " +{ + operands[3] = gen_reg_rtx (BImode); +}") + +(define_expand "smaxsi3" + [(set (match_dup 3) + (ge:BI (match_operand:SI 1 "gr_register_operand" "") + (match_operand:SI 2 "gr_register_operand" ""))) + (set (match_operand:SI 0 "gr_register_operand" "") + (if_then_else:SI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + " +{ + operands[3] = gen_reg_rtx (BImode); +}") + +(define_expand "uminsi3" + [(set (match_dup 3) + (geu:BI (match_operand:SI 1 "gr_register_operand" "") + (match_operand:SI 2 "gr_register_operand" ""))) + (set (match_operand:SI 0 
"gr_register_operand" "") + (if_then_else:SI (ne (match_dup 3) (const_int 0)) + (match_dup 2) (match_dup 1)))] + "" + " +{ + operands[3] = gen_reg_rtx (BImode); +}") + +(define_expand "umaxsi3" + [(set (match_dup 3) + (geu:BI (match_operand:SI 1 "gr_register_operand" "") + (match_operand:SI 2 "gr_register_operand" ""))) + (set (match_operand:SI 0 "gr_register_operand" "") + (if_then_else:SI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + " +{ + operands[3] = gen_reg_rtx (BImode); +}") + +(define_expand "divsi3" + [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV" + " +{ + rtx op1_tf, op2_tf, op0_tf, op0_di, twon34; + + op0_tf = gen_reg_rtx (TFmode); + op0_di = gen_reg_rtx (DImode); + + if (CONSTANT_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + op1_tf = gen_reg_rtx (TFmode); + expand_float (op1_tf, operands[1], 0); + + if (CONSTANT_P (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + op2_tf = gen_reg_rtx (TFmode); + expand_float (op2_tf, operands[2], 0); + + /* 2^-34 */ +#if 0 + twon34 = (CONST_DOUBLE_FROM_REAL_VALUE + (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), TFmode)); + twon34 = force_reg (TFmode, twon34); +#else + twon34 = gen_reg_rtx (TFmode); + convert_move (twon34, force_const_mem (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), SFmode)), 0); +#endif + + emit_insn (gen_divsi3_internal (op0_tf, op1_tf, op2_tf, twon34)); + + emit_insn (gen_fix_trunctfdi2_alts (op0_di, op0_tf, const1_rtx)); + emit_move_insn (operands[0], gen_lowpart (SImode, op0_di)); + DONE; +}") + +(define_expand "modsi3" + [(set (match_operand:SI 0 "register_operand" "") + (mod:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV" + " +{ + rtx op2_neg, op1_di, 
div; + + div = gen_reg_rtx (SImode); + emit_insn (gen_divsi3 (div, operands[1], operands[2])); + + op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0); + + /* This is a trick to get us to reuse the value that we're sure to + have already copied to the FP regs. */ + op1_di = gen_reg_rtx (DImode); + convert_move (op1_di, operands[1], 0); + + emit_insn (gen_maddsi4 (operands[0], div, op2_neg, + gen_lowpart (SImode, op1_di))); + DONE; +}") + +(define_expand "udivsi3" + [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV" + " +{ + rtx op1_tf, op2_tf, op0_tf, op0_di, twon34; + + op0_tf = gen_reg_rtx (TFmode); + op0_di = gen_reg_rtx (DImode); + + if (CONSTANT_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + op1_tf = gen_reg_rtx (TFmode); + expand_float (op1_tf, operands[1], 1); + + if (CONSTANT_P (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + op2_tf = gen_reg_rtx (TFmode); + expand_float (op2_tf, operands[2], 1); + + /* 2^-34 */ +#if 0 + twon34 = (CONST_DOUBLE_FROM_REAL_VALUE + (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), TFmode)); + twon34 = force_reg (TFmode, twon34); +#else + twon34 = gen_reg_rtx (TFmode); + convert_move (twon34, force_const_mem (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), SFmode)), 0); +#endif + + emit_insn (gen_divsi3_internal (op0_tf, op1_tf, op2_tf, twon34)); + + emit_insn (gen_fixuns_trunctfdi2_alts (op0_di, op0_tf, const1_rtx)); + emit_move_insn (operands[0], gen_lowpart (SImode, op0_di)); + DONE; +}") + +(define_expand "umodsi3" + [(set (match_operand:SI 0 "register_operand" "") + (umod:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV" + " +{ + rtx op2_neg, op1_di, div; + + div = gen_reg_rtx (SImode); + emit_insn 
(gen_udivsi3 (div, operands[1], operands[2])); + + op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0); + + /* This is a trick to get us to reuse the value that we're sure to + have already copied to the FP regs. */ + op1_di = gen_reg_rtx (DImode); + convert_move (op1_di, operands[1], 1); + + emit_insn (gen_maddsi4 (operands[0], div, op2_neg, + gen_lowpart (SImode, op1_di))); + DONE; +}") + +(define_insn_and_split "divsi3_internal" + [(set (match_operand:TF 0 "fr_register_operand" "=&f") + (float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f") + (match_operand:TF 2 "fr_register_operand" "f")))) + (clobber (match_scratch:TF 4 "=&f")) + (clobber (match_scratch:TF 5 "=&f")) + (clobber (match_scratch:BI 6 "=c")) + (use (match_operand:TF 3 "fr_register_operand" "f"))] + "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2))) + (set (match_dup 6) (unspec:BI [(match_dup 1) (match_dup 2)] 5)) + (use (const_int 1))]) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 5) + (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0))) + (match_dup 7))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 4) + (plus:TF (mult:TF (match_dup 5) (match_dup 4)) + (match_dup 4))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 5) + (plus:TF (mult:TF (match_dup 5) (match_dup 5)) + (match_dup 3))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 0) + (plus:TF (mult:TF (match_dup 5) (match_dup 4)) + (match_dup 4))) + (use (const_int 1))])) + ] + "operands[7] = CONST1_RTX (TFmode);" + [(set_attr "predicable" "no")]) + +;; :::::::::::::::::::: +;; :: +;; :: 64 bit 
Integer arithmetic +;; :: +;; :::::::::::::::::::: + +(define_insn "adddi3" + [(set (match_operand:DI 0 "gr_register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "gr_register_operand" "%r,r,a") + (match_operand:DI 2 "gr_reg_or_22bit_operand" "r,I,J")))] + "" + "@ + add %0 = %1, %2 + adds %0 = %2, %1 + addl %0 = %2, %1" + [(set_attr "itanium_class" "ialu")]) + +(define_insn "*adddi3_plus1" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (plus:DI (plus:DI (match_operand:DI 1 "gr_register_operand" "r") + (match_operand:DI 2 "gr_register_operand" "r")) + (const_int 1)))] + "" + "add %0 = %1, %2, 1" + [(set_attr "itanium_class" "ialu")]) + +;; This has some of the same problems as shladd. We let the shladd +;; eliminator hack handle it, which results in the 1 being forced into +;; a register, but not more ugliness here. +(define_insn "*adddi3_plus1_alt" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (plus:DI (mult:DI (match_operand:DI 1 "gr_register_operand" "r") + (const_int 2)) + (const_int 1)))] + "" + "add %0 = %1, %1, 1" + [(set_attr "itanium_class" "ialu")]) + +(define_insn "subdi3" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (minus:DI (match_operand:DI 1 "gr_reg_or_8bit_operand" "rK") + (match_operand:DI 2 "gr_register_operand" "r")))] + "" + "sub %0 = %1, %2" + [(set_attr "itanium_class" "ialu")]) + +(define_insn "*subdi3_minus1" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (plus:DI (not:DI (match_operand:DI 1 "gr_register_operand" "r")) + (match_operand:DI 2 "gr_register_operand" "r")))] + "" + "sub %0 = %2, %1, 1" + [(set_attr "itanium_class" "ialu")]) + +;; ??? Use grfr instead of fr because of virtual register elimination +;; and silly test cases multiplying by the frame pointer. 
;; 64-bit integer multiply is done in the FP unit (xmpy.l), hence the "f"
;; constraints on a nominally integer operation.
(define_insn "muldi3"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
	(mult:DI (match_operand:DI 1 "grfr_register_operand" "f")
		 (match_operand:DI 2 "grfr_register_operand" "f")))]
  ""
  "xmpy.l %0 = %1, %2"
  [(set_attr "itanium_class" "xmpy")])

;; ??? If operand 3 is an eliminable reg, then register elimination causes the
;; same problem that we have with shladd below.  Unfortunately, this case is
;; much harder to fix because the multiply puts the result in an FP register,
;; but the add needs inputs from a general register.  We add a spurious clobber
;; here so that it will be present just in case register elimination gives us
;; the funny result.

;; ??? Maybe validate_changes should try adding match_scratch clobbers?

;; ??? Maybe we should change how adds are canonicalized.

;; Fused multiply-add: op1 * op2 + op3 via xma.l.  The "=X" scratch exists
;; only to absorb the register-elimination case described above.
(define_insn "madddi4"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
	(plus:DI (mult:DI (match_operand:DI 1 "grfr_register_operand" "f")
			  (match_operand:DI 2 "grfr_register_operand" "f"))
		 (match_operand:DI 3 "grfr_register_operand" "f")))
   (clobber (match_scratch:DI 4 "=X"))]
  ""
  "xma.l %0 = %1, %2, %3"
  [(set_attr "itanium_class" "xmpy")])

;; This can be created by register elimination if operand3 of shladd is an
;; eliminable register or has reg_equiv_constant set.

;; We have to use nonmemory_operand for operand 4, to ensure that the
;; validate_changes call inside eliminate_regs will always succeed.  If it
;; doesn't succeed, then this remain a madddi4 pattern, and will be reloaded
;; incorrectly.
;; Placeholder insn for the register-elimination case described above: it
;; only exists while reload is in progress ("#" output, no real template)
;; and is decomposed by the split below once reload has finished.
(define_insn "*madddi4_elim"
  [(set (match_operand:DI 0 "register_operand" "=&r")
	(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "f")
				   (match_operand:DI 2 "register_operand" "f"))
			  (match_operand:DI 3 "register_operand" "f"))
		 (match_operand:DI 4 "nonmemory_operand" "rI")))
   (clobber (match_scratch:DI 5 "=f"))]
  "reload_in_progress"
  "#"
  [(set_attr "itanium_class" "unknown")])

;; Split the elimination pattern into: FP-side madd into the scratch, a
;; copy into the GR destination, then the final GR-side add of operand 4.
(define_split
  [(set (match_operand:DI 0 "register_operand" "")
	(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
				   (match_operand:DI 2 "register_operand" ""))
			  (match_operand:DI 3 "register_operand" ""))
		 (match_operand:DI 4 "gr_reg_or_14bit_operand" "")))
   (clobber (match_scratch:DI 5 ""))]
  "reload_completed"
  [(parallel [(set (match_dup 5) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
					  (match_dup 3)))
	      (clobber (match_dup 0))])
   (set (match_dup 0) (match_dup 5))
   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
  "")

;; ??? There are highpart multiply and add instructions, but we have no way
;; to generate them.
;; High 64 bits of a signed 64x64 multiply (xmpy.h).
(define_insn "smuldi3_highpart"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
	(truncate:DI
	  (lshiftrt:TI
	    (mult:TI (sign_extend:TI
		       (match_operand:DI 1 "fr_register_operand" "f"))
		     (sign_extend:TI
		       (match_operand:DI 2 "fr_register_operand" "f")))
	    (const_int 64))))]
  ""
  "xmpy.h %0 = %1, %2"
  [(set_attr "itanium_class" "xmpy")])

;; High 64 bits of an unsigned 64x64 multiply (xmpy.hu).
(define_insn "umuldi3_highpart"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
	(truncate:DI
	  (lshiftrt:TI
	    (mult:TI (zero_extend:TI
		       (match_operand:DI 1 "fr_register_operand" "f"))
		     (zero_extend:TI
		       (match_operand:DI 2 "fr_register_operand" "f")))
	    (const_int 64))))]
  ""
  "xmpy.hu %0 = %1, %2"
  [(set_attr "itanium_class" "xmpy")])

;; Negation as subtraction from the always-zero register r0.
(define_insn "negdi2"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
	(neg:DI (match_operand:DI 1 "gr_register_operand" "r")))]
  ""
  "sub %0 = r0, %1"
  [(set_attr "itanium_class" "ialu")])

;; abs(x): compare x >= 0 into a predicate, then select x or -x.
(define_expand "absdi2"
  [(set (match_dup 2)
	(ge:BI (match_operand:DI 1 "gr_register_operand" "") (const_int 0)))
   (set (match_operand:DI 0 "gr_register_operand" "")
	(if_then_else:DI (eq (match_dup 2) (const_int 0))
			 (neg:DI (match_dup 1))
			 (match_dup 1)))]
  ""
  "
{
  operands[2] = gen_reg_rtx (BImode);
}")

;; Signed min: predicate on op1 >= op2, then select the smaller.
(define_expand "smindi3"
  [(set (match_dup 3)
	(ge:BI (match_operand:DI 1 "gr_register_operand" "")
	       (match_operand:DI 2 "gr_register_operand" "")))
   (set (match_operand:DI 0 "gr_register_operand" "")
	(if_then_else:DI (ne (match_dup 3) (const_int 0))
			 (match_dup 2) (match_dup 1)))]
  ""
  "
{
  operands[3] = gen_reg_rtx (BImode);
}")

;; Signed max: same predicate, opposite selection.
(define_expand "smaxdi3"
  [(set (match_dup 3)
	(ge:BI (match_operand:DI 1 "gr_register_operand" "")
	       (match_operand:DI 2 "gr_register_operand" "")))
   (set (match_operand:DI 0 "gr_register_operand" "")
	(if_then_else:DI (ne (match_dup 3) (const_int 0))
			 (match_dup 1) (match_dup 2)))]
  ""
  "
{
  operands[3] = gen_reg_rtx (BImode);
}")

;; Unsigned min, via geu.
(define_expand "umindi3"
  [(set (match_dup 3)
	(geu:BI (match_operand:DI 1 "gr_register_operand" "")
		(match_operand:DI 2 "gr_register_operand" "")))
   (set (match_operand:DI 0 "gr_register_operand" "")
	(if_then_else:DI (ne (match_dup 3) (const_int 0))
			 (match_dup 2) (match_dup 1)))]
  ""
  "
{
  operands[3] = gen_reg_rtx (BImode);
}")

;; Unsigned max, via geu.
(define_expand "umaxdi3"
  [(set (match_dup 3)
	(geu:BI (match_operand:DI 1 "gr_register_operand" "")
		(match_operand:DI 2 "gr_register_operand" "")))
   (set (match_operand:DI 0 "gr_register_operand" "")
	(if_then_else:DI (ne (match_dup 3) (const_int 0))
			 (match_dup 1) (match_dup 2)))]
  ""
  "
{
  operands[3] = gen_reg_rtx (BImode);
}")

;; ffs(x) computed as popcnt (x ^ (x - 1)): for x != 0 the xor yields a mask
;; of all bits up to and including the lowest set bit, whose population count
;; is exactly the 1-based bit index; the predicated select returns 0 for x == 0.
;; unspec 8 is the popcnt operation (see *popcnt below).
(define_expand "ffsdi2"
  [(set (match_dup 6)
	(eq:BI (match_operand:DI 1 "gr_register_operand" "") (const_int 0)))
   (set (match_dup 2) (plus:DI (match_dup 1) (const_int -1)))
   (set (match_dup 5) (const_int 0))
   (set (match_dup 3) (xor:DI (match_dup 1) (match_dup 2)))
   (set (match_dup 4) (unspec:DI [(match_dup 3)] 8))
   (set (match_operand:DI 0 "gr_register_operand" "")
	(if_then_else:DI (ne (match_dup 6) (const_int 0))
			 (match_dup 5) (match_dup 4)))]
  ""
  "
{
  operands[2] = gen_reg_rtx (DImode);
  operands[3] = gen_reg_rtx (DImode);
  operands[4] = gen_reg_rtx (DImode);
  operands[5] = gen_reg_rtx (DImode);
  operands[6] = gen_reg_rtx (BImode);
}")

(define_insn "*popcnt"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
	(unspec:DI [(match_operand:DI 1 "gr_register_operand" "r")] 8))]
  ""
  "popcnt %0 = %1"
  [(set_attr "itanium_class" "mmmul")])

;; Signed DImode division: convert both operands to TF, divide with the
;; latency- or throughput-optimized inline sequence, truncate back.
(define_expand "divdi3"
  [(set (match_operand:DI 0 "register_operand" "")
	(div:DI (match_operand:DI 1 "general_operand" "")
		(match_operand:DI 2 "general_operand" "")))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV"
  "
{
  rtx op1_tf, op2_tf, op0_tf;

  op0_tf = gen_reg_rtx (TFmode);

  if (CONSTANT_P (operands[1]))
    operands[1] = force_reg (DImode, operands[1]);
  op1_tf = gen_reg_rtx (TFmode);
  expand_float (op1_tf, operands[1], 0);

  if (CONSTANT_P (operands[2]))
    operands[2] = force_reg (DImode, operands[2]);
  op2_tf = gen_reg_rtx (TFmode);
  expand_float (op2_tf, operands[2], 0);

  if (TARGET_INLINE_DIV_LAT)
    emit_insn (gen_divdi3_internal_lat (op0_tf, op1_tf, op2_tf));
  else
    emit_insn (gen_divdi3_internal_thr (op0_tf, op1_tf, op2_tf));

  emit_insn (gen_fix_trunctfdi2_alts (operands[0], op0_tf, const1_rtx));
  DONE;
}")

;; Signed remainder: op1 - (op1 / op2) * op2, as a fused multiply-add with
;; the negated divisor.
;; NOTE(review): the (mod:SI ...) inside this DImode set looks like it should
;; be (mod:DI ...).  It is harmless here because the expander always exits
;; through DONE (the pattern itself is never emitted), but confirm against
;; later ia64.md revisions, which use mod:DI.
(define_expand "moddi3"
  [(set (match_operand:DI 0 "register_operand" "")
	(mod:SI (match_operand:DI 1 "general_operand" "")
		(match_operand:DI 2 "general_operand" "")))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV"
  "
{
  rtx op2_neg, div;

  div = gen_reg_rtx (DImode);
  emit_insn (gen_divdi3 (div, operands[1], operands[2]));

  op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);

  emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
  DONE;
}")

;; Unsigned DImode division: same scheme as divdi3 but with unsigned
;; int->float conversions and an unsigned final truncation.
(define_expand "udivdi3"
  [(set (match_operand:DI 0 "register_operand" "")
	(udiv:DI (match_operand:DI 1 "general_operand" "")
		 (match_operand:DI 2 "general_operand" "")))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV"
  "
{
  rtx op1_tf, op2_tf, op0_tf;

  op0_tf = gen_reg_rtx (TFmode);

  if (CONSTANT_P (operands[1]))
    operands[1] = force_reg (DImode, operands[1]);
  op1_tf = gen_reg_rtx (TFmode);
  expand_float (op1_tf, operands[1], 1);

  if (CONSTANT_P (operands[2]))
    operands[2] = force_reg (DImode, operands[2]);
  op2_tf = gen_reg_rtx (TFmode);
  expand_float (op2_tf, operands[2], 1);

  if (TARGET_INLINE_DIV_LAT)
    emit_insn (gen_divdi3_internal_lat (op0_tf, op1_tf, op2_tf));
  else
    emit_insn (gen_divdi3_internal_thr (op0_tf, op1_tf, op2_tf));

  emit_insn (gen_fixuns_trunctfdi2_alts (operands[0], op0_tf, const1_rtx));
  DONE;
}")

;; Unsigned remainder, built on udivdi3 exactly as moddi3 builds on divdi3.
(define_expand "umoddi3"
  [(set (match_operand:DI 0 "register_operand" "")
	(umod:DI (match_operand:DI 1 "general_operand" "")
		 (match_operand:DI 2 "general_operand" "")))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV"
  "
{
  rtx op2_neg, div;

  div = gen_reg_rtx (DImode);
  emit_insn (gen_udivdi3 (div, operands[1], operands[2]));

  op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);

  emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
  DONE;
}")

;; Latency-optimized TF reciprocal/divide sequence used by divdi3/udivdi3.
;; Split after reload; every step after the initial approximation is
;; predicated on the BI scratch (operand 6).
(define_insn_and_split "divdi3_internal_lat"
  [(set (match_operand:TF 0 "fr_register_operand" "=&f")
	(float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
			  (match_operand:TF 2 "fr_register_operand" "f"))))
   (clobber (match_scratch:TF 3 "=&f"))
   (clobber (match_scratch:TF 4 "=&f"))
   (clobber (match_scratch:TF 5 "=&f"))
   (clobber (match_scratch:BI 6 "=c"))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV_LAT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
	      (set (match_dup 6) (unspec:BI [(match_dup 1) (match_dup 2)] 5))
	      (use (const_int 1))])
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
			      (match_dup 7)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 5) (mult:TF (match_dup 3) (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 4)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 4))
			      (match_dup 4)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 0)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
			      (match_dup 0)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (mult:TF (match_dup 5) (match_dup 4))
			      (match_dup 4)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 0)
		     (plus:TF (mult:TF (match_dup 5) (match_dup 0))
			      (match_dup 0)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 4)
		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3)))
			      (match_dup 1)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 0)
		     (plus:TF (mult:TF (match_dup 4) (match_dup 0))
			      (match_dup 3)))
		(use (const_int 1))]))
  ]
  "operands[7] = CONST1_RTX (TFmode);"
  [(set_attr "predicable" "no")])

;; Throughput-optimized variant of the same division: fewer scratch
;; registers, a longer dependent chain.
(define_insn_and_split "divdi3_internal_thr"
  [(set (match_operand:TF 0 "fr_register_operand" "=&f")
	(float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
			  (match_operand:TF 2 "fr_register_operand" "f"))))
   (clobber (match_scratch:TF 3 "=&f"))
   (clobber (match_scratch:TF 4 "=f"))
   (clobber (match_scratch:BI 5 "=c"))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV_THR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
	      (set (match_dup 5) (unspec:BI [(match_dup 1) (match_dup 2)] 5))
	      (use (const_int 1))])
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
			      (match_dup 6)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 0)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
			      (match_dup 0)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3) (mult:TF (match_dup 3) (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 0)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
			      (match_dup 0)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3) (mult:TF (match_dup 0) (match_dup 1)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 4)
		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3)))
			      (match_dup 1)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 0)
		     (plus:TF (mult:TF (match_dup 4) (match_dup 0))
			      (match_dup 3)))
		(use (const_int 1))]))
  ]
  "operands[6] = CONST1_RTX (TFmode);"
  [(set_attr "predicable" "no")])

;; ::::::::::::::::::::
;; ::
;; :: 32 bit floating point arithmetic
;; ::
;; ::::::::::::::::::::

(define_insn "addsf3"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(plus:SF (match_operand:SF 1 "fr_register_operand" "%f")
		 (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fadd.s %0 = %1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "subsf3"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(minus:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
		  (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fsub.s %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "mulsf3"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(mult:SF (match_operand:SF 1 "fr_register_operand" "%f")
		 (match_operand:SF 2 "fr_register_operand" "f")))]
  ""
  "fmpy.s %0 = %1, %2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "abssf2"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(abs:SF (match_operand:SF 1 "fr_register_operand" "f")))]
  ""
  "fabs %0 = %1"
  [(set_attr "itanium_class" "fmisc")])

(define_insn "negsf2"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(neg:SF (match_operand:SF 1 "fr_register_operand" "f")))]
  ""
  "fneg %0 = %1"
  [(set_attr "itanium_class" "fmisc")])

;; Combined negate+abs, matched when combine merges the two operations.
(define_insn "*nabssf2"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(neg:SF (abs:SF (match_operand:SF 1 "fr_register_operand" "f"))))]
  ""
  "fnegabs %0 = %1"
  [(set_attr "itanium_class" "fmisc")])

(define_insn "minsf3"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(smin:SF (match_operand:SF 1 "fr_register_operand" "f")
		 (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fmin %0 = %1, %F2"
  [(set_attr "itanium_class" "fmisc")])

(define_insn "maxsf3"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(smax:SF (match_operand:SF 1 "fr_register_operand" "f")
		 (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fmax %0 = %1, %F2"
  [(set_attr "itanium_class" "fmisc")])

;; Fused multiply-add (fma.s); matched by combine.
(define_insn "*maddsf4"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(plus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
			  (match_operand:SF 2 "fr_register_operand" "f"))
		 (match_operand:SF 3 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fma.s %0 = %1, %2, %F3"
  [(set_attr "itanium_class" "fmac")])

;; Fused multiply-subtract (fms.s).
(define_insn "*msubsf4"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(minus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
			   (match_operand:SF 2 "fr_register_operand" "f"))
		  (match_operand:SF 3 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fms.s %0 = %1, %2, %F3"
  [(set_attr "itanium_class" "fmac")])

;; Negated multiply (fnmpy.s).
(define_insn "*nmulsf3"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(neg:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
			 (match_operand:SF 2 "fr_register_operand" "f"))))]
  ""
  "fnmpy.s %0 = %1, %2"
  [(set_attr "itanium_class" "fmac")])

;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
;; Fused negate-multiply-add (fnma.s): -a*b + c.
(define_insn "*nmaddsf4"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(plus:SF (neg:SF (mult:SF
			   (match_operand:SF 1 "fr_register_operand" "f")
			   (match_operand:SF 2 "fr_register_operand" "f")))
		 (match_operand:SF 3 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fnma.s %0 = %1, %2, %F3"
  [(set_attr "itanium_class" "fmac")])

;; SFmode division, dispatched to the latency- or throughput-optimized
;; inline sequence depending on the -minline-divide flavor.
(define_expand "divsf3"
  [(set (match_operand:SF 0 "fr_register_operand" "")
	(div:SF (match_operand:SF 1 "fr_register_operand" "")
		(match_operand:SF 2 "fr_register_operand" "")))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV"
  "
{
  rtx insn;
  if (TARGET_INLINE_DIV_LAT)
    insn = gen_divsf3_internal_lat (operands[0], operands[1], operands[2]);
  else
    insn = gen_divsf3_internal_thr (operands[0], operands[1], operands[2]);
  emit_insn (insn);
  DONE;
}")

;; Latency-optimized SF division.  The intermediate computation is carried
;; out in TFmode views of the same FP registers (operands 6-9 below alias
;; operands 0-2 via gen_rtx_REG on the same REGNO); the final predicated
;; step truncates back to SFmode.
(define_insn_and_split "divsf3_internal_lat"
  [(set (match_operand:SF 0 "fr_register_operand" "=&f")
	(div:SF (match_operand:SF 1 "fr_register_operand" "f")
		(match_operand:SF 2 "fr_register_operand" "f")))
   (clobber (match_scratch:TF 3 "=&f"))
   (clobber (match_scratch:TF 4 "=f"))
   (clobber (match_scratch:BI 5 "=c"))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV_LAT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 6) (div:TF (const_int 1) (match_dup 8)))
	      (set (match_dup 5) (unspec:BI [(match_dup 7) (match_dup 8)] 5))
	      (use (const_int 1))])
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3) (mult:TF (match_dup 7) (match_dup 6)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 4)
		     (plus:TF (neg:TF (mult:TF (match_dup 8) (match_dup 6)))
			      (match_dup 10)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (mult:TF (match_dup 4) (match_dup 3))
			      (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 4) (mult:TF (match_dup 4) (match_dup 4)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (mult:TF (match_dup 4) (match_dup 3))
			      (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 4) (mult:TF (match_dup 4) (match_dup 4)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 9)
		     (float_truncate:DF
		       (plus:TF (mult:TF (match_dup 4) (match_dup 3))
			      (match_dup 3))))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (set (match_dup 0)
	  (float_truncate:SF (match_dup 6))))
  ]
  "operands[6] = gen_rtx_REG (TFmode, REGNO (operands[0]));
   operands[7] = gen_rtx_REG (TFmode, REGNO (operands[1]));
   operands[8] = gen_rtx_REG (TFmode, REGNO (operands[2]));
   operands[9] = gen_rtx_REG (DFmode, REGNO (operands[0]));
   operands[10] = CONST1_RTX (TFmode);"
  [(set_attr "predicable" "no")])

;; Throughput-optimized SF division; same register-aliasing scheme as the
;; latency variant above.
(define_insn_and_split "divsf3_internal_thr"
  [(set (match_operand:SF 0 "fr_register_operand" "=&f")
	(div:SF (match_operand:SF 1 "fr_register_operand" "f")
		(match_operand:SF 2 "fr_register_operand" "f")))
   (clobber (match_scratch:TF 3 "=&f"))
   (clobber (match_scratch:TF 4 "=f"))
   (clobber (match_scratch:BI 5 "=c"))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV_THR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 6) (div:TF (const_int 1) (match_dup 8)))
	      (set (match_dup 5) (unspec:BI [(match_dup 7) (match_dup 8)] 5))
	      (use (const_int 1))])
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (neg:TF (mult:TF (match_dup 8) (match_dup 6)))
			      (match_dup 10)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 3))
			      (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 6)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 6))
			      (match_dup 6)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 9)
		     (float_truncate:SF
		       (mult:TF (match_dup 7) (match_dup 6))))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 4)
		     (plus:TF (neg:TF (mult:TF (match_dup 8) (match_dup 3)))
			      (match_dup 7)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (set (match_dup 0)
	  (float_truncate:SF
	    (plus:TF (mult:TF (match_dup 4) (match_dup 6))
		     (match_dup 3)))))
  ]
  "operands[6] = gen_rtx_REG (TFmode, REGNO (operands[0]));
   operands[7] = gen_rtx_REG (TFmode, REGNO (operands[1]));
   operands[8] = gen_rtx_REG (TFmode, REGNO (operands[2]));
   operands[9] = gen_rtx_REG (SFmode, REGNO (operands[3]));
   operands[10] = CONST1_RTX (TFmode);"
  [(set_attr "predicable" "no")])

;; ::::::::::::::::::::
;; ::
;; :: 64 bit floating point arithmetic
;; ::
;; ::::::::::::::::::::

(define_insn "adddf3"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(plus:DF (match_operand:DF 1 "fr_register_operand" "%f")
		 (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fadd.d %0 = %1, %F2"
  [(set_attr "itanium_class" "fmac")])

;; DF add with single-precision result completer (.s).
(define_insn "*adddf3_trunc"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (plus:DF (match_operand:DF 1 "fr_register_operand" "%f")
		   (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG"))))]
  ""
  "fadd.s %0 = %1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "subdf3"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(minus:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
		  (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fsub.d %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*subdf3_trunc"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (minus:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
		    (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG"))))]
  ""
  "fsub.s %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "muldf3"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(mult:DF (match_operand:DF 1 "fr_register_operand" "f")
		 (match_operand:DF 2 "fr_register_operand" "f")))]
  ""
  "fmpy.d %0 = %1, %2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*muldf3_trunc"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
		   (match_operand:DF 2 "fr_register_operand" "f"))))]
  ""
  "fmpy.s %0 = %1, %2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "absdf2"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(abs:DF (match_operand:DF 1 "fr_register_operand" "f")))]
  ""
  "fabs %0 = %1"
  [(set_attr "itanium_class" "fmisc")])

(define_insn "negdf2"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(neg:DF (match_operand:DF 1 "fr_register_operand" "f")))]
  ""
  "fneg %0 = %1"
  [(set_attr "itanium_class" "fmisc")])

(define_insn "*nabsdf2"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(neg:DF (abs:DF (match_operand:DF 1 "fr_register_operand" "f"))))]
  ""
  "fnegabs %0 = %1"
  [(set_attr "itanium_class" "fmisc")])

(define_insn "mindf3"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(smin:DF (match_operand:DF 1 "fr_register_operand" "f")
		 (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fmin %0 = %1, %F2"
  [(set_attr "itanium_class" "fmisc")])

(define_insn "maxdf3"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(smax:DF (match_operand:DF 1 "fr_register_operand" "f")
		 (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fmax %0 = %1, %F2"
  [(set_attr "itanium_class" "fmisc")])

;; Fused multiply-add (fma.d); matched by combine.
(define_insn "*madddf4"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(plus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
			  (match_operand:DF 2 "fr_register_operand" "f"))
		 (match_operand:DF 3 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fma.d %0 = %1, %2, %F3"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*madddf4_trunc"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (plus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
			    (match_operand:DF 2 "fr_register_operand" "f"))
		   (match_operand:DF 3 "fr_reg_or_fp01_operand" "fG"))))]
  ""
  "fma.s %0 = %1, %2, %F3"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*msubdf4"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(minus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
			   (match_operand:DF 2 "fr_register_operand" "f"))
		  (match_operand:DF 3 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fms.d %0 = %1, %2, %F3"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*msubdf4_trunc"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (minus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
			     (match_operand:DF 2 "fr_register_operand" "f"))
		    (match_operand:DF 3 "fr_reg_or_fp01_operand" "fG"))))]
  ""
  "fms.s %0 = %1, %2, %F3"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*nmuldf3"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(neg:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
			 (match_operand:DF 2 "fr_register_operand" "f"))))]
  ""
  "fnmpy.d %0 = %1, %2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*nmuldf3_trunc"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (neg:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
			   (match_operand:DF 2 "fr_register_operand" "f")))))]
  ""
  "fnmpy.s %0 = %1, %2"
  [(set_attr "itanium_class" "fmac")])

;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
;; Fused negate-multiply-add (fnma.d): -a*b + c.
(define_insn "*nmadddf4"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(plus:DF (neg:DF (mult:DF
			   (match_operand:DF 1 "fr_register_operand" "f")
			   (match_operand:DF 2 "fr_register_operand" "f")))
		 (match_operand:DF 3 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "fnma.d %0 = %1, %2, %F3"
  [(set_attr "itanium_class" "fmac")])

;; Variant carrying an explicit status-field selector (operand 4) emitted
;; as the ".s%4" completer.
(define_insn "*nmadddf4_alts"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(plus:DF (neg:DF (mult:DF
			   (match_operand:DF 1 "fr_register_operand" "f")
			   (match_operand:DF 2 "fr_register_operand" "f")))
		 (match_operand:DF 3 "fr_reg_or_fp01_operand" "fG")))
   (use (match_operand:SI 4 "const_int_operand" ""))]
  ""
  "fnma.d.s%4 %0 = %1, %2, %F3"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*nmadddf4_trunc"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (plus:DF (neg:DF (mult:DF
			     (match_operand:DF 1 "fr_register_operand" "f")
			     (match_operand:DF 2 "fr_register_operand" "f")))
		   (match_operand:DF 3 "fr_reg_or_fp01_operand" "fG"))))]
  ""
  "fnma.s %0 = %1, %2, %F3"
  [(set_attr "itanium_class" "fmac")])

;; DFmode division, dispatched to the latency- or throughput-optimized
;; inline sequence.
(define_expand "divdf3"
  [(set (match_operand:DF 0 "fr_register_operand" "")
	(div:DF (match_operand:DF 1 "fr_register_operand" "")
		(match_operand:DF 2 "fr_register_operand" "")))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV"
  "
{
  rtx insn;
  if (TARGET_INLINE_DIV_LAT)
    insn = gen_divdf3_internal_lat (operands[0], operands[1], operands[2]);
  else
    insn = gen_divdf3_internal_thr (operands[0], operands[1], operands[2]);
  emit_insn (insn);
  DONE;
}")

;; Latency-optimized DF division.  Operands 7-9 are TFmode views of the
;; same FP registers as operands 0-2; operands 10/11 are DFmode views of
;; two of the scratches, used for the truncating refinement steps.
(define_insn_and_split "divdf3_internal_lat"
  [(set (match_operand:DF 0 "fr_register_operand" "=&f")
	(div:DF (match_operand:DF 1 "fr_register_operand" "f")
		(match_operand:DF 2 "fr_register_operand" "f")))
   (clobber (match_scratch:TF 3 "=&f"))
   (clobber (match_scratch:TF 4 "=&f"))
   (clobber (match_scratch:TF 5 "=&f"))
   (clobber (match_scratch:BI 6 "=c"))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV_LAT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 7) (div:TF (const_int 1) (match_dup 9)))
	      (set (match_dup 6) (unspec:BI [(match_dup 8) (match_dup 9)] 5))
	      (use (const_int 1))])
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 3) (mult:TF (match_dup 8) (match_dup 7)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 4)
		     (plus:TF (neg:TF (mult:TF (match_dup 9) (match_dup 7)))
			      (match_dup 12)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (mult:TF (match_dup 4) (match_dup 3))
			      (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 5) (mult:TF (match_dup 4) (match_dup 4)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 7)
		     (plus:TF (mult:TF (match_dup 4) (match_dup 7))
			      (match_dup 7)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (mult:TF (match_dup 5) (match_dup 3))
			      (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 4) (mult:TF (match_dup 5) (match_dup 5)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 7)
		     (plus:TF (mult:TF (match_dup 5) (match_dup 7))
			      (match_dup 7)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 10)
		     (float_truncate:DF
		       (plus:TF (mult:TF (match_dup 4) (match_dup 3))
			      (match_dup 3))))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 7)
		     (plus:TF (mult:TF (match_dup 4) (match_dup 7))
			      (match_dup 7)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (parallel [(set (match_dup 11)
		     (float_truncate:DF
		       (plus:TF (neg:TF (mult:TF (match_dup 9) (match_dup 3)))
				(match_dup 8))))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 6) (const_int 0))
     (set (match_dup 0)
	  (float_truncate:DF (plus:TF (mult:TF (match_dup 5) (match_dup 7))
				      (match_dup 3)))))
  ]
  "operands[7] = gen_rtx_REG (TFmode, REGNO (operands[0]));
   operands[8] = gen_rtx_REG (TFmode, REGNO (operands[1]));
   operands[9] = gen_rtx_REG (TFmode, REGNO (operands[2]));
   operands[10] = gen_rtx_REG (DFmode, REGNO (operands[3]));
   operands[11] = gen_rtx_REG (DFmode, REGNO (operands[5]));
   operands[12] = CONST1_RTX (TFmode);"
  [(set_attr "predicable" "no")])

;; Throughput-optimized DF division; finishes with DFmode fnma/fma steps
;; on the original operands rather than TF-wide ones.
(define_insn_and_split "divdf3_internal_thr"
  [(set (match_operand:DF 0 "fr_register_operand" "=&f")
	(div:DF (match_operand:DF 1 "fr_register_operand" "f")
		(match_operand:DF 2 "fr_register_operand" "f")))
   (clobber (match_scratch:TF 3 "=&f"))
   (clobber (match_scratch:DF 4 "=f"))
   (clobber (match_scratch:BI 5 "=c"))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV_THR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 6) (div:TF (const_int 1) (match_dup 8)))
	      (set (match_dup 5) (unspec:BI [(match_dup 7) (match_dup 8)] 5))
	      (use (const_int 1))])
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (neg:TF (mult:TF (match_dup 8) (match_dup 6)))
			      (match_dup 10)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 6)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 6))
			      (match_dup 6)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (mult:TF (match_dup 3) (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 6)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 6))
			      (match_dup 6)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (mult:TF (match_dup 3) (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 6)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 6))
			      (match_dup 6)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 9)
		     (float_truncate:DF
		       (mult:TF (match_dup 7) (match_dup 3))))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 4)
		     (plus:DF (neg:DF (mult:DF (match_dup 2) (match_dup 9)))
			      (match_dup 1)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (set (match_dup 0)
	  (plus:DF (mult:DF (match_dup 4) (match_dup 0))
		   (match_dup 9))))
  ]
  "operands[6] = gen_rtx_REG (TFmode, REGNO (operands[0]));
   operands[7] = gen_rtx_REG (TFmode, REGNO (operands[1]));
   operands[8] = gen_rtx_REG (TFmode, REGNO (operands[2]));
   operands[9] = gen_rtx_REG (DFmode, REGNO (operands[3]));
   operands[10] = CONST1_RTX (TFmode);"
  [(set_attr "predicable" "no")])

;; ::::::::::::::::::::
;; ::
;; :: 80 bit floating point arithmetic
;; ::
;; ::::::::::::::::::::

;; All TFmode patterns require INTEL_EXTENDED_IEEE_FORMAT.

(define_insn "addtf3"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
	(plus:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		 (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fadd %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*addtf3_truncsf"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (plus:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		   (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fadd.s %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*addtf3_truncdf"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(float_truncate:DF
	  (plus:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		   (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fadd.d %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "subtf3"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
	(minus:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		  (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fsub %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*subtf3_truncsf"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (minus:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		    (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fsub.s %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*subtf3_truncdf"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(float_truncate:DF
	  (minus:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		    (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fsub.d %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "multf3"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
	(mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		 (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fmpy %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*multf3_truncsf"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		   (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fmpy.s %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*multf3_truncdf"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(float_truncate:DF
	  (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		   (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fmpy.d %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

;; "_alts" variants carry an explicit status-field selector (last operand)
;; emitted as the ".s%N" completer.
(define_insn "*multf3_alts"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
	(mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		 (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))
   (use (match_operand:SI 3 "const_int_operand" ""))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fmpy.s%3 %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*multf3_truncsf_alts"
  [(set (match_operand:SF 0 "fr_register_operand" "=f")
	(float_truncate:SF
	  (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		   (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))))
   (use (match_operand:SI 3 "const_int_operand" ""))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fmpy.s.s%3 %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "*multf3_truncdf_alts"
  [(set (match_operand:DF 0 "fr_register_operand" "=f")
	(float_truncate:DF
	  (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		   (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))))
   (use (match_operand:SI 3 "const_int_operand" ""))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fmpy.d.s%3 %0 = %F1, %F2"
  [(set_attr "itanium_class" "fmac")])

(define_insn "abstf2"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
	(abs:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fabs %0 = %F1"
  [(set_attr "itanium_class" "fmisc")])

(define_insn "negtf2"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
	(neg:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fneg %0 = %F1"
  [(set_attr "itanium_class" "fmisc")])

(define_insn "*nabstf2"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
	(neg:TF (abs:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG"))))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fnegabs %0 = %F1"
  [(set_attr "itanium_class" "fmisc")])

(define_insn "mintf3"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
	(smin:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
		 (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fmin %0 = %F1, %F2"
  [(set_attr
"itanium_class" "fmisc")]) + +(define_insn "maxtf3" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (smax:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fmax %0 = %F1, %F2" + [(set_attr "itanium_class" "fmisc")]) + +(define_insn "*maddtf4" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fma %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*maddtf4_truncsf" + [(set (match_operand:SF 0 "fr_register_operand" "=f") + (float_truncate:SF + (plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG"))))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fma.s %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*maddtf4_truncdf" + [(set (match_operand:DF 0 "fr_register_operand" "=f") + (float_truncate:DF + (plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG"))))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fma.d %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*maddtf4_alts" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG"))) + (use (match_operand:SI 4 "const_int_operand" ""))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fma.s%4 %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*maddtf4_alts_truncdf" + [(set (match_operand:DF 0 "fr_register_operand" "=f") + (float_truncate:DF + 
(plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))) + (use (match_operand:SI 4 "const_int_operand" ""))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fma.d.s%4 %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*msubtf4" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (minus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fms %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*msubtf4_truncsf" + [(set (match_operand:SF 0 "fr_register_operand" "=f") + (float_truncate:SF + (minus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG"))))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fms.s %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*msubtf4_truncdf" + [(set (match_operand:DF 0 "fr_register_operand" "=f") + (float_truncate:DF + (minus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG"))))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fms.d %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*nmultf3" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (neg:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fnmpy %0 = %F1, %F2" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*nmultf3_truncsf" + [(set (match_operand:SF 0 "fr_register_operand" "=f") + (float_truncate:SF + (neg:TF (mult:TF + (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 
"tfreg_or_fp01_operand" "fG")))))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fnmpy.s %0 = %F1, %F2" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*nmultf3_truncdf" + [(set (match_operand:DF 0 "fr_register_operand" "=f") + (float_truncate:DF + (neg:TF (mult:TF + (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fnmpy.d %0 = %F1, %F2" + [(set_attr "itanium_class" "fmac")]) + +;; ??? Is it possible to canonicalize this as (minus (reg) (mult))? + +(define_insn "*nmaddtf4" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (plus:TF (neg:TF (mult:TF + (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fnma %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*nmaddtf4_truncsf" + [(set (match_operand:SF 0 "fr_register_operand" "=f") + (float_truncate:SF + (plus:TF (neg:TF (mult:TF + (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG"))))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fnma.s %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*nmaddtf4_truncdf" + [(set (match_operand:DF 0 "fr_register_operand" "=f") + (float_truncate:DF + (plus:TF (neg:TF (mult:TF + (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG"))))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fnma.d %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*nmaddtf4_alts" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (plus:TF (neg:TF (mult:TF + (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG"))) + (use 
(match_operand:SI 4 "const_int_operand" ""))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fnma.s%4 %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*nmaddtf4_truncdf_alts" + [(set (match_operand:DF 0 "fr_register_operand" "=f") + (float_truncate:DF + (plus:TF (neg:TF + (mult:TF + (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))) + (use (match_operand:SI 4 "const_int_operand" ""))] + "INTEL_EXTENDED_IEEE_FORMAT" + "fnma.d.s%4 %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_expand "divtf3" + [(set (match_operand:TF 0 "fr_register_operand" "") + (div:TF (match_operand:TF 1 "fr_register_operand" "") + (match_operand:TF 2 "fr_register_operand" "")))] + "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV" + " +{ + rtx insn; + if (TARGET_INLINE_DIV_LAT) + insn = gen_divtf3_internal_lat (operands[0], operands[1], operands[2]); + else + insn = gen_divtf3_internal_thr (operands[0], operands[1], operands[2]); + emit_insn (insn); + DONE; +}") + +(define_insn_and_split "divtf3_internal_lat" + [(set (match_operand:TF 0 "fr_register_operand" "=&f") + (div:TF (match_operand:TF 1 "fr_register_operand" "f") + (match_operand:TF 2 "fr_register_operand" "f"))) + (clobber (match_scratch:TF 3 "=&f")) + (clobber (match_scratch:TF 4 "=&f")) + (clobber (match_scratch:TF 5 "=&f")) + (clobber (match_scratch:TF 6 "=&f")) + (clobber (match_scratch:BI 7 "=c"))] + "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV_LAT" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2))) + (set (match_dup 7) (unspec:BI [(match_dup 1) (match_dup 2)] 5)) + (use (const_int 1))]) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 3) + (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0))) + (match_dup 8))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 
4) (mult:TF (match_dup 1) (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 5) (mult:TF (match_dup 3) (match_dup 3))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 6) + (plus:TF (mult:TF (match_dup 3) (match_dup 3)) + (match_dup 3))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 3) + (plus:TF (mult:TF (match_dup 5) (match_dup 5)) + (match_dup 3))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 5) + (plus:TF (mult:TF (match_dup 6) (match_dup 0)) + (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 0) + (plus:TF (mult:TF (match_dup 5) (match_dup 3)) + (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 4) + (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 4))) + (match_dup 1))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 3) + (plus:TF (mult:TF (match_dup 3) (match_dup 0)) + (match_dup 4))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 5) + (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0))) + (match_dup 8))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 0) + (plus:TF (mult:TF (match_dup 4) (match_dup 0)) + (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (parallel [(set (match_dup 4) + (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3))) + (match_dup 1))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 7) (const_int 0)) + (set (match_dup 0) + (plus:TF (mult:TF (match_dup 4) (match_dup 0)) + (match_dup 3)))) + ] + "operands[8] = CONST1_RTX (TFmode);" + [(set_attr "predicable" "no")]) + 
;; TF division, throughput-optimized: frcpa seed (unspec 5 sets the BI
;; predicate), then predicated fma refinement with two TF scratches.
;; The step order is the algorithm -- do not reorder.
(define_insn_and_split "divtf3_internal_thr"
  [(set (match_operand:TF 0 "fr_register_operand" "=&f")
	(div:TF (match_operand:TF 1 "fr_register_operand" "f")
		(match_operand:TF 2 "fr_register_operand" "f")))
   (clobber (match_scratch:TF 3 "=&f"))
   (clobber (match_scratch:TF 4 "=&f"))
   (clobber (match_scratch:BI 5 "=c"))]
  "INTEL_EXTENDED_IEEE_FORMAT && TARGET_INLINE_DIV_THR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
	      (set (match_dup 5) (unspec:BI [(match_dup 1) (match_dup 2)] 5))
	      (use (const_int 1))])
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
			      (match_dup 6)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 4)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
			      (match_dup 0)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3) (mult:TF (match_dup 3) (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 4))
			      (match_dup 4)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 0)
		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3)))
			      (match_dup 6)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 0)
		     (plus:TF (mult:TF (match_dup 0) (match_dup 3))
			      (match_dup 3)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 4)))
			      (match_dup 1)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 3)
		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
			      (match_dup 4)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 4)
		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
			      (match_dup 6)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 0)
		     (plus:TF (mult:TF (match_dup 4) (match_dup 0))
			      (match_dup 0)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (parallel [(set (match_dup 4)
		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3)))
			      (match_dup 1)))
		(use (const_int 1))]))
   (cond_exec (ne (match_dup 5) (const_int 0))
     (set (match_dup 0)
	  (plus:TF (mult:TF (match_dup 4) (match_dup 0))
		   (match_dup 3))))
  ]
  "operands[6] = CONST1_RTX (TFmode);"
  [(set_attr "predicable" "no")])

;; ??? frcpa works like cmp.foo.unc.

;; Reciprocal approximation: frcpa produces both the seed value and a
;; predicate; the (use ...) operand selects the status field (%4).
(define_insn "*recip_approx"
  [(set (match_operand:TF 0 "fr_register_operand" "=f")
	(div:TF (const_int 1)
		(match_operand:TF 3 "fr_register_operand" "f")))
   (set (match_operand:BI 1 "register_operand" "=c")
	(unspec:BI [(match_operand:TF 2 "fr_register_operand" "f")
		    (match_dup 3)] 5))
   (use (match_operand:SI 4 "const_int_operand" ""))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "frcpa.s%4 %0, %1 = %2, %3"
  [(set_attr "itanium_class" "fmisc")
   (set_attr "predicable" "no")])

;; ::::::::::::::::::::
;; ::
;; :: 32 bit Integer Shifts and Rotates
;; ::
;; ::::::::::::::::::::

(define_expand "ashlsi3"
  [(set (match_operand:SI 0 "gr_register_operand" "")
	(ashift:SI (match_operand:SI 1 "gr_register_operand" "")
		   (match_operand:SI 2 "gr_reg_or_5bit_operand" "")))]
  ""
  "
{
  if (GET_CODE (operands[2]) != CONST_INT)
    {
      /* Why oh why didn't Intel arrange for SHIFT_COUNT_TRUNCATED?  Now
	 we've got to get rid of stray bits outside the SImode register.  */
      rtx subshift = gen_reg_rtx (DImode);
      emit_insn (gen_zero_extendsidi2 (subshift, operands[2]));
      operands[2] = subshift;
    }
}")

;; Three alternatives: shladd for counts 1-4, dep.z for other constant
;; counts, shl for register counts.
(define_insn "*ashlsi3_internal"
  [(set (match_operand:SI 0 "gr_register_operand" "=r,r,r")
	(ashift:SI (match_operand:SI 1 "gr_register_operand" "r,r,r")
		   (match_operand:DI 2 "gr_reg_or_5bit_operand" "R,n,r")))]
  ""
  "@
   shladd %0 = %1, %2, r0
   dep.z %0 = %1, %2, %E2
   shl %0 = %1, %2"
  [(set_attr "itanium_class" "ialu,ishf,mmshf")])

;; Arithmetic right shift via sign-extraction (constant count) or a full
;; 64-bit shift on the sign-extended value (register count).
(define_expand "ashrsi3"
  [(set (match_operand:SI 0 "gr_register_operand" "")
	(ashiftrt:SI (match_operand:SI 1 "gr_register_operand" "")
		     (match_operand:SI 2 "gr_reg_or_5bit_operand" "")))]
  ""
  "
{
  rtx subtarget = gen_reg_rtx (DImode);
  if (GET_CODE (operands[2]) == CONST_INT)
    emit_insn (gen_extv (subtarget, gen_lowpart (DImode, operands[1]),
			 GEN_INT (32 - INTVAL (operands[2])), operands[2]));
  else
    {
      rtx subshift = gen_reg_rtx (DImode);
      emit_insn (gen_extendsidi2 (subtarget, operands[1]));
      emit_insn (gen_zero_extendsidi2 (subshift, operands[2]));
      emit_insn (gen_ashrdi3 (subtarget, subtarget, subshift));
    }
  emit_move_insn (gen_lowpart (DImode, operands[0]), subtarget);
  DONE;
}")

;; Logical right shift: same scheme as ashrsi3 but zero-extended.
(define_expand "lshrsi3"
  [(set (match_operand:SI 0 "gr_register_operand" "")
	(lshiftrt:SI (match_operand:SI 1 "gr_register_operand" "")
		     (match_operand:SI 2 "gr_reg_or_5bit_operand" "")))]
  ""
  "
{
  rtx subtarget = gen_reg_rtx (DImode);
  if (GET_CODE (operands[2]) == CONST_INT)
    emit_insn (gen_extzv (subtarget, gen_lowpart (DImode, operands[1]),
			  GEN_INT (32 - INTVAL (operands[2])), operands[2]));
  else
    {
      rtx subshift = gen_reg_rtx (DImode);
      emit_insn (gen_zero_extendsidi2 (subtarget, operands[1]));
      emit_insn (gen_zero_extendsidi2 (subshift, operands[2]));
      emit_insn (gen_lshrdi3 (subtarget, subtarget, subshift));
    }
  emit_move_insn (gen_lowpart (DImode, operands[0]), subtarget);
  DONE;
}")

;; Use mix4.r/shr to implement rotrsi3.  We only get 32 bits of valid result
;; here, instead of 64 like the patterns above.  Keep the pattern together
;; until after combine; otherwise it won't get matched often.

(define_expand "rotrsi3"
  [(set (match_operand:SI 0 "gr_register_operand" "")
	(rotatert:SI (match_operand:SI 1 "gr_register_operand" "")
		     (match_operand:SI 2 "gr_reg_or_5bit_operand" "")))]
  ""
  "
{
  if (GET_MODE (operands[2]) != VOIDmode)
    {
      rtx tmp = gen_reg_rtx (DImode);
      emit_insn (gen_zero_extendsidi2 (tmp, operands[2]));
      operands[2] = tmp;
    }
}")

;; Rotate right: duplicate the 32-bit value into both halves of a DImode
;; register, then shift right by the count.
(define_insn_and_split "*rotrsi3_internal"
  [(set (match_operand:SI 0 "gr_register_operand" "=&r")
	(rotatert:SI (match_operand:SI 1 "gr_register_operand" "r")
		     (match_operand:DI 2 "gr_reg_or_5bit_operand" "rM")))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 3)
	(ior:DI (zero_extend:DI (match_dup 1))
		(ashift:DI (zero_extend:DI (match_dup 1)) (const_int 32))))
   (set (match_dup 3)
	(lshiftrt:DI (match_dup 3) (match_dup 2)))]
  "operands[3] = gen_rtx_REG (DImode, REGNO (operands[0]));")

;; Rotate left: constant counts become a rotate-right by (32 - count);
;; register counts are rewritten as a subtract plus rotrsi3.
(define_expand "rotlsi3"
  [(set (match_operand:SI 0 "gr_register_operand" "")
	(rotate:SI (match_operand:SI 1 "gr_register_operand" "")
		   (match_operand:SI 2 "gr_reg_or_5bit_operand" "")))]
  ""
  "
{
  if (! shift_32bit_count_operand (operands[2], SImode))
    {
      rtx tmp = gen_reg_rtx (SImode);
      emit_insn (gen_subsi3 (tmp, GEN_INT (32), operands[2]));
      emit_insn (gen_rotrsi3 (operands[0], operands[1], tmp));
      DONE;
    }
}")

(define_insn_and_split "*rotlsi3_internal"
  [(set (match_operand:SI 0 "gr_register_operand" "=r")
	(rotate:SI (match_operand:SI 1 "gr_register_operand" "r")
		   (match_operand:SI 2 "shift_32bit_count_operand" "n")))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 3)
	(ior:DI (zero_extend:DI (match_dup 1))
		(ashift:DI (zero_extend:DI (match_dup 1)) (const_int 32))))
   (set (match_dup 3)
	(lshiftrt:DI (match_dup 3) (match_dup 2)))]
  "operands[3] = gen_rtx_REG (DImode, REGNO (operands[0]));
   operands[2] = GEN_INT (32 - INTVAL (operands[2]));")

;; ::::::::::::::::::::
;; ::
;; :: 64 bit Integer Shifts and Rotates
;; ::
;; ::::::::::::::::::::

(define_insn "ashldi3"
  [(set (match_operand:DI 0 "gr_register_operand" "=r,r,r")
	(ashift:DI (match_operand:DI 1 "gr_register_operand" "r,r,r")
		   (match_operand:DI 2 "gr_reg_or_6bit_operand" "R,r,rM")))]
  ""
  "@
   shladd %0 = %1, %2, r0
   shl %0 = %1, %2
   shl %0 = %1, %2"
  [(set_attr "itanium_class" "ialu,mmshf,mmshfi")])

;; ??? Maybe combine this with the multiply and add instruction?

(define_insn "*shladd"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
	(plus:DI (mult:DI (match_operand:DI 1 "gr_register_operand" "r")
			  (match_operand:DI 2 "shladd_operand" "n"))
		 (match_operand:DI 3 "gr_register_operand" "r")))]
  ""
  "shladd %0 = %1, %S2, %3"
  [(set_attr "itanium_class" "ialu")])

;; This can be created by register elimination if operand3 of shladd is an
;; eliminable register or has reg_equiv_constant set.

;; We have to use nonmemory_operand for operand 4, to ensure that the
;; validate_changes call inside eliminate_regs will always succeed.  If it
;; doesn't succeed, then this remain a shladd pattern, and will be reloaded
;; incorrectly.
;; shladd with an extra addend created by register elimination; split after
;; reload into shladd followed by a plain add.  Only valid while reload is
;; in progress (see the comment block preceding this pattern).
(define_insn_and_split "*shladd_elim"
  [(set (match_operand:DI 0 "gr_register_operand" "=&r")
	(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "gr_register_operand" "r")
				   (match_operand:DI 2 "shladd_operand" "n"))
			  (match_operand:DI 3 "nonmemory_operand" "r"))
		 (match_operand:DI 4 "nonmemory_operand" "rI")))]
  "reload_in_progress"
  "* abort ();"
  "reload_completed"
  [(set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
			       (match_dup 3)))
   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
  ""
  [(set_attr "itanium_class" "unknown")])

(define_insn "ashrdi3"
  [(set (match_operand:DI 0 "gr_register_operand" "=r,r")
	(ashiftrt:DI (match_operand:DI 1 "gr_register_operand" "r,r")
		     (match_operand:DI 2 "gr_reg_or_6bit_operand" "r,rM")))]
  ""
  "@
   shr %0 = %1, %2
   shr %0 = %1, %2"
  [(set_attr "itanium_class" "mmshf,mmshfi")])

(define_insn "lshrdi3"
  [(set (match_operand:DI 0 "gr_register_operand" "=r,r")
	(lshiftrt:DI (match_operand:DI 1 "gr_register_operand" "r,r")
		     (match_operand:DI 2 "gr_reg_or_6bit_operand" "r,rM")))]
  ""
  "@
   shr.u %0 = %1, %2
   shr.u %0 = %1, %2"
  [(set_attr "itanium_class" "mmshf,mmshfi")])

;; Using a predicate that accepts only constants doesn't work, because optabs
;; will load the operand into a register and call the pattern if the predicate
;; did not accept it on the first try.  So we use nonmemory_operand and then
;; verify that we have an appropriate constant in the expander.

(define_expand "rotrdi3"
  [(set (match_operand:DI 0 "gr_register_operand" "")
	(rotatert:DI (match_operand:DI 1 "gr_register_operand" "")
		     (match_operand:DI 2 "nonmemory_operand" "")))]
  ""
  "
{
  if (! shift_count_operand (operands[2], DImode))
    FAIL;
}")

;; 64-bit rotates map directly onto shrp with the source given twice.
(define_insn "*rotrdi3_internal"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
	(rotatert:DI (match_operand:DI 1 "gr_register_operand" "r")
		     (match_operand:DI 2 "shift_count_operand" "M")))]
  ""
  "shrp %0 = %1, %1, %2"
  [(set_attr "itanium_class" "ishf")])

(define_expand "rotldi3"
  [(set (match_operand:DI 0 "gr_register_operand" "")
	(rotate:DI (match_operand:DI 1 "gr_register_operand" "")
		   (match_operand:DI 2 "nonmemory_operand" "")))]
  ""
  "
{
  if (! shift_count_operand (operands[2], DImode))
    FAIL;
}")

;; Rotate left uses shrp with the complemented count (%e2 modifier).
(define_insn "*rotldi3_internal"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
	(rotate:DI (match_operand:DI 1 "gr_register_operand" "r")
		   (match_operand:DI 2 "shift_count_operand" "M")))]
  ""
  "shrp %0 = %1, %1, %e2"
  [(set_attr "itanium_class" "ishf")])

;; ::::::::::::::::::::
;; ::
;; :: 32 bit Integer Logical operations
;; ::
;; ::::::::::::::::::::

;; We don't seem to need any other 32-bit logical operations, because gcc
;; generates zero-extend;zero-extend;DImode-op, which combine optimizes to
;; DImode-op;zero-extend, and then we can optimize away the zero-extend.
;; This doesn't work for unary logical operations, because we don't call
;; apply_distributive_law for them.

;; ??? Likewise, this doesn't work for andnot, which isn't handled by
;; apply_distributive_law.  We get inefficient code for
;; int sub4 (int i, int j) { return i & ~j; }
;; We could convert (and (not (sign_extend A)) (sign_extend B)) to
;; (zero_extend (and (not A) B)) in combine.
;; Or maybe fix this by adding andsi3/iorsi3/xorsi3 patterns like the
;; one_cmplsi2 pattern.
;; 32-bit complement: andcm against -1 (there is no dedicated "not" insn).
(define_insn "one_cmplsi2"
  [(set (match_operand:SI 0 "gr_register_operand" "=r")
	(not:SI (match_operand:SI 1 "gr_register_operand" "r")))]
  ""
  "andcm %0 = -1, %1"
  [(set_attr "itanium_class" "ilog")])

;; ::::::::::::::::::::
;; ::
;; :: 64 bit Integer Logical operations
;; ::
;; ::::::::::::::::::::

;; Each logical op has a GR alternative and an FR (MMX-style f-unit)
;; alternative: and/fand, andcm/fandcm, or/for, xor/fxor.
(define_insn "anddi3"
  [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
	(and:DI (match_operand:DI 1 "grfr_register_operand" "%r,*f")
		(match_operand:DI 2 "grfr_reg_or_8bit_operand" "rK,*f")))]
  ""
  "@
   and %0 = %2, %1
   fand %0 = %2, %1"
  [(set_attr "itanium_class" "ilog,fmisc")])

(define_insn "*andnot"
  [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
	(and:DI (not:DI (match_operand:DI 1 "grfr_register_operand" "r,*f"))
		(match_operand:DI 2 "grfr_reg_or_8bit_operand" "rK,*f")))]
  ""
  "@
   andcm %0 = %2, %1
   fandcm %0 = %2, %1"
  [(set_attr "itanium_class" "ilog,fmisc")])

(define_insn "iordi3"
  [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
	(ior:DI (match_operand:DI 1 "grfr_register_operand" "%r,*f")
		(match_operand:DI 2 "grfr_reg_or_8bit_operand" "rK,*f")))]
  ""
  "@
   or %0 = %2, %1
   for %0 = %2, %1"
  [(set_attr "itanium_class" "ilog,fmisc")])

(define_insn "xordi3"
  [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
	(xor:DI (match_operand:DI 1 "grfr_register_operand" "%r,*f")
		(match_operand:DI 2 "grfr_reg_or_8bit_operand" "rK,*f")))]
  ""
  "@
   xor %0 = %2, %1
   fxor %0 = %2, %1"
  [(set_attr "itanium_class" "ilog,fmisc")])

(define_insn "one_cmpldi2"
  [(set (match_operand:DI 0 "gr_register_operand" "=r")
	(not:DI (match_operand:DI 1 "gr_register_operand" "r")))]
  ""
  "andcm %0 = -1, %1"
  [(set_attr "itanium_class" "ilog")])

;; ::::::::::::::::::::
;; ::
;; :: Comparisons
;; ::
;; ::::::::::::::::::::

;; All cmpXX expanders just stash the operands in ia64_compare_op0/op1;
;; the actual compare is emitted by the following branch/scc expander via
;; ia64_expand_compare.
(define_expand "cmpbi"
  [(set (cc0)
	(compare (match_operand:BI 0 "register_operand" "")
		 (match_operand:BI 1 "const_int_operand" "")))]
  ""
  "
{
  ia64_compare_op0 = operands[0];
  ia64_compare_op1 = operands[1];
  DONE;
}")

(define_expand "cmpsi"
  [(set (cc0)
	(compare (match_operand:SI 0 "gr_register_operand" "")
		 (match_operand:SI 1 "gr_reg_or_8bit_and_adjusted_operand" "")))]
  ""
  "
{
  ia64_compare_op0 = operands[0];
  ia64_compare_op1 = operands[1];
  DONE;
}")

(define_expand "cmpdi"
  [(set (cc0)
	(compare (match_operand:DI 0 "gr_register_operand" "")
		 (match_operand:DI 1 "gr_reg_or_8bit_and_adjusted_operand" "")))]
  ""
  "
{
  ia64_compare_op0 = operands[0];
  ia64_compare_op1 = operands[1];
  DONE;
}")

(define_expand "cmpsf"
  [(set (cc0)
	(compare (match_operand:SF 0 "fr_reg_or_fp01_operand" "")
		 (match_operand:SF 1 "fr_reg_or_fp01_operand" "")))]
  ""
  "
{
  ia64_compare_op0 = operands[0];
  ia64_compare_op1 = operands[1];
  DONE;
}")

(define_expand "cmpdf"
  [(set (cc0)
	(compare (match_operand:DF 0 "fr_reg_or_fp01_operand" "")
		 (match_operand:DF 1 "fr_reg_or_fp01_operand" "")))]
  ""
  "
{
  ia64_compare_op0 = operands[0];
  ia64_compare_op1 = operands[1];
  DONE;
}")

(define_expand "cmptf"
  [(set (cc0)
	(compare (match_operand:TF 0 "tfreg_or_fp01_operand" "")
		 (match_operand:TF 1 "tfreg_or_fp01_operand" "")))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "
{
  ia64_compare_op0 = operands[0];
  ia64_compare_op1 = operands[1];
  DONE;
}")

(define_insn "*cmpsi_normal"
  [(set (match_operand:BI 0 "register_operand" "=c")
	(match_operator:BI 1 "normal_comparison_operator"
	  [(match_operand:SI 2 "gr_register_operand" "r")
	   (match_operand:SI 3 "gr_reg_or_8bit_operand" "rK")]))]
  ""
  "cmp4.%C1 %0, %I0 = %3, %2"
  [(set_attr "itanium_class" "icmp")])

;; We use %r3 because it is possible for us to match a 0, and two of the
;; unsigned comparisons don't accept immediate operands of zero.
(define_insn "*cmpsi_adjusted"
  [(set (match_operand:BI 0 "register_operand" "=c")
	(match_operator:BI 1 "adjusted_comparison_operator"
	  [(match_operand:SI 2 "gr_register_operand" "r")
	   (match_operand:SI 3 "gr_reg_or_8bit_adjusted_operand" "rL")]))]
  ""
  "cmp4.%C1 %0, %I0 = %r3, %2"
  [(set_attr "itanium_class" "icmp")])

(define_insn "*cmpdi_normal"
  [(set (match_operand:BI 0 "register_operand" "=c")
	(match_operator:BI 1 "normal_comparison_operator"
	  [(match_operand:DI 2 "gr_reg_or_0_operand" "rO")
	   (match_operand:DI 3 "gr_reg_or_8bit_operand" "rK")]))]
  ""
  "cmp.%C1 %0, %I0 = %3, %r2"
  [(set_attr "itanium_class" "icmp")])

;; We use %r3 because it is possible for us to match a 0, and two of the
;; unsigned comparisons don't accept immediate operands of zero.

(define_insn "*cmpdi_adjusted"
  [(set (match_operand:BI 0 "register_operand" "=c")
	(match_operator:BI 1 "adjusted_comparison_operator"
	  [(match_operand:DI 2 "gr_register_operand" "r")
	   (match_operand:DI 3 "gr_reg_or_8bit_adjusted_operand" "rL")]))]
  ""
  "cmp.%C1 %0, %I0 = %r3, %2"
  [(set_attr "itanium_class" "icmp")])

;; Floating point compares write the predicate register directly (fcmp).
(define_insn "*cmpsf_internal"
  [(set (match_operand:BI 0 "register_operand" "=c")
	(match_operator:BI 1 "comparison_operator"
	  [(match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")
	   (match_operand:SF 3 "fr_reg_or_fp01_operand" "fG")]))]
  ""
  "fcmp.%D1 %0, %I0 = %F2, %F3"
  [(set_attr "itanium_class" "fcmp")])

(define_insn "*cmpdf_internal"
  [(set (match_operand:BI 0 "register_operand" "=c")
	(match_operator:BI 1 "comparison_operator"
	  [(match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")
	   (match_operand:DF 3 "fr_reg_or_fp01_operand" "fG")]))]
  ""
  "fcmp.%D1 %0, %I0 = %F2, %F3"
  [(set_attr "itanium_class" "fcmp")])

(define_insn "*cmptf_internal"
  [(set (match_operand:BI 0 "register_operand" "=c")
	(match_operator:BI 1 "comparison_operator"
	  [(match_operand:TF 2 "tfreg_or_fp01_operand" "fG")
	   (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")]))]
  "INTEL_EXTENDED_IEEE_FORMAT"
  "fcmp.%D1 %0, %I0 = %F2, %F3"
  [(set_attr "itanium_class" "fcmp")])

;; ??? Can this pattern be generated?

;; Single-bit tests map onto tbit.z / tbit.nz.
(define_insn "*bit_zero"
  [(set (match_operand:BI 0 "register_operand" "=c")
	(eq:BI (zero_extract:DI (match_operand:DI 1 "gr_register_operand" "r")
				(const_int 1)
				(match_operand:DI 2 "immediate_operand" "n"))
	       (const_int 0)))]
  ""
  "tbit.z %0, %I0 = %1, %2"
  [(set_attr "itanium_class" "tbit")])

(define_insn "*bit_one"
  [(set (match_operand:BI 0 "register_operand" "=c")
	(ne:BI (zero_extract:DI (match_operand:DI 1 "gr_register_operand" "r")
				(const_int 1)
				(match_operand:DI 2 "immediate_operand" "n"))
	       (const_int 0)))]
  ""
  "tbit.nz %0, %I0 = %1, %2"
  [(set_attr "itanium_class" "tbit")])

;; ::::::::::::::::::::
;; ::
;; :: Branches
;; ::
;; ::::::::::::::::::::

;; Each bCOND expander emits the deferred compare (stored by the cmpXX
;; expanders above) via ia64_expand_compare, producing the branch condition.
(define_expand "beq"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "operands[1] = ia64_expand_compare (EQ, VOIDmode);")

(define_expand "bne"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "operands[1] = ia64_expand_compare (NE, VOIDmode);")

(define_expand "blt"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "operands[1] = ia64_expand_compare (LT, VOIDmode);")

(define_expand "ble"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "operands[1] = ia64_expand_compare (LE, VOIDmode);")

(define_expand "bgt"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "operands[1] = ia64_expand_compare (GT, VOIDmode);")

(define_expand "bge"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "operands[1] = ia64_expand_compare (GE, VOIDmode);")

;; NOTE(review): the "bltu" expander is truncated at the end of this chunk;
;; the remainder lies outside the visible region.
(define_expand "bltu"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand
0 "" "")) + (pc)))] + "" + "operands[1] = ia64_expand_compare (LTU, VOIDmode);") + +(define_expand "bleu" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "operands[1] = ia64_expand_compare (LEU, VOIDmode);") + +(define_expand "bgtu" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "operands[1] = ia64_expand_compare (GTU, VOIDmode);") + +(define_expand "bgeu" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "operands[1] = ia64_expand_compare (GEU, VOIDmode);") + +(define_expand "bunordered" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "operands[1] = ia64_expand_compare (UNORDERED, VOIDmode);") + +(define_expand "bordered" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "operands[1] = ia64_expand_compare (ORDERED, VOIDmode);") + +(define_insn "*br_true" + [(set (pc) + (if_then_else (match_operator 0 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c") + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "(%J0) br.cond%+ %l2" + [(set_attr "itanium_class" "br") + (set_attr "predicable" "no")]) + +(define_insn "*br_false" + [(set (pc) + (if_then_else (match_operator 0 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c") + (const_int 0)]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" + "(%j0) br.cond%+ %l2" + [(set_attr "itanium_class" "br") + (set_attr "predicable" "no")]) + +;; :::::::::::::::::::: +;; :: +;; :: Counted loop operations +;; :: +;; :::::::::::::::::::: + +(define_expand "doloop_end" + [(use (match_operand 0 "" "")) ; loop pseudo + (use (match_operand 1 "" "")) ; iterations; zero if unknown + (use (match_operand 2 "" "")) ; max iterations + (use (match_operand 3 "" "")) ; loop level + (use (match_operand 4 "" ""))] ; label + "" + " +{ + /* Only use 
cloop on innermost loops. */ + if (INTVAL (operands[3]) > 1) + FAIL; + emit_jump_insn (gen_doloop_end_internal (gen_rtx_REG (DImode, AR_LC_REGNUM), + operands[4])); + DONE; +}") + +(define_insn "doloop_end_internal" + [(set (pc) (if_then_else (ne (match_operand:DI 0 "ar_lc_reg_operand" "") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 0) (const_int 0)) + (match_dup 0) + (plus:DI (match_dup 0) (const_int -1))))] + "" + "br.cloop.sptk.few %l1" + [(set_attr "itanium_class" "br") + (set_attr "predicable" "no")]) + +;; :::::::::::::::::::: +;; :: +;; :: Set flag operations +;; :: +;; :::::::::::::::::::: + +(define_expand "seq" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (EQ, DImode);") + +(define_expand "sne" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (NE, DImode);") + +(define_expand "slt" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (LT, DImode);") + +(define_expand "sle" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (LE, DImode);") + +(define_expand "sgt" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (GT, DImode);") + +(define_expand "sge" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (GE, DImode);") + +(define_expand "sltu" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (LTU, DImode);") + +(define_expand "sleu" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (LEU, DImode);") + +(define_expand "sgtu" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + 
"operands[1] = ia64_expand_compare (GTU, DImode);") + +(define_expand "sgeu" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (GEU, DImode);") + +(define_expand "sunordered" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (UNORDERED, DImode);") + +(define_expand "sordered" + [(set (match_operand:DI 0 "gr_register_operand" "") (match_dup 1))] + "" + "operands[1] = ia64_expand_compare (ORDERED, DImode);") + +;; Don't allow memory as destination here, because cmov/cmov/st is more +;; efficient than mov/mov/cst/cst. + +(define_insn_and_split "*sne_internal" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (ne:DI (match_operand:BI 1 "register_operand" "c") + (const_int 0)))] + "" + "#" + "reload_completed" + [(cond_exec (ne (match_dup 1) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (cond_exec (eq (match_dup 1) (const_int 0)) + (set (match_dup 0) (const_int 0)))] + "" + [(set_attr "itanium_class" "unknown")]) + +(define_insn_and_split "*seq_internal" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (eq:DI (match_operand:BI 1 "register_operand" "c") + (const_int 0)))] + "" + "#" + "reload_completed" + [(cond_exec (ne (match_dup 1) (const_int 0)) + (set (match_dup 0) (const_int 0))) + (cond_exec (eq (match_dup 1) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "" + [(set_attr "itanium_class" "unknown")]) + +;; :::::::::::::::::::: +;; :: +;; :: Conditional move instructions. +;; :: +;; :::::::::::::::::::: + +;; ??? Add movXXcc patterns? + +;; +;; DImode if_then_else patterns. 
+;; + +(define_insn "*cmovdi_internal" + [(set (match_operand:DI 0 "destination_operand" + "= r, r, r, r, r, r, r, r, r, r, m, Q, *f,*b,*d*e") + (if_then_else:DI + (match_operator 4 "predicate_operator" + [(match_operand:BI 1 "register_operand" + "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c") + (const_int 0)]) + (match_operand:DI 2 "move_operand" + "rim, *f, *b,*d*e,rim,rim, rim,*f,*b,*d*e,rO,*f,rOQ,rO, rK") + (match_operand:DI 3 "move_operand" + "rim,rim,rim, rim, *f, *b,*d*e,*f,*b,*d*e,rO,*f,rOQ,rO, rK")))] + "ia64_move_ok (operands[0], operands[2]) + && ia64_move_ok (operands[0], operands[3])" + "* abort ();" + [(set_attr "predicable" "no")]) + +(define_split + [(set (match_operand 0 "destination_operand" "") + (if_then_else + (match_operator 4 "predicate_operator" + [(match_operand:BI 1 "register_operand" "") + (const_int 0)]) + (match_operand 2 "move_operand" "") + (match_operand 3 "move_operand" "")))] + "reload_completed" + [(const_int 0)] + " +{ + rtx tmp; + if (! rtx_equal_p (operands[0], operands[2])) + { + tmp = gen_rtx_SET (VOIDmode, operands[0], operands[2]); + tmp = gen_rtx_COND_EXEC (VOIDmode, operands[4], tmp); + emit_insn (tmp); + } + if (! rtx_equal_p (operands[0], operands[3])) + { + tmp = gen_rtx_fmt_ee (GET_CODE (operands[4]) == NE ? EQ : NE, + VOIDmode, operands[1], const0_rtx); + tmp = gen_rtx_COND_EXEC (VOIDmode, tmp, + gen_rtx_SET (VOIDmode, operands[0], + operands[3])); + emit_insn (tmp); + } + DONE; +}") + +;; Absolute value pattern. 
+ +(define_insn "*absdi2_internal" + [(set (match_operand:DI 0 "gr_register_operand" "=r,r") + (if_then_else:DI + (match_operator 4 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c,c") + (const_int 0)]) + (neg:DI (match_operand:DI 2 "gr_reg_or_22bit_operand" "rI,rI")) + (match_operand:DI 3 "gr_reg_or_22bit_operand" "0,rI")))] + "" + "#" + [(set_attr "itanium_class" "ialu,unknown") + (set_attr "predicable" "no")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI + (match_operator 4 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c,c") + (const_int 0)]) + (neg:DI (match_operand:DI 2 "gr_reg_or_22bit_operand" "")) + (match_operand:DI 3 "gr_reg_or_22bit_operand" "")))] + "reload_completed && rtx_equal_p (operands[0], operands[3])" + [(cond_exec + (match_dup 4) + (set (match_dup 0) + (neg:DI (match_dup 2))))] + "") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI + (match_operator 4 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c,c") + (const_int 0)]) + (neg:DI (match_operand:DI 2 "gr_reg_or_22bit_operand" "")) + (match_operand:DI 3 "gr_reg_or_22bit_operand" "")))] + "reload_completed" + [(cond_exec + (match_dup 4) + (set (match_dup 0) (neg:DI (match_dup 2)))) + (cond_exec + (match_dup 5) + (set (match_dup 0) (match_dup 3)))] + " +{ + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[4]) == NE ? EQ : NE, + VOIDmode, operands[1], const0_rtx); +}") + +;; +;; SImode if_then_else patterns. 
+;; + +(define_insn "*cmovsi_internal" + [(set (match_operand:SI 0 "destination_operand" "=r,m,*f,r,m,*f,r,m,*f") + (if_then_else:SI + (match_operator 4 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c,c,c,c,c,c,c,c,c") + (const_int 0)]) + (match_operand:SI 2 "move_operand" + "0,0,0,rim*f,rO,rO,rim*f,rO,rO") + (match_operand:SI 3 "move_operand" + "rim*f,rO,rO,0,0,0,rim*f,rO,rO")))] + "ia64_move_ok (operands[0], operands[2]) + && ia64_move_ok (operands[0], operands[3])" + "* abort ();" + [(set_attr "predicable" "no")]) + +(define_insn "*abssi2_internal" + [(set (match_operand:SI 0 "gr_register_operand" "=r,r") + (if_then_else:SI + (match_operator 4 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c,c") + (const_int 0)]) + (neg:SI (match_operand:SI 3 "gr_reg_or_22bit_operand" "rI,rI")) + (match_operand:SI 2 "gr_reg_or_22bit_operand" "0,rI")))] + "" + "#" + [(set_attr "itanium_class" "ialu,unknown") + (set_attr "predicable" "no")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI + (match_operator 4 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c,c") + (const_int 0)]) + (neg:SI (match_operand:SI 2 "gr_reg_or_22bit_operand" "")) + (match_operand:SI 3 "gr_reg_or_22bit_operand" "")))] + "reload_completed && rtx_equal_p (operands[0], operands[3])" + [(cond_exec + (match_dup 4) + (set (match_dup 0) + (neg:SI (match_dup 2))))] + "") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI + (match_operator 4 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c,c") + (const_int 0)]) + (neg:SI (match_operand:SI 2 "gr_reg_or_22bit_operand" "")) + (match_operand:SI 3 "gr_reg_or_22bit_operand" "")))] + "reload_completed" + [(cond_exec + (match_dup 4) + (set (match_dup 0) (neg:SI (match_dup 2)))) + (cond_exec + (match_dup 5) + (set (match_dup 0) (match_dup 3)))] + " +{ + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[4]) == NE ? 
EQ : NE, + VOIDmode, operands[1], const0_rtx); +}") + +(define_insn_and_split "*cond_opsi2_internal" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (match_operator:SI 5 "condop_operator" + [(if_then_else:SI + (match_operator 6 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c") + (const_int 0)]) + (match_operand:SI 2 "gr_register_operand" "r") + (match_operand:SI 3 "gr_register_operand" "r")) + (match_operand:SI 4 "gr_register_operand" "r")]))] + "" + "#" + "reload_completed" + [(cond_exec + (match_dup 6) + (set (match_dup 0) (match_op_dup:SI 5 [(match_dup 2) (match_dup 4)]))) + (cond_exec + (match_dup 7) + (set (match_dup 0) (match_op_dup:SI 5 [(match_dup 3) (match_dup 4)])))] + " +{ + operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[6]) == NE ? EQ : NE, + VOIDmode, operands[1], const0_rtx); +}" + [(set_attr "itanium_class" "ialu") + (set_attr "predicable" "no")]) + + +(define_insn_and_split "*cond_opsi2_internal_b" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (match_operator:SI 5 "condop_operator" + [(match_operand:SI 4 "gr_register_operand" "r") + (if_then_else:SI + (match_operator 6 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c") + (const_int 0)]) + (match_operand:SI 2 "gr_register_operand" "r") + (match_operand:SI 3 "gr_register_operand" "r"))]))] + "" + "#" + "reload_completed" + [(cond_exec + (match_dup 6) + (set (match_dup 0) (match_op_dup:SI 5 [(match_dup 4) (match_dup 2)]))) + (cond_exec + (match_dup 7) + (set (match_dup 0) (match_op_dup:SI 5 [(match_dup 4) (match_dup 3)])))] + " +{ + operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[6]) == NE ? EQ : NE, + VOIDmode, operands[1], const0_rtx); +}" + [(set_attr "itanium_class" "ialu") + (set_attr "predicable" "no")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Call and branch instructions +;; :: +;; :::::::::::::::::::: + +;; Subroutine call instruction returning no value. 
Operand 0 is the function +;; to call; operand 1 is the number of bytes of arguments pushed (in mode +;; `SImode', except it is normally a `const_int'); operand 2 is the number of +;; registers used as operands. + +;; On most machines, operand 2 is not actually stored into the RTL pattern. It +;; is supplied for the sake of some RISC machines which need to put this +;; information into the assembler code; they can put it in the RTL instead of +;; operand 1. + +(define_expand "call" + [(use (match_operand:DI 0 "" "")) + (use (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" ""))] + "" + " +{ + ia64_expand_call (NULL_RTX, operands[0], operands[2], 0); + DONE; +}") + +(define_expand "sibcall" + [(use (match_operand:DI 0 "" "")) + (use (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" ""))] + "" + " +{ + ia64_expand_call (NULL_RTX, operands[0], operands[2], 1); + DONE; +}") + +;; Subroutine call instruction returning a value. Operand 0 is the hard +;; register in which the value is returned. There are three more operands, +;; the same as the three operands of the `call' instruction (but with numbers +;; increased by one). +;; +;; Subroutines that return `BLKmode' objects use the `call' insn. + +(define_expand "call_value" + [(use (match_operand 0 "" "")) + (use (match_operand:DI 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (use (match_operand 4 "" ""))] + "" + " +{ + ia64_expand_call (operands[0], operands[1], operands[3], 0); + DONE; +}") + +(define_expand "sibcall_value" + [(use (match_operand 0 "" "")) + (use (match_operand:DI 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (use (match_operand 4 "" ""))] + "" + " +{ + ia64_expand_call (operands[0], operands[1], operands[3], 1); + DONE; +}") + +;; Call subroutine returning any type. 
+ +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" + " +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}") + +(define_insn "call_nopic" + [(call (mem:DI (match_operand:DI 0 "call_operand" "b,i")) + (match_operand 1 "" "")) + (clobber (match_operand:DI 2 "register_operand" "=b,b"))] + "" + "br.call%+.many %2 = %0" + [(set_attr "itanium_class" "br,scall")]) + +(define_insn "call_value_nopic" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "b,i")) + (match_operand 2 "" ""))) + (clobber (match_operand:DI 3 "register_operand" "=b,b"))] + "" + "br.call%+.many %3 = %1" + [(set_attr "itanium_class" "br,scall")]) + +(define_insn "sibcall_nopic" + [(call (mem:DI (match_operand:DI 0 "call_operand" "b,i")) + (match_operand 1 "" "")) + (use (match_operand:DI 2 "register_operand" "=b,b")) + (use (match_operand:DI 3 "ar_pfs_reg_operand" ""))] + "" + "br%+.many %0" + [(set_attr "itanium_class" "br,scall")]) + +(define_insn "call_pic" + [(call (mem:DI (match_operand:DI 0 "call_operand" "b,i")) + (match_operand 1 "" "")) + (use (unspec [(reg:DI 1)] 9)) + (clobber (match_operand:DI 2 "register_operand" "=b,b"))] + "" + "br.call%+.many %2 = %0" + [(set_attr "itanium_class" "br,scall")]) + +(define_insn "call_value_pic" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "b,i")) + (match_operand 2 "" ""))) + (use (unspec [(reg:DI 1)] 9)) + (clobber (match_operand:DI 3 
"register_operand" "=b,b"))] + "" + "br.call%+.many %3 = %1" + [(set_attr "itanium_class" "br,scall")]) + +(define_insn "sibcall_pic" + [(call (mem:DI (match_operand:DI 0 "call_operand" "bi")) + (match_operand 1 "" "")) + (use (unspec [(reg:DI 1)] 9)) + (use (match_operand:DI 2 "register_operand" "=b")) + (use (match_operand:DI 3 "ar_pfs_reg_operand" ""))] + "" + "br%+.many %0" + [(set_attr "itanium_class" "br")]) + +(define_insn "return_internal" + [(return) + (use (match_operand:DI 0 "register_operand" "b"))] + "" + "br.ret.sptk.many %0" + [(set_attr "itanium_class" "br")]) + +(define_insn "return" + [(return)] + "ia64_direct_return ()" + "br.ret.sptk.many rp" + [(set_attr "itanium_class" "br")]) + +(define_insn "*return_true" + [(set (pc) + (if_then_else (match_operator 0 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c") + (const_int 0)]) + (return) + (pc)))] + "ia64_direct_return ()" + "(%J0) br.ret%+.many rp" + [(set_attr "itanium_class" "br") + (set_attr "predicable" "no")]) + +(define_insn "*return_false" + [(set (pc) + (if_then_else (match_operator 0 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c") + (const_int 0)]) + (pc) + (return)))] + "ia64_direct_return ()" + "(%j0) br.ret%+.many rp" + [(set_attr "itanium_class" "br") + (set_attr "predicable" "no")]) + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "br %l0" + [(set_attr "itanium_class" "br")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:DI 0 "register_operand" "b"))] + "" + "br %0" + [(set_attr "itanium_class" "br")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand:DI 0 "memory_operand" "")) + (use (label_ref (match_operand 1 "" "")))])] + "" +{ + rtx op0 = operands[0]; + rtx addr; + + /* ??? Bother -- do_tablejump is "helpful" and pulls the table + element into a register without bothering to see whether that + is necessary given the operand predicate. 
Check for MEM just + in case someone fixes this. */ + if (GET_CODE (op0) == MEM) + addr = XEXP (op0, 0); + else + { + /* Otherwise, cheat and guess that the previous insn in the + stream was the memory load. Grab the address from that. + Note we have to momentarily pop out of the sequence started + by the insn-emit wrapper in order to grab the last insn. */ + rtx last, set; + + end_sequence (); + last = get_last_insn (); + start_sequence (); + set = single_set (last); + + if (! rtx_equal_p (SET_DEST (set), op0) + || GET_CODE (SET_SRC (set)) != MEM) + abort (); + addr = XEXP (SET_SRC (set), 0); + if (rtx_equal_p (addr, op0)) + abort (); + } + + /* Jump table elements are stored pc-relative. That is, a displacement + from the entry to the label. Thus to convert to an absolute address + we add the address of the memory from which the value is loaded. */ + operands[0] = expand_simple_binop (DImode, PLUS, op0, addr, + NULL_RTX, 1, OPTAB_DIRECT); +}) + +(define_insn "*tablejump_internal" + [(set (pc) (match_operand:DI 0 "register_operand" "b")) + (use (label_ref (match_operand 1 "" "")))] + "" + "br %0" + [(set_attr "itanium_class" "br")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Prologue and Epilogue instructions +;; :: +;; :::::::::::::::::::: + +(define_expand "prologue" + [(const_int 1)] + "" + " +{ + ia64_expand_prologue (); + DONE; +}") + +(define_expand "epilogue" + [(return)] + "" + " +{ + ia64_expand_epilogue (0); + DONE; +}") + +(define_expand "sibcall_epilogue" + [(return)] + "" + " +{ + ia64_expand_epilogue (1); + DONE; +}") + +;; This prevents the scheduler from moving the SP decrement past FP-relative +;; stack accesses. This is the same as adddi3 plus the extra set. 
+ +(define_insn "prologue_allocate_stack" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r,a") + (match_operand:DI 2 "gr_reg_or_22bit_operand" "r,I,J"))) + (set (match_operand:DI 3 "register_operand" "=r,r,r") + (match_dup 3))] + "" + "@ + add %0 = %1, %2 + adds %0 = %2, %1 + addl %0 = %2, %1" + [(set_attr "itanium_class" "ialu")]) + +;; This prevents the scheduler from moving the SP restore past FP-relative +;; stack accesses. This is similar to movdi plus the extra set. + +(define_insn "epilogue_deallocate_stack" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "register_operand" "+r")) + (set (match_dup 1) (match_dup 1))] + "" + "mov %0 = %1" + [(set_attr "itanium_class" "ialu")]) + +;; Allocate a new register frame. + +(define_insn "alloc" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] 0)) + (use (match_operand:DI 1 "const_int_operand" "i")) + (use (match_operand:DI 2 "const_int_operand" "i")) + (use (match_operand:DI 3 "const_int_operand" "i")) + (use (match_operand:DI 4 "const_int_operand" "i"))] + "" + "alloc %0 = ar.pfs, %1, %2, %3, %4" + [(set_attr "itanium_class" "syst_m0") + (set_attr "predicable" "no")]) + +;; Modifies ar.unat +(define_expand "gr_spill" + [(parallel [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "const_int_operand" "")] 1)) + (clobber (match_dup 3))])] + "" + "operands[3] = gen_rtx_REG (DImode, AR_UNAT_REGNUM);") + +(define_insn "gr_spill_internal" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "const_int_operand" "")] 1)) + (clobber (match_operand:DI 3 "register_operand" ""))] + "" + "* +{ + return \".mem.offset %2, 0\;%,st8.spill %0 = %1%P0\"; +}" + [(set_attr "itanium_class" "st")]) + +;; Reads ar.unat +(define_expand "gr_restore" + 
[(parallel [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "const_int_operand" "")] 2)) + (use (match_dup 3))])] + "" + "operands[3] = gen_rtx_REG (DImode, AR_UNAT_REGNUM);") + +(define_insn "gr_restore_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "const_int_operand" "")] 2)) + (use (match_operand:DI 3 "register_operand" ""))] + "" + "* +{ + return \".mem.offset %2, 0\;%,ld8.fill %0 = %1%P1\"; +}" + [(set_attr "itanium_class" "ld")]) + +(define_insn "fr_spill" + [(set (match_operand:TF 0 "memory_operand" "=m") + (unspec:TF [(match_operand:TF 1 "register_operand" "f")] 3))] + "" + "stf.spill %0 = %1%P0" + [(set_attr "itanium_class" "stf")]) + +(define_insn "fr_restore" + [(set (match_operand:TF 0 "register_operand" "=f") + (unspec:TF [(match_operand:TF 1 "memory_operand" "m")] 4))] + "" + "ldf.fill %0 = %1%P1" + [(set_attr "itanium_class" "fld")]) + +;; ??? The explicit stop is not ideal. It would be better if +;; rtx_needs_barrier took care of this, but this is something that can be +;; fixed later. This avoids an RSE DV. + +(define_insn "bsp_value" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] 20))] + "" + ";;\;mov %0 = ar.bsp" + [(set_attr "itanium_class" "frar_i")]) + +(define_insn "set_bsp" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")] 5)] + "" + "flushrs\;mov r19=ar.rsc\;;;\;and r19=0x1c,r19\;;;\;mov ar.rsc=r19\;;;\;mov ar.bspstore=%0\;;;\;or r19=0x3,r19\;;;\;loadrs\;invala\;;;\;mov ar.rsc=r19" + [(set_attr "itanium_class" "unknown") + (set_attr "predicable" "no")]) + +;; ??? The explicit stops are not ideal. It would be better if +;; rtx_needs_barrier took care of this, but this is something that can be +;; fixed later. This avoids an RSE DV. 
+ +(define_insn "flushrs" + [(unspec [(const_int 0)] 21)] + "" + ";;\;flushrs\;;;" + [(set_attr "itanium_class" "rse_m")]) + +;; :::::::::::::::::::: +;; :: +;; :: Miscellaneous instructions +;; :: +;; :::::::::::::::::::: + +;; ??? Emiting a NOP instruction isn't very useful. This should probably +;; be emitting ";;" to force a break in the instruction packing. + +;; No operation, needed in case the user uses -g but not -O. +(define_insn "nop" + [(const_int 0)] + "" + "nop 0" + [(set_attr "itanium_class" "unknown")]) + +(define_insn "nop_m" + [(const_int 1)] + "" + "nop.m 0" + [(set_attr "itanium_class" "nop_m")]) + +(define_insn "nop_i" + [(const_int 2)] + "" + "nop.i 0" + [(set_attr "itanium_class" "nop_i")]) + +(define_insn "nop_f" + [(const_int 3)] + "" + "nop.f 0" + [(set_attr "itanium_class" "nop_f")]) + +(define_insn "nop_b" + [(const_int 4)] + "" + "nop.b 0" + [(set_attr "itanium_class" "nop_b")]) + +(define_insn "nop_x" + [(const_int 5)] + "" + "" + [(set_attr "itanium_class" "nop_x")]) + +(define_insn "cycle_display" + [(unspec [(match_operand 0 "const_int_operand" "")] 23)] + "" + "// cycle %0" + [(set_attr "itanium_class" "ignore") + (set_attr "predicable" "no")]) + +(define_insn "bundle_selector" + [(unspec [(match_operand 0 "const_int_operand" "")] 22)] + "" + "* +{ + return get_bundle_name (INTVAL (operands[0])); +}" + [(set_attr "itanium_class" "ignore") + (set_attr "predicable" "no")]) + +;; Pseudo instruction that prevents the scheduler from moving code above this +;; point. 
+(define_insn "blockage" + [(unspec_volatile [(const_int 0)] 1)] + "" + "" + [(set_attr "itanium_class" "ignore") + (set_attr "predicable" "no")]) + +(define_insn "insn_group_barrier" + [(unspec_volatile [(match_operand 0 "const_int_operand" "")] 2)] + "" + ";;" + [(set_attr "itanium_class" "stop_bit") + (set_attr "predicable" "no")]) + +(define_insn "break_f" + [(unspec_volatile [(const_int 0)] 3)] + "" + "break.f 0" + [(set_attr "itanium_class" "nop_f")]) + +(define_insn "prefetch" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:DI 1 "const_int_operand" "n") + (match_operand:DI 2 "const_int_operand" "n"))] + "" +{ + static const char * const alt[2][4] = { + { + "lfetch.nta [%0]", + "lfetch.nt1 [%0]", + "lfetch.nt2 [%0]", + "lfetch [%0]" + }, + { + "lfetch.excl.nta [%0]", + "lfetch.excl.nt1 [%0]", + "lfetch.excl.nt2 [%0]", + "lfetch.excl [%0]" + } + }; + int i = (INTVAL (operands[1])); + int j = (INTVAL (operands[2])); + + if (i != 0 && i != 1) + abort (); + if (j < 0 || j > 3) + abort (); + return alt[i][j]; +} + [(set_attr "itanium_class" "lfetch")]) + +;; Non-local goto support. + +(define_expand "save_stack_nonlocal" + [(use (match_operand:OI 0 "memory_operand" "")) + (use (match_operand:DI 1 "register_operand" ""))] + "" + " +{ + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, + \"__ia64_save_stack_nonlocal\"), + 0, VOIDmode, 2, XEXP (operands[0], 0), Pmode, + operands[1], Pmode); + DONE; +}") + +(define_expand "nonlocal_goto" + [(use (match_operand 0 "general_operand" "")) + (use (match_operand 1 "general_operand" "")) + (use (match_operand 2 "general_operand" "")) + (use (match_operand 3 "general_operand" ""))] + "" + " +{ + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, \"__ia64_nonlocal_goto\"), + LCT_NORETURN, VOIDmode, 3, + operands[1], Pmode, + copy_to_reg (XEXP (operands[2], 0)), Pmode, + operands[3], Pmode); + emit_barrier (); + DONE; +}") + +;; The rest of the setjmp processing happens with the nonlocal_goto expander. +;; ??? 
This is not tested. +(define_expand "builtin_setjmp_setup" + [(use (match_operand:DI 0 "" ""))] + "" + " +{ + emit_move_insn (ia64_gp_save_reg (0), gen_rtx_REG (DImode, GR_REG (1))); + DONE; +}") + +(define_expand "builtin_setjmp_receiver" + [(use (match_operand:DI 0 "" ""))] + "" + " +{ + emit_move_insn (gen_rtx_REG (DImode, GR_REG (1)), ia64_gp_save_reg (0)); + DONE; +}") + +(define_expand "eh_epilogue" + [(use (match_operand:DI 0 "register_operand" "r")) + (use (match_operand:DI 1 "register_operand" "r")) + (use (match_operand:DI 2 "register_operand" "r"))] + "" + " +{ + rtx bsp = gen_rtx_REG (Pmode, 10); + rtx sp = gen_rtx_REG (Pmode, 9); + + if (GET_CODE (operands[0]) != REG || REGNO (operands[0]) != 10) + { + emit_move_insn (bsp, operands[0]); + operands[0] = bsp; + } + if (GET_CODE (operands[2]) != REG || REGNO (operands[2]) != 9) + { + emit_move_insn (sp, operands[2]); + operands[2] = sp; + } + emit_insn (gen_rtx_USE (VOIDmode, sp)); + emit_insn (gen_rtx_USE (VOIDmode, bsp)); + + cfun->machine->ia64_eh_epilogue_sp = sp; + cfun->machine->ia64_eh_epilogue_bsp = bsp; +}") + +;; Builtin apply support. + +(define_expand "restore_stack_nonlocal" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:OI 1 "memory_operand" ""))] + "" + " +{ + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, + \"__ia64_restore_stack_nonlocal\"), + 0, VOIDmode, 1, + copy_to_reg (XEXP (operands[1], 0)), Pmode); + DONE; +}") + + +;;; Intrinsics support. 
+ +(define_expand "mf" + [(set (mem:BLK (match_dup 0)) + (unspec:BLK [(mem:BLK (match_dup 0))] 12))] + "" + " +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (DImode)); + MEM_VOLATILE_P (operands[0]) = 1; +}") + +(define_insn "*mf_internal" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_operand:BLK 1 "" "")] 12))] + "" + "mf" + [(set_attr "itanium_class" "syst_m")]) + +(define_insn "fetchadd_acq_si" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (match_dup 1)) + (set (match_operand:SI 1 "not_postinc_memory_operand" "+S") + (unspec:SI [(match_dup 1) + (match_operand:SI 2 "fetchadd_operand" "n")] 19))] + "" + "fetchadd4.acq %0 = %1, %2" + [(set_attr "itanium_class" "sem")]) + +(define_insn "fetchadd_acq_di" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (match_dup 1)) + (set (match_operand:DI 1 "not_postinc_memory_operand" "+S") + (unspec:DI [(match_dup 1) + (match_operand:DI 2 "fetchadd_operand" "n")] 19))] + "" + "fetchadd8.acq %0 = %1, %2" + [(set_attr "itanium_class" "sem")]) + +(define_insn "cmpxchg_acq_si" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (match_dup 1)) + (set (match_operand:SI 1 "not_postinc_memory_operand" "+S") + (unspec:SI [(match_dup 1) + (match_operand:SI 2 "gr_register_operand" "r") + (match_operand:SI 3 "ar_ccv_reg_operand" "")] 13))] + "" + "cmpxchg4.acq %0 = %1, %2, %3" + [(set_attr "itanium_class" "sem")]) + +(define_insn "cmpxchg_acq_di" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (match_dup 1)) + (set (match_operand:DI 1 "not_postinc_memory_operand" "+S") + (unspec:DI [(match_dup 1) + (match_operand:DI 2 "gr_register_operand" "r") + (match_operand:DI 3 "ar_ccv_reg_operand" "")] 13))] + "" + "cmpxchg8.acq %0 = %1, %2, %3" + [(set_attr "itanium_class" "sem")]) + +(define_insn "xchgsi" + [(set (match_operand:SI 0 "gr_register_operand" "=r") + (match_operand:SI 1 "not_postinc_memory_operand" "+S")) + (set (match_dup 1) + (match_operand:SI 2 "gr_register_operand" "r"))] + 
"" + "xchg4 %0 = %1, %2" + [(set_attr "itanium_class" "sem")]) + +(define_insn "xchgdi" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (match_operand:DI 1 "not_postinc_memory_operand" "+S")) + (set (match_dup 1) + (match_operand:DI 2 "gr_register_operand" "r"))] + "" + "xchg8 %0 = %1, %2" + [(set_attr "itanium_class" "sem")]) + +;; Predication. + +(define_cond_exec + [(match_operator 0 "predicate_operator" + [(match_operand:BI 1 "register_operand" "c") + (const_int 0)])] + "" + "(%J0)") + +(define_insn "pred_rel_mutex" + [(set (match_operand:BI 0 "register_operand" "+c") + (unspec:BI [(match_dup 0)] 7))] + "" + ".pred.rel.mutex %0, %I0" + [(set_attr "itanium_class" "ignore") + (set_attr "predicable" "no")]) + +(define_insn "safe_across_calls_all" + [(unspec_volatile [(const_int 0)] 8)] + "" + ".pred.safe_across_calls p1-p63" + [(set_attr "itanium_class" "ignore") + (set_attr "predicable" "no")]) + +(define_insn "safe_across_calls_normal" + [(unspec_volatile [(const_int 0)] 9)] + "" + "* +{ + emit_safe_across_calls (asm_out_file); + return \"\"; +}" + [(set_attr "itanium_class" "ignore") + (set_attr "predicable" "no")]) + +;; +;; +;; UNSPEC instruction definition to "swizzle" 32 bit pointer into 64 bit +;; pointer. This is used by the HP-UX 32 bit mode. 
+ +(define_insn "ptr_extend" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (unspec:DI [(match_operand:SI 1 "gr_register_operand" "r")] 24))] + "" + "addp4 %0 = 0,%1" + [(set_attr "itanium_class" "ialu")]) + +;; +;; As USE insns aren't meaningful after reload, this is used instead +;; to prevent deleting instructions setting registers for EH handling +(define_insn "prologue_use" + [(unspec:DI [(match_operand:DI 0 "register_operand" "")] 25)] + "" + "// %0 needed for EH" + [(set_attr "itanium_class" "ignore") + (set_attr "predicable" "no")]) diff --git a/contrib/gcc/config/ia64/ia64intrin.h b/contrib/gcc/config/ia64/ia64intrin.h new file mode 100644 index 0000000..c7bbd33 --- /dev/null +++ b/contrib/gcc/config/ia64/ia64intrin.h @@ -0,0 +1,132 @@ +#ifndef _IA64INTRIN_H_INCLUDED +#define _IA64INTRIN_H_INCLUDED + +/* Actually, everything is a compiler builtin, but just so + there's no confusion... */ +#ifdef __cplusplus +extern "C" { +#endif + +extern void __sync_synchronize (void); + +extern int __sync_val_compare_and_swap_si (int *, int, int); +extern long __sync_val_compare_and_swap_di (long *, long, long); +#define __sync_val_compare_and_swap(PTR, OLD, NEW) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) \ + __sync_val_compare_and_swap_si((int *)(PTR),(int)(OLD),(int)(NEW)) \ + : (__typeof__(*(PTR))) \ + __sync_val_compare_and_swap_di((long *)(PTR),(long)(OLD),(long)(NEW))) + +extern int __sync_bool_compare_and_swap_si (int *, int, int); +extern long __sync_bool_compare_and_swap_di (long *, long, long); +#define __sync_bool_compare_and_swap(PTR, OLD, NEW) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? 
(__typeof__(*(PTR))) \ + __sync_bool_compare_and_swap_si((int *)(PTR),(int)(OLD),(int)(NEW)) \ + : (__typeof__(*(PTR))) \ + __sync_bool_compare_and_swap_di((long *)(PTR),(long)(OLD),(long)(NEW))) + +extern void __sync_lock_release_si (int *); +extern void __sync_lock_release_di (long *); +#define __sync_lock_release(PTR) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? __sync_lock_release_si((int *)(PTR)) \ + : __sync_lock_release_di((long *)(PTR))) + +extern int __sync_lock_test_and_set_si (int *, int); +extern long __sync_lock_test_and_set_di (long *, long); +#define __sync_lock_test_and_set(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_lock_test_and_set_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_lock_test_and_set_di((long *)(PTR),(long)(VAL))) + +extern int __sync_fetch_and_add_si (int *, int); +extern long __sync_fetch_and_add_di (long *, long); +#define __sync_fetch_and_add(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_fetch_and_add_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_fetch_and_add_di((long *)(PTR),(long)(VAL))) + +extern int __sync_fetch_and_sub_si (int *, int); +extern long __sync_fetch_and_sub_di (long *, long); +#define __sync_fetch_and_sub(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_fetch_and_sub_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_fetch_and_sub_di((long *)(PTR),(long)(VAL))) + +extern int __sync_fetch_and_and_si (int *, int); +extern long __sync_fetch_and_and_di (long *, long); +#define __sync_fetch_and_and(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? 
(__typeof__(*(PTR))) __sync_fetch_and_and_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_fetch_and_and_di((long *)(PTR),(long)(VAL))) + +extern int __sync_fetch_and_or_si (int *, int); +extern long __sync_fetch_and_or_di (long *, long); +#define __sync_fetch_and_or(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_fetch_and_or_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_fetch_and_or_di((long *)(PTR),(long)(VAL))) + +extern int __sync_fetch_and_xor_si (int *, int); +extern long __sync_fetch_and_xor_di (long *, long); +#define __sync_fetch_and_xor(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_fetch_and_xor_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_fetch_and_xor_di((long *)(PTR),(long)(VAL))) + +extern int __sync_fetch_and_nand_si (int *, int); +extern long __sync_fetch_and_nand_di (long *, long); +#define __sync_fetch_and_nand(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_fetch_and_nand_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_fetch_and_nand_di((long *)(PTR),(long)(VAL))) + +extern int __sync_add_and_fetch_si (int *, int); +extern long __sync_add_and_fetch_di (long *, long); +#define __sync_add_and_fetch(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_add_and_fetch_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_add_and_fetch_di((long *)(PTR),(long)(VAL))) + +extern int __sync_sub_and_fetch_si (int *, int); +extern long __sync_sub_and_fetch_di (long *, long); +#define __sync_sub_and_fetch(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? 
(__typeof__(*(PTR))) __sync_sub_and_fetch_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_sub_and_fetch_di((long *)(PTR),(long)(VAL))) + +extern int __sync_and_and_fetch_si (int *, int); +extern long __sync_and_and_fetch_di (long *, long); +#define __sync_and_and_fetch(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_and_and_fetch_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_and_and_fetch_di((long *)(PTR),(long)(VAL))) + +extern int __sync_or_and_fetch_si (int *, int); +extern long __sync_or_and_fetch_di (long *, long); +#define __sync_or_and_fetch(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_or_and_fetch_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_or_and_fetch_di((long *)(PTR),(long)(VAL))) + +extern int __sync_xor_and_fetch_si (int *, int); +extern long __sync_xor_and_fetch_di (long *, long); +#define __sync_xor_and_fetch(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_xor_and_fetch_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_xor_and_fetch_di((long *)(PTR),(long)(VAL))) + +extern int __sync_nand_and_fetch_si (int *, int); +extern long __sync_nand_and_fetch_di (long *, long); +#define __sync_nand_and_fetch(PTR,VAL) \ + ((sizeof (*(PTR)) == sizeof(int)) \ + ? (__typeof__(*(PTR))) __sync_nand_and_fetch_si((int *)(PTR),(int)(VAL)) \ + : (__typeof__(*(PTR))) __sync_nand_and_fetch_di((long *)(PTR),(long)(VAL))) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/gcc/config/ia64/lib1funcs.asm b/contrib/gcc/config/ia64/lib1funcs.asm new file mode 100644 index 0000000..75e79b0 --- /dev/null +++ b/contrib/gcc/config/ia64/lib1funcs.asm @@ -0,0 +1,703 @@ +#ifdef L__divtf3 +// Compute a 80-bit IEEE double-extended quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. 
+ + .text + .align 16 + .global __divtf3 + .proc __divtf3 +__divtf3: + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fnma.s1 f11 = farg1, f10, f1 +(p6) fma.s1 f12 = farg0, f10, f0 + ;; +(p6) fma.s1 f13 = f11, f11, f0 +(p6) fma.s1 f14 = f11, f11, f11 + ;; +(p6) fma.s1 f11 = f13, f13, f11 +(p6) fma.s1 f13 = f14, f10, f10 + ;; +(p6) fma.s1 f10 = f13, f11, f10 +(p6) fnma.s1 f11 = farg1, f12, farg0 + ;; +(p6) fma.s1 f11 = f11, f10, f12 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f10 = f12, f10, f10 +(p6) fnma.s1 f12 = farg1, f11, farg0 + ;; +(p6) fma.s0 fret0 = f12, f10, f11 +(p7) mov fret0 = f10 + br.ret.sptk rp + .endp __divtf3 +#endif + +#ifdef L__divdf3 +// Compute a 64-bit IEEE double quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. + + .text + .align 16 + .global __divdf3 + .proc __divdf3 +__divdf3: + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fmpy.s1 f11 = farg0, f10 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fmpy.s1 f13 = f12, f12 + ;; +(p6) fma.s1 f10 = f12, f10, f10 +(p6) fma.s1 f11 = f13, f11, f11 + ;; +(p6) fmpy.s1 f12 = f13, f13 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.d.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.d.s1 f8 = farg1, f11, farg0 + ;; +(p6) fma.d fret0 = f8, f10, f11 +(p7) mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divdf3 +#endif + +#ifdef L__divsf3 +// Compute a 32-bit IEEE float quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. 
+ + .text + .align 16 + .global __divsf3 + .proc __divsf3 +__divsf3: + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fmpy.s1 f8 = farg0, f10 +(p6) fnma.s1 f9 = farg1, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fmpy.s1 f9 = f9, f9 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fmpy.s1 f9 = f9, f9 + ;; +(p6) fma.d.s1 f10 = f9, f8, f8 + ;; +(p6) fnorm.s.s0 fret0 = f10 +(p7) mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divsf3 +#endif + +#ifdef L__divdi3 +// Compute a 64-bit integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __divdi3 + .proc __divdi3 +__divdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fnma.s1 f11 = f9, f10, f1 +(p6) fmpy.s1 f12 = f8, f10 + ;; +(p6) fmpy.s1 f13 = f11, f11 +(p6) fma.s1 f12 = f11, f12, f12 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an integer. + fcvt.fx.trunc.s1 f10 = f10 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __divdi3 +#endif + +#ifdef L__moddi3 +// Compute a 64-bit integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend (a). in1 holds the divisor (b). + + .text + .align 16 + .global __moddi3 + .proc __moddi3 +__moddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. 
+ setf.sig f14 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f14 + fcvt.xf f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f11 = f9, f10, f1 + ;; +(p6) fma.s1 f12 = f11, f12, f12 +(p6) fmpy.s1 f13 = f11, f11 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; + sub in1 = r0, in1 +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f10 = f12, f10, f11 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + // r = q * (-b) + a + xma.l f10 = f10, f9, f14 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __moddi3 +#endif + +#ifdef L__udivdi3 +// Compute a 64-bit unsigned integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __udivdi3 + .proc __udivdi3 +__udivdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + fcvt.xuf.s1 f8 = f8 + fcvt.xuf.s1 f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fnma.s1 f11 = f9, f10, f1 +(p6) fmpy.s1 f12 = f8, f10 + ;; +(p6) fmpy.s1 f13 = f11, f11 +(p6) fma.s1 f12 = f11, f12, f12 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc.s1 f10 = f10 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __udivdi3 +#endif + +#ifdef L__umoddi3 +// Compute a 64-bit unsigned integer modulus. 
+// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend (a). in1 holds the divisor (b). + + .text + .align 16 + .global __umoddi3 + .proc __umoddi3 +__umoddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f14 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, to avoid FP software assist faults. + fcvt.xuf.s1 f8 = f14 + fcvt.xuf.s1 f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f11 = f9, f10, f1 + ;; +(p6) fma.s1 f12 = f11, f12, f12 +(p6) fmpy.s1 f13 = f11, f11 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; + sub in1 = r0, in1 +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc.s1 f10 = f10 + ;; + // r = q * (-b) + a + xma.l f10 = f10, f9, f14 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __umoddi3 +#endif + +#ifdef L__divsi3 +// Compute a 32-bit integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __divsi3 + .proc __divsi3 +__divsi3: + .regstk 2,0,0,0 + sxt4 in0 = in0 + sxt4 in1 = in1 + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + mov r2 = 0x0ffdd + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 + ;; +(p6) fmpy.s1 f8 = f8, f10 +(p6) fnma.s1 f9 = f9, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f11 + ;; +(p6) fma.s1 f10 = f9, f8, f8 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __divsi3 +#endif + +#ifdef L__modsi3 +// Compute a 32-bit integer modulus. 
+// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __modsi3 + .proc __modsi3 +__modsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + sxt4 in0 = in0 + sxt4 in1 = in1 + ;; + setf.sig f13 = r32 + setf.sig f9 = r33 + ;; + sub in1 = r0, in1 + fcvt.xf f8 = f13 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 + ;; +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f10 = f9, f10, f1 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f12 = f10, f12, f12 +(p6) fma.s1 f10 = f10, f10, f11 + ;; +(p6) fma.s1 f10 = f10, f12, f12 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + xma.l f10 = f10, f9, f13 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __modsi3 +#endif + +#ifdef L__udivsi3 +// Compute a 32-bit unsigned integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __udivsi3 + .proc __udivsi3 +__udivsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + zxt4 in0 = in0 + zxt4 in1 = in1 + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 + ;; +(p6) fmpy.s1 f8 = f8, f10 +(p6) fnma.s1 f9 = f9, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f11 + ;; +(p6) fma.s1 f10 = f9, f8, f8 + ;; + fcvt.fxu.trunc.s1 f10 = f10 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __udivsi3 +#endif + +#ifdef L__umodsi3 +// Compute a 32-bit unsigned integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. 
+ + .text + .align 16 + .global __umodsi3 + .proc __umodsi3 +__umodsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + zxt4 in0 = in0 + zxt4 in1 = in1 + ;; + setf.sig f13 = in0 + setf.sig f9 = in1 + ;; + sub in1 = r0, in1 + fcvt.xf f8 = f13 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 + ;; +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f10 = f9, f10, f1 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f12 = f10, f12, f12 +(p6) fma.s1 f10 = f10, f10, f11 + ;; +(p6) fma.s1 f10 = f10, f12, f12 + ;; + fcvt.fxu.trunc.s1 f10 = f10 + ;; + xma.l f10 = f10, f9, f13 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __umodsi3 +#endif + +#ifdef L__save_stack_nonlocal +// Notes on save/restore stack nonlocal: We read ar.bsp but write +// ar.bspstore. This is because ar.bsp can be read at all times +// (independent of the RSE mode) but since it's read-only we need to +// restore the value via ar.bspstore. This is OK because +// ar.bsp==ar.bspstore after executing "flushrs". + +// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer) + + .text + .align 16 + .global __ia64_save_stack_nonlocal + .proc __ia64_save_stack_nonlocal +__ia64_save_stack_nonlocal: + { .mmf + alloc r18 = ar.pfs, 2, 0, 0, 0 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + st8 [in0] = in1, 24 + and r19 = 0x1c, r19 + ;; + } + { .mmi + st8 [in0] = r18, -16 + mov ar.rsc = r19 + or r19 = 0x3, r19 + ;; + } + { .mmi + mov r16 = ar.bsp + mov r17 = ar.rnat + adds r2 = 8, in0 + ;; + } + { .mmi + st8 [in0] = r16 + st8 [r2] = r17 + } + { .mib + mov ar.rsc = r19 + br.ret.sptk.few rp + ;; + } + .endp __ia64_save_stack_nonlocal +#endif + +#ifdef L__nonlocal_goto +// void __ia64_nonlocal_goto(void *target_label, void *save_area, +// void *static_chain); + + .text + .align 16 + .global __ia64_nonlocal_goto + .proc __ia64_nonlocal_goto +__ia64_nonlocal_goto: + { .mmi + alloc r20 = ar.pfs, 3, 0, 0, 0 + ld8 r12 = [in1], 8 + mov.ret.sptk rp = in0, .L0 + ;; + } + { .mmf + ld8 r16 = [in1], 8 + mov r19 = 
ar.rsc + ;; + } + { .mmi + flushrs + ld8 r17 = [in1], 8 + and r19 = 0x1c, r19 + ;; + } + { .mmi + ld8 r18 = [in1] + mov ar.rsc = r19 + or r19 = 0x3, r19 + ;; + } + { .mmi + mov ar.bspstore = r16 + ;; + mov ar.rnat = r17 + ;; + } + { .mmi + loadrs + invala + mov r15 = in2 + ;; + } +.L0: { .mib + mov ar.rsc = r19 + mov ar.pfs = r18 + br.ret.sptk.few rp + ;; + } + .endp __ia64_nonlocal_goto +#endif + +#ifdef L__restore_stack_nonlocal +// This is mostly the same as nonlocal_goto above. +// ??? This has not been tested yet. + +// void __ia64_restore_stack_nonlocal(void *save_area) + + .text + .align 16 + .global __ia64_restore_stack_nonlocal + .proc __ia64_restore_stack_nonlocal +__ia64_restore_stack_nonlocal: + { .mmf + alloc r20 = ar.pfs, 4, 0, 0, 0 + ld8 r12 = [in0], 8 + ;; + } + { .mmb + ld8 r16=[in0], 8 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + ld8 r17 = [in0], 8 + and r19 = 0x1c, r19 + ;; + } + { .mmf + ld8 r18 = [in0] + mov ar.rsc = r19 + ;; + } + { .mmi + mov ar.bspstore = r16 + ;; + mov ar.rnat = r17 + or r19 = 0x3, r19 + ;; + } + { .mmf + loadrs + invala + ;; + } +.L0: { .mib + mov ar.rsc = r19 + mov ar.pfs = r18 + br.ret.sptk.few rp + ;; + } + .endp __ia64_restore_stack_nonlocal +#endif + +#ifdef L__trampoline +// Implement the nested function trampoline. This is out of line +// so that we don't have to bother with flushing the icache, as +// well as making the on-stack trampoline smaller. 
+// +// The trampoline has the following form: +// +// +-------------------+ > +// TRAMP: | __ia64_trampoline | | +// +-------------------+ > fake function descriptor +// | TRAMP+16 | | +// +-------------------+ > +// | target descriptor | +// +-------------------+ +// | static link | +// +-------------------+ + + .text + .align 16 + .global __ia64_trampoline + .proc __ia64_trampoline +__ia64_trampoline: + { .mmi + ld8 r2 = [r1], 8 + ;; + ld8 r15 = [r1] + } + { .mmi + ld8 r3 = [r2], 8 + ;; + ld8 r1 = [r2] + mov b6 = r3 + } + { .bbb + br.sptk.many b6 + ;; + } + .endp __ia64_trampoline +#endif diff --git a/contrib/gcc/config/ia64/libgcc-ia64.ver b/contrib/gcc/config/ia64/libgcc-ia64.ver new file mode 100644 index 0000000..2ffb693 --- /dev/null +++ b/contrib/gcc/config/ia64/libgcc-ia64.ver @@ -0,0 +1,9 @@ +GCC_3.0 { + # IA-64 symbols + __ia64_nonlocal_goto + __ia64_personality_v1 + __ia64_restore_stack_nonlocal + __ia64_save_stack_nonlocal + __ia64_trampoline + __ia64_backtrace +} diff --git a/contrib/gcc/config/ia64/linux.h b/contrib/gcc/config/ia64/linux.h new file mode 100644 index 0000000..1889ef6 --- /dev/null +++ b/contrib/gcc/config/ia64/linux.h @@ -0,0 +1,122 @@ +/* Definitions for ia64-linux target. */ + +/* This macro is a C statement to print on `stderr' a string describing the + particular machine description choice. */ + +#define TARGET_VERSION fprintf (stderr, " (IA-64) Linux"); + +/* This is for -profile to use -lc_p instead of -lc. */ +#undef CC1_SPEC +#define CC1_SPEC "%{profile:-p} %{G*}" + +/* ??? Maybe this should be in sysv4.h? */ +#define CPP_PREDEFINES "\ +-D__ia64 -D__ia64__ -D__linux -D__linux__ -D_LONGLONG -Dlinux -Dunix \ +-D__LP64__ -D__ELF__ -Asystem=linux -Acpu=ia64 -Amachine=ia64" + +/* ??? ia64 gas doesn't accept standard svr4 assembler options? */ +#undef ASM_SPEC +#define ASM_SPEC "-x %{mconstant-gp} %{mauto-pic}" + +/* Need to override linux.h STARTFILE_SPEC, since it has crtbeginT.o in. 
*/ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared: \ + %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} \ + %{!p:%{profile:gcrt1.o%s} \ + %{!profile:crt1.o%s}}}} \ + crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}" + +/* Similar to standard Linux, but adding -ffast-math support. */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s" + +/* Define this for shared library support because it isn't in the main + linux.h file. */ + +#undef LINK_SPEC +#define LINK_SPEC "\ + %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{!dynamic-linker:-dynamic-linker /lib/ld-linux-ia64.so.2}} \ + %{static:-static}}" + + +#define DONT_USE_BUILTIN_SETJMP +#define JMP_BUF_SIZE 76 + +/* Output any profiling code before the prologue. */ + +#undef PROFILE_BEFORE_PROLOGUE +#define PROFILE_BEFORE_PROLOGUE 1 + +/* Override linux.h LINK_EH_SPEC definition. + Signalize that because we have fde-glibc, we don't need all C shared libs + linked against -lgcc_s. */ +#undef LINK_EH_SPEC +#define LINK_EH_SPEC "" + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. */ + +#ifdef IN_LIBGCC2 +#include <signal.h> +#include <sys/ucontext.h> + +#define IA64_GATE_AREA_START 0xa000000000000100LL +#define IA64_GATE_AREA_END 0xa000000000010000LL + +#define MD_FALLBACK_FRAME_STATE_FOR(CONTEXT, FS, SUCCESS) \ + if ((CONTEXT)->rp >= IA64_GATE_AREA_START \ + && (CONTEXT)->rp < IA64_GATE_AREA_END) \ + { \ + struct sigframe { \ + char scratch[16]; \ + unsigned long sig_number; \ + struct siginfo *info; \ + struct sigcontext *sc; \ + } *frame_ = (struct sigframe *)(CONTEXT)->psp; \ + struct sigcontext *sc_ = frame_->sc; \ + \ + /* Restore scratch registers in case the unwinder needs to \ + refer to a value stored in one of them. 
*/ \ + { \ + int i_; \ + \ + for (i_ = 2; i_ < 4; i_++) \ + (CONTEXT)->ireg[i_ - 2].loc = &sc_->sc_gr[i_]; \ + for (i_ = 8; i_ < 12; i_++) \ + (CONTEXT)->ireg[i_ - 2].loc = &sc_->sc_gr[i_]; \ + for (i_ = 14; i_ < 32; i_++) \ + (CONTEXT)->ireg[i_ - 2].loc = &sc_->sc_gr[i_]; \ + } \ + \ + (CONTEXT)->pfs_loc = &(sc_->sc_ar_pfs); \ + (CONTEXT)->lc_loc = &(sc_->sc_ar_lc); \ + (CONTEXT)->unat_loc = &(sc_->sc_ar_unat); \ + (CONTEXT)->pr = sc_->sc_pr; \ + (CONTEXT)->psp = sc_->sc_gr[12]; \ + \ + /* Don't touch the branch registers. The kernel doesn't \ + pass the preserved branch registers in the sigcontext but \ + leaves them intact, so there's no need to do anything \ + with them here. */ \ + \ + { \ + unsigned long sof = sc_->sc_cfm & 0x7f; \ + (CONTEXT)->bsp = (unsigned long) \ + ia64_rse_skip_regs ((unsigned long *)(sc_->sc_ar_bsp), -sof); \ + } \ + \ + (FS)->curr.reg[UNW_REG_RP].where = UNW_WHERE_SPREL; \ + (FS)->curr.reg[UNW_REG_RP].val \ + = (unsigned long)&(sc_->sc_ip) - (CONTEXT)->psp; \ + (FS)->curr.reg[UNW_REG_RP].when = -1; \ + \ + goto SUCCESS; \ + } +#endif /* IN_LIBGCC2 */ diff --git a/contrib/gcc/config/ia64/quadlib.c b/contrib/gcc/config/ia64/quadlib.c new file mode 100644 index 0000000..fac3b28 --- /dev/null +++ b/contrib/gcc/config/ia64/quadlib.c @@ -0,0 +1,82 @@ +/* Subroutines for long double support. + Copyright (C) 2000, 2001 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. 
(The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +int _U_Qfcmp (long double a, long double b, int); +long _U_Qfcnvfxt_quad_to_sgl (long double); + +int +_U_Qfeq (long double a, long double b) +{ + return (_U_Qfcmp (a, b, 4) != 0); +} + +int +_U_Qfne (long double a, long double b) +{ + return (_U_Qfcmp (a, b, 4) == 0); +} + +int +_U_Qfgt (long double a, long double b) +{ + return (_U_Qfcmp (a, b, 17) != 0); +} + +int +_U_Qfge (long double a, long double b) +{ + return (_U_Qfcmp (a, b, 21) != 0); +} + +int +_U_Qflt (long double a, long double b) +{ + return (_U_Qfcmp (a, b, 9) != 0); +} + +int +_U_Qfle (long double a, long double b) +{ + return (_U_Qfcmp (a, b, 13) != 0); +} + +int +_U_Qfcomp (long double a, long double b) +{ + if (_U_Qfcmp (a, b, 4) == 0) + return 0; + + return (_U_Qfcmp (a, b, 22) != 0 ? 1 : -1); +} + +long double +_U_Qfneg (long double a) +{ + return (0.0L - a); +} diff --git a/contrib/gcc/config/ia64/sysv4.h b/contrib/gcc/config/ia64/sysv4.h new file mode 100644 index 0000000..1b5d469 --- /dev/null +++ b/contrib/gcc/config/ia64/sysv4.h @@ -0,0 +1,226 @@ +/* Override definitions in elfos.h/svr4.h to be correct for IA64. */ + +/* We want DWARF2 as specified by the IA64 ABI. */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Stabs does not work properly for 64-bit targets. 
*/ +#undef DBX_DEBUGGING_INFO + +/* Various pseudo-ops for which the Intel assembler uses non-standard + definitions. */ + +#undef STRING_ASM_OP +#define STRING_ASM_OP "\tstringz\t" + +#undef SKIP_ASM_OP +#define SKIP_ASM_OP "\t.skip\t" + +#undef COMMON_ASM_OP +#define COMMON_ASM_OP "\t.common\t" + +#undef ASCII_DATA_ASM_OP +#define ASCII_DATA_ASM_OP "\tstring\t" + +/* ??? Unfortunately, .lcomm doesn't work, because it puts things in either + .bss or .sbss, and we can't control the decision of which is used. When + I use .lcomm, I get a cryptic "Section group has no member" error from + the Intel simulator. So we must explicitly put variables in .bss + instead. This matters only if we care about the Intel assembler. */ + +/* This is asm_output_aligned_bss from varasm.c without the ASM_GLOBALIZE_LABEL + call at the beginning. */ + +/* This is for final.c, because it is used by ASM_DECLARE_OBJECT_NAME. */ +extern int size_directive_output; + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ +do { \ + if ((DECL) \ + && XSTR (XEXP (DECL_RTL (DECL), 0), 0)[0] == SDATA_NAME_FLAG_CHAR) \ + sbss_section (); \ + else \ + bss_section (); \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 ((ALIGN) / BITS_PER_UNIT)); \ + ASM_DECLARE_OBJECT_NAME (FILE, NAME, DECL); \ + ASM_OUTPUT_SKIP (FILE, SIZE ? SIZE : 1); \ +} while (0) + +/* The # tells the Intel assembler that this is not a register name. + However, we can't emit the # in a label definition, so we set a variable + in ASM_OUTPUT_LABEL to control whether we want the postfix here or not. + We append the # to the label name, but since NAME can be an expression + we have to scan it for a non-label character and insert the # there. 
*/ + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ +do { \ + const char *name_ = NAME; \ + if (*name_ == SDATA_NAME_FLAG_CHAR) \ + name_++; \ + if (*name_ == '*') \ + name_++; \ + else \ + fputs (user_label_prefix, STREAM); \ + fputs (name_, STREAM); \ + if (!ia64_asm_output_label) \ + fputc ('#', STREAM); \ +} while (0) + +/* Intel assembler requires both flags and type if declaring a non-predefined + section. */ +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP "\t.section\t.init,\"ax\",\"progbits\"" +#undef FINI_SECTION_ASM_OP +#define FINI_SECTION_ASM_OP "\t.section\t.fini,\"ax\",\"progbits\"" + +/* svr4.h undefines this, so we need to define it here. */ +#define DBX_REGISTER_NUMBER(REGNO) \ + ia64_dbx_register_number(REGNO) + +/* Things that svr4.h defines to the wrong type, because it assumes 32 bit + ints and 32 bit longs. */ + +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +/* We redefine this to use the ia64 .proc pseudo-op. */ + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ +do { \ + fputs ("\t.proc ", FILE); \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ +} while (0) + +/* We redefine this to use the ia64 .endp pseudo-op. */ + +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, NAME, DECL) \ +do { \ + fputs ("\t.endp ", FILE); \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ +} while (0) + +/* A C expression which outputs to the stdio stream STREAM some appropriate + text to go at the start of an assembler file. */ + +/* ??? Looks like almost every port, except for a few original ones, get this + wrong. Must emit #NO_APP as first line of file to turn of special assembler + preprocessing of files. */ + +/* ??? 
Even worse, it doesn't work, because gas does not accept the tab chars + that dwarf2out.c emits when #NO_APP. */ + +/* ??? Unrelated, but dwarf2out.c emits unnecessary newlines after strings, + may as well fix at the same time. */ + +#undef ASM_FILE_START +#define ASM_FILE_START(STREAM) \ +do { \ + output_file_directive (STREAM, main_input_filename); \ + emit_safe_across_calls (STREAM); \ +} while (0) + +/* We override svr4.h so that we can support the sdata section. */ + +#undef SELECT_SECTION +#define SELECT_SECTION(DECL,RELOC,ALIGN) \ +{ \ + if (TREE_CODE (DECL) == STRING_CST) \ + { \ + if (! flag_writable_strings) \ + mergeable_string_section ((DECL), (ALIGN), 0); \ + else \ + data_section (); \ + } \ + else if (TREE_CODE (DECL) == VAR_DECL) \ + { \ + if (XSTR (XEXP (DECL_RTL (DECL), 0), 0)[0] \ + == SDATA_NAME_FLAG_CHAR) \ + sdata_section (); \ + /* ??? We need the extra RELOC check, because the default is to \ + only check RELOC if flag_pic is set, and we don't set flag_pic \ + (yet?). */ \ + else if (!DECL_READONLY_SECTION (DECL, RELOC) || (RELOC)) \ + data_section (); \ + else if (flag_merge_constants < 2) \ + /* C and C++ don't allow different variables to share \ + the same location. -fmerge-all-constants allows \ + even that (at the expense of not conforming). */ \ + const_section (); \ + else if (TREE_CODE (DECL_INITIAL (DECL)) == STRING_CST) \ + mergeable_string_section (DECL_INITIAL (DECL), (ALIGN), 0); \ + else \ + mergeable_constant_section (DECL_MODE (DECL), (ALIGN), 0); \ + } \ + /* This could be a CONSTRUCTOR containing ADDR_EXPR of a VAR_DECL, \ + in which case we can't put it in a shared library rodata. */ \ + else if (flag_pic && (RELOC)) \ + data_section (); \ + else \ + const_section (); \ +} + +/* Similarly for constant pool data. 
*/ + +extern unsigned int ia64_section_threshold; +#undef SELECT_RTX_SECTION +#define SELECT_RTX_SECTION(MODE, RTX, ALIGN) \ +{ \ + if (GET_MODE_SIZE (MODE) > 0 \ + && GET_MODE_SIZE (MODE) <= ia64_section_threshold) \ + sdata_section (); \ + else if (flag_pic && symbolic_operand ((RTX), (MODE))) \ + data_section (); \ + else \ + mergeable_constant_section ((MODE), (ALIGN), 0); \ +} + +#undef EXTRA_SECTIONS +#define EXTRA_SECTIONS in_const, in_sdata, in_sbss + +#undef EXTRA_SECTION_FUNCTIONS +#define EXTRA_SECTION_FUNCTIONS \ + CONST_SECTION_FUNCTION \ + SDATA_SECTION_FUNCTION \ + SBSS_SECTION_FUNCTION + +#define SDATA_SECTION_ASM_OP "\t.sdata" + +#define SDATA_SECTION_FUNCTION \ +void \ +sdata_section () \ +{ \ + if (in_section != in_sdata) \ + { \ + fprintf (asm_out_file, "%s\n", SDATA_SECTION_ASM_OP); \ + in_section = in_sdata; \ + } \ +} + +#define SBSS_SECTION_ASM_OP "\t.sbss" + +#define SBSS_SECTION_FUNCTION \ +void \ +sbss_section () \ +{ \ + if (in_section != in_sbss) \ + { \ + fprintf (asm_out_file, "%s\n", SBSS_SECTION_ASM_OP); \ + in_section = in_sbss; \ + } \ +} diff --git a/contrib/gcc/config/ia64/t-aix b/contrib/gcc/config/ia64/t-aix new file mode 100644 index 0000000..2e738f9 --- /dev/null +++ b/contrib/gcc/config/ia64/t-aix @@ -0,0 +1,19 @@ +# AIX support + +# Compile crtbeginS.o and crtendS.o with pic. +CRTSTUFF_T_CFLAGS_S = -fPIC +# Compile libgcc2.a with pic and defines required by AIX headers +TARGET_LIBGCC2_CFLAGS = -fPIC -D__64BIT__ -D_LONG_LONG -D_IA64 -D__int128=__size128_t +LIB2ADDEH += $(srcdir)/config/ia64/unwind-aix.c + +# Add crt[in].o to the list defined in t-ia64. These files provide +# endpoints for crtbegin/end. 
+ +EXTRA_PARTS=crti.o crtn.o crtbegin.o crtend.o crtbeginS.o crtendS.o + +crti.o: $(srcdir)/config/ia64/crti.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -o crti.o -x assembler $(srcdir)/config/ia64/crti.asm +crtn.o: $(srcdir)/config/ia64/crtn.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -o crtn.o -x assembler $(srcdir)/config/ia64/crtn.asm + +# end t-aix diff --git a/contrib/gcc/config/ia64/t-glibc b/contrib/gcc/config/ia64/t-glibc new file mode 100644 index 0000000..a105662 --- /dev/null +++ b/contrib/gcc/config/ia64/t-glibc @@ -0,0 +1 @@ +LIB2ADDEH += $(srcdir)/config/ia64/fde-glibc.c diff --git a/contrib/gcc/config/ia64/t-hpux b/contrib/gcc/config/ia64/t-hpux new file mode 100644 index 0000000..9f8296b --- /dev/null +++ b/contrib/gcc/config/ia64/t-hpux @@ -0,0 +1,15 @@ +# We need multilib support for HPUX's ILP32 & LP64 modes. + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib + +MULTILIB_OPTIONS = milp32/mlp64 +MULTILIB_DIRNAMES = hpux32 hpux64 +MULTILIB_MATCHES = + +# Support routines for HP-UX 128 bit floats. + +LIB2FUNCS_EXTRA=quadlib.c + +quadlib.c: $(srcdir)/config/ia64/quadlib.c + cat $(srcdir)/config/ia64/quadlib.c > quadlib.c diff --git a/contrib/gcc/config/ia64/t-ia64 b/contrib/gcc/config/ia64/t-ia64 new file mode 100644 index 0000000..41c0235 --- /dev/null +++ b/contrib/gcc/config/ia64/t-ia64 @@ -0,0 +1,42 @@ +LIB1ASMSRC = ia64/lib1funcs.asm + +# We use different names for the DImode div/mod files so that they won't +# conflict with libgcc2.c files. We used to use __ia64 as a prefix, now +# we use __ as the prefix. Note that L_divdi3 in libgcc2.c actually defines +# a TImode divide function, so there is no actual overlap here between +# libgcc2.c and lib1funcs.asm. +LIB1ASMFUNCS = __divtf3 __divdf3 __divsf3 \ + __divdi3 __moddi3 __udivdi3 __umoddi3 \ + __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \ + __nonlocal_goto __restore_stack_nonlocal __trampoline + +# ??? 
Hack to get -P option used when compiling lib1funcs.asm, because Intel +# assembler does not accept # line number as a comment. +# ??? This breaks C++ pragma interface/implementation, which is used in the +# C++ part of libgcc2, hence it had to be disabled. Must find some other way +# to support the Intel assembler. +#LIBGCC2_DEBUG_CFLAGS = -g1 -P + +SHLIB_MAPFILES += $(srcdir)/config/ia64/libgcc-ia64.ver + +# For svr4 we build crtbegin.o and crtend.o which serve to add begin and +# end labels to the .ctors and .dtors section when we link using gcc. + +EXTRA_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtfastmath.o + +# Effectively disable the crtbegin/end rules using crtstuff.c +T = disable + +# Assemble startup files. +crtbegin.o: $(srcdir)/config/ia64/crtbegin.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -o crtbegin.o -x assembler-with-cpp $(srcdir)/config/ia64/crtbegin.asm +crtend.o: $(srcdir)/config/ia64/crtend.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -o crtend.o -x assembler-with-cpp $(srcdir)/config/ia64/crtend.asm +crtbeginS.o: $(srcdir)/config/ia64/crtbegin.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -DSHARED -c -o crtbeginS.o -x assembler-with-cpp $(srcdir)/config/ia64/crtbegin.asm +crtendS.o: $(srcdir)/config/ia64/crtend.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -DSHARED -c -o crtendS.o -x assembler-with-cpp $(srcdir)/config/ia64/crtend.asm +crtfastmath.o: $(srcdir)/config/ia64/crtfastmath.c $(GCC_PASSES) + $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -c -o crtfastmath.o $(srcdir)/config/ia64/crtfastmath.c + +LIB2ADDEH = $(srcdir)/config/ia64/unwind-ia64.c $(srcdir)/unwind-sjlj.c diff --git a/contrib/gcc/config/ia64/unwind-aix.c b/contrib/gcc/config/ia64/unwind-aix.c new file mode 100644 index 0000000..7e59406 --- /dev/null +++ b/contrib/gcc/config/ia64/unwind-aix.c @@ -0,0 +1,120 @@ +/* Implements unwind table entry lookup for AIX (cf. fde-glibc.c). + Copyright (C) 2001, 2002 Free Software Foundation, Inc. 
+ Contributed by Timothy Wall <twall@redhat.com> + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU CC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include "tconfig.h" +#include "tsystem.h" +#include "unwind.h" +#include "unwind-ia64.h" + +#include <dlfcn.h> +#include <link.h> +#include <sys/mman.h> + +static struct unw_table_entry * +find_fde_for_dso (Elf64_Addr pc, rt_link_map *map, + unsigned long* pseg_base, unsigned long* pgp) +{ + rt_segment *seg; + Elf64_Addr seg_base; + struct unw_table_entry *f_base; + size_t lo, hi; + + /* See if PC falls into one of the loaded segments. */ + for (seg = map->l_segments; seg; seg = (rt_segment *)seg->s_next) + { + if (pc >= seg->s_map_addr && pc < seg->s_map_addr + seg->s_mapsz) + break; + } + if (!seg) + return NULL; + + /* Search for the entry within the unwind table. */ + f_base = (struct unw_table_entry *) (map->l_unwind_table); + seg_base = (Elf64_Addr) seg->s_map_addr; + lo = 0; + hi = map->l_unwind_sz / sizeof (struct unw_table_entry); + + while (lo < hi) + { + size_t mid = (lo + hi) / 2; + struct unw_table_entry *f = f_base + mid; + + if (pc < f->start_offset + seg_base) + hi = mid; + else if (pc >= f->end_offset + seg_base) + lo = mid + 1; + else { + /* AIX executables are *always* dynamic. Look up GP for this + object. 
*/ + Elf64_Dyn *dyn = map->l_ld; + *pgp = 0; + for (; dyn->d_tag != DT_NULL ; dyn++) + { + if (dyn->d_tag == DT_PLTGOT) + { + *pgp = dyn->d_un.d_ptr; + break; + } + } + *pseg_base = seg_base; + return f; + } + } + return NULL; +} + +/* Return a pointer to the unwind table entry for the function containing + PC. */ +struct unw_table_entry * +_Unwind_FindTableEntry (void *pc, unsigned long *pseg_base, unsigned long *pgp) +{ + extern rt_r_debug _r_debug; + struct unw_table_entry *ret; + rt_link_map *map = _r_debug.r_map; /* address of link map */ + + /* Check the main application first, hoping that most of the user's + code is there instead of in some library. */ + ret = find_fde_for_dso ((Elf64_Addr)pc, map, pseg_base, pgp); + if (ret) + { + /* If we're in the main application, use the current GP value. */ + register unsigned long gp __asm__("gp"); + *pgp = gp; + return ret; + } + + /* FIXME need a DSO lock mechanism for AIX here, to ensure shared + libraries aren't changed while we're examining them. */ + + for (map = _r_debug.r_map; map; map = map->l_next) + { + /* Skip the main application's entry. */ + if (!map->l_name) + continue; + ret = find_fde_for_dso ((Elf64_Addr)pc, map, pseg_base, pgp); + if (ret) + break; + } + + /* FIXME need a DSO unlock mechanism for AIX here. */ + + return ret; +} diff --git a/contrib/gcc/config/ia64/unwind-ia64.c b/contrib/gcc/config/ia64/unwind-ia64.c new file mode 100644 index 0000000..99923aa --- /dev/null +++ b/contrib/gcc/config/ia64/unwind-ia64.c @@ -0,0 +1,2039 @@ +/* Subroutines needed for unwinding IA-64 standard format stack frame + info for exception handling. + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 + Free Software Foundation, Inc. + Contributed by Andrew MacLeod <amacleod@cygnus.com> + Andrew Haley <aph@cygnus.com> + David Mosberger-Tang <davidm@hpl.hp.com> + + This file is part of GNU CC. 
+ + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU CC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. */ + + +#include "tconfig.h" +#include "tsystem.h" +#include "unwind.h" +#include "unwind-ia64.h" + +#ifndef __USING_SJLJ_EXCEPTIONS__ +#define UNW_VER(x) ((x) >> 48) +#define UNW_FLAG_MASK 0x0000ffff00000000 +#define UNW_FLAG_OSMASK 0x0000f00000000000 +#define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L) +#define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L) +#define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL) + +enum unw_application_register +{ + UNW_AR_BSP, + UNW_AR_BSPSTORE, + UNW_AR_PFS, + UNW_AR_RNAT, + UNW_AR_UNAT, + UNW_AR_LC, + UNW_AR_EC, + UNW_AR_FPSR, + UNW_AR_RSC, + UNW_AR_CCV +}; + +enum unw_register_index +{ + /* Primary UNAT. */ + UNW_REG_PRI_UNAT_GR, + UNW_REG_PRI_UNAT_MEM, + + /* Memory Stack. */ + UNW_REG_PSP, /* previous memory stack pointer */ + + /* Register Stack. 
*/ + UNW_REG_BSP, /* register stack pointer */ + UNW_REG_BSPSTORE, + UNW_REG_PFS, /* previous function state */ + UNW_REG_RNAT, + /* Return Pointer. */ + UNW_REG_RP, + + /* Special preserved registers. */ + UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR, + + /* Non-stacked general registers. */ + UNW_REG_R2, + UNW_REG_R4 = UNW_REG_R2 + 2, + UNW_REG_R7 = UNW_REG_R2 + 5, + UNW_REG_R31 = UNW_REG_R2 + 29, + + /* Non-stacked floating point registers. */ + UNW_REG_F2, + UNW_REG_F5 = UNW_REG_F2 + 3, + UNW_REG_F16 = UNW_REG_F2 + 14, + UNW_REG_F31 = UNW_REG_F2 + 29, + + /* Branch registers. */ + UNW_REG_B0, UNW_REG_B1, + UNW_REG_B5 = UNW_REG_B1 + 4, + + UNW_NUM_REGS +}; + +enum unw_where +{ + UNW_WHERE_NONE, /* register isn't saved at all */ + UNW_WHERE_GR, /* register is saved in a general register */ + UNW_WHERE_FR, /* register is saved in a floating-point register */ + UNW_WHERE_BR, /* register is saved in a branch register */ + UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */ + UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */ + + /* At the end of each prologue these locations get resolved to + UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively. */ + UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */ + UNW_WHERE_GR_SAVE /* register is saved in next general register */ +}; + +#define UNW_WHEN_NEVER 0x7fffffff + +struct unw_reg_info +{ + unsigned long val; /* save location: register number or offset */ + enum unw_where where; /* where the register gets saved */ + int when; /* when the register gets saved */ +}; + +typedef struct unw_state_record +{ + unsigned int first_region : 1; /* is this the first region? */ + unsigned int done : 1; /* are we done scanning descriptors? */ + unsigned int any_spills : 1; /* got any register spills? */ + unsigned int in_body : 1; /* are we inside a body? 
*/ + + unsigned char *imask; /* imask of of spill_mask record or NULL */ + unsigned long pr_val; /* predicate values */ + unsigned long pr_mask; /* predicate mask */ + long spill_offset; /* psp-relative offset for spill base */ + int region_start; + int region_len; + int epilogue_start; + int epilogue_count; + int when_target; + + unsigned char gr_save_loc; /* next general register to use for saving */ + unsigned char return_link_reg; /* branch register for return link */ + + struct unw_reg_state { + struct unw_reg_state *next; + unsigned long label; /* label of this state record */ + struct unw_reg_info reg[UNW_NUM_REGS]; + } curr, *stack, *reg_state_list; + + _Unwind_Personality_Fn personality; + +} _Unwind_FrameState; + +enum unw_nat_type +{ + UNW_NAT_NONE, /* NaT not represented */ + UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */ + UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */ + UNW_NAT_REGSTK /* NaT is in rnat */ +}; + +struct unw_stack +{ + unsigned long limit; + unsigned long top; +}; + +struct _Unwind_Context +{ + /* Initial frame info. */ + unsigned long rnat; /* rse nat collection */ + unsigned long regstk_top; /* bsp for first frame */ + + /* Current frame info. */ + unsigned long bsp; /* backing store pointer value + corresponding to psp. */ + unsigned long sp; /* stack pointer value */ + unsigned long psp; /* previous sp value */ + unsigned long rp; /* return pointer */ + unsigned long pr; /* predicate collection */ + + unsigned long region_start; /* start of unwind region */ + unsigned long gp; /* global pointer value */ + void *lsda; /* language specific data area */ + + /* Preserved state. 
*/ + unsigned long *bsp_loc; /* previous bsp save location */ + unsigned long *bspstore_loc; + unsigned long *pfs_loc; + unsigned long *pri_unat_loc; + unsigned long *unat_loc; + unsigned long *lc_loc; + unsigned long *fpsr_loc; + + unsigned long eh_data[4]; + + struct unw_ireg + { + unsigned long *loc; + struct unw_ireg_nat + { + enum unw_nat_type type : 3; + signed long off : 61; /* NaT word is at loc+nat.off */ + } nat; + } ireg[32 - 2]; /* Indexed by <register number> - 2 */ + + unsigned long *br_loc[7]; + void *fr_loc[32 - 2]; + + /* ??? We initially point pri_unat_loc here. The entire NAT bit + logic needs work. */ + unsigned long initial_unat; +}; + +typedef unsigned long unw_word; + +/* Implicit register save order. See section 11.4.2.3 Rules for Using + Unwind Descriptors, rule 3. */ + +static unsigned char const save_order[] = +{ + UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR, + UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR +}; + + +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) + +/* Unwind decoder routines */ + +static void +push (struct unw_state_record *sr) +{ + struct unw_reg_state *rs; + + rs = malloc (sizeof (struct unw_reg_state)); + memcpy (rs, &sr->curr, sizeof (*rs)); + rs->next = sr->stack; + sr->stack = rs; +} + +static void +pop (struct unw_state_record *sr) +{ + struct unw_reg_state *rs; + + rs = sr->stack; + sr->stack = rs->next; + free (rs); +} + +static enum unw_register_index __attribute__((const)) +decode_abreg (unsigned char abreg, int memory) +{ + switch (abreg) + { + case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04); + case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22); + case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30); + case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41); + case 0x60: return UNW_REG_PR; + case 0x61: return UNW_REG_PSP; + case 0x62: return memory ? 
UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR; + case 0x63: return UNW_REG_RP; + case 0x64: return UNW_REG_BSP; + case 0x65: return UNW_REG_BSPSTORE; + case 0x66: return UNW_REG_RNAT; + case 0x67: return UNW_REG_UNAT; + case 0x68: return UNW_REG_FPSR; + case 0x69: return UNW_REG_PFS; + case 0x6a: return UNW_REG_LC; + default: + abort (); + } +} + +static void +set_reg (struct unw_reg_info *reg, enum unw_where where, + int when, unsigned long val) +{ + reg->val = val; + reg->where = where; + if (reg->when == UNW_WHEN_NEVER) + reg->when = when; +} + +static void +alloc_spill_area (unsigned long *offp, unsigned long regsize, + struct unw_reg_info *lo, struct unw_reg_info *hi) +{ + struct unw_reg_info *reg; + + for (reg = hi; reg >= lo; --reg) + { + if (reg->where == UNW_WHERE_SPILL_HOME) + { + reg->where = UNW_WHERE_PSPREL; + reg->val = 0x10 - *offp; + *offp += regsize; + } + } +} + +static inline void +spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, + unw_word t) +{ + struct unw_reg_info *reg; + + for (reg = *regp; reg <= lim; ++reg) + { + if (reg->where == UNW_WHERE_SPILL_HOME) + { + reg->when = t; + *regp = reg + 1; + return; + } + } + /* Excess spill. */ + abort (); +} + +static void +finish_prologue (struct unw_state_record *sr) +{ + struct unw_reg_info *reg; + unsigned long off; + int i; + + /* First, resolve implicit register save locations + (see Section "11.4.2.3 Rules for Using Unwind Descriptors", rule 3). */ + + for (i = 0; i < (int) sizeof(save_order); ++i) + { + reg = sr->curr.reg + save_order[i]; + if (reg->where == UNW_WHERE_GR_SAVE) + { + reg->where = UNW_WHERE_GR; + reg->val = sr->gr_save_loc++; + } + } + + /* Next, compute when the fp, general, and branch registers get saved. + This must come before alloc_spill_area() because we need to know + which registers are spilled to their home locations. 
*/ + if (sr->imask) + { + static unsigned char const limit[3] = { + UNW_REG_F31, UNW_REG_R7, UNW_REG_B5 + }; + + unsigned char kind, mask = 0, *cp = sr->imask; + int t; + struct unw_reg_info *(regs[3]); + + regs[0] = sr->curr.reg + UNW_REG_F2; + regs[1] = sr->curr.reg + UNW_REG_R4; + regs[2] = sr->curr.reg + UNW_REG_B1; + + for (t = 0; t < sr->region_len; ++t) + { + if ((t & 3) == 0) + mask = *cp++; + kind = (mask >> 2*(3-(t & 3))) & 3; + if (kind > 0) + spill_next_when(®s[kind - 1], sr->curr.reg + limit[kind - 1], + sr->region_start + t); + } + } + + /* Next, lay out the memory stack spill area. */ + if (sr->any_spills) + { + off = sr->spill_offset; + alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, + sr->curr.reg + UNW_REG_F31); + alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_B1, + sr->curr.reg + UNW_REG_B5); + alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_R4, + sr->curr.reg + UNW_REG_R7); + } +} + +/* + * Region header descriptors. + */ + +static void +desc_prologue (int body, unw_word rlen, unsigned char mask, + unsigned char grsave, struct unw_state_record *sr) +{ + int i; + + if (!(sr->in_body || sr->first_region)) + finish_prologue(sr); + sr->first_region = 0; + + /* Check if we're done. */ + if (body && sr->when_target < sr->region_start + sr->region_len) + { + sr->done = 1; + return; + } + + for (i = 0; i < sr->epilogue_count; ++i) + pop(sr); + sr->epilogue_count = 0; + sr->epilogue_start = UNW_WHEN_NEVER; + + if (!body) + push(sr); + + sr->region_start += sr->region_len; + sr->region_len = rlen; + sr->in_body = body; + + if (!body) + { + for (i = 0; i < 4; ++i) + { + if (mask & 0x8) + set_reg (sr->curr.reg + save_order[i], UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, grsave++); + mask <<= 1; + } + sr->gr_save_loc = grsave; + sr->any_spills = 0; + sr->imask = 0; + sr->spill_offset = 0x10; /* default to psp+16 */ + } +} + +/* + * Prologue descriptors. 
+ */ + +static inline void +desc_abi (unsigned char abi __attribute__((unused)), + unsigned char context __attribute__((unused)), + struct unw_state_record *sr __attribute__((unused))) +{ + /* Anything to do? */ +} + +static inline void +desc_br_gr (unsigned char brmask, unsigned char gr, + struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 5; ++i) + { + if (brmask & 1) + set_reg (sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, gr++); + brmask >>= 1; + } +} + +static inline void +desc_br_mem (unsigned char brmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 5; ++i) + { + if (brmask & 1) + { + set_reg (sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + brmask >>= 1; + } +} + +static inline void +desc_frgr_mem (unsigned char grmask, unw_word frmask, + struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) + { + if ((grmask & 1) != 0) + { + set_reg (sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + grmask >>= 1; + } + for (i = 0; i < 20; ++i) + { + if ((frmask & 1) != 0) + { + set_reg (sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + frmask >>= 1; + } +} + +static inline void +desc_fr_mem (unsigned char frmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) + { + if ((frmask & 1) != 0) + { + set_reg (sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + frmask >>= 1; + } +} + +static inline void +desc_gr_gr (unsigned char grmask, unsigned char gr, + struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) + { + if ((grmask & 1) != 0) + set_reg (sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, gr++); + grmask >>= 1; + } +} + 
+static inline void +desc_gr_mem (unsigned char grmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) + { + if ((grmask & 1) != 0) + { + set_reg (sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + grmask >>= 1; + } +} + +static inline void +desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr) +{ + set_reg (sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE, + sr->region_start + MIN ((int)t, sr->region_len - 1), 16*size); +} + +static inline void +desc_mem_stack_v (unw_word t, struct unw_state_record *sr) +{ + sr->curr.reg[UNW_REG_PSP].when + = sr->region_start + MIN ((int)t, sr->region_len - 1); +} + +static inline void +desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr) +{ + set_reg (sr->curr.reg + reg, UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, dst); +} + +static inline void +desc_reg_psprel (unsigned char reg, unw_word pspoff, + struct unw_state_record *sr) +{ + set_reg (sr->curr.reg + reg, UNW_WHERE_PSPREL, + sr->region_start + sr->region_len - 1, + 0x10 - 4*pspoff); +} + +static inline void +desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr) +{ + set_reg (sr->curr.reg + reg, UNW_WHERE_SPREL, + sr->region_start + sr->region_len - 1, + 4*spoff); +} + +static inline void +desc_rp_br (unsigned char dst, struct unw_state_record *sr) +{ + sr->return_link_reg = dst; +} + +static inline void +desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr) +{ + struct unw_reg_info *reg = sr->curr.reg + regnum; + + if (reg->where == UNW_WHERE_NONE) + reg->where = UNW_WHERE_GR_SAVE; + reg->when = sr->region_start + MIN ((int)t, sr->region_len - 1); +} + +static inline void +desc_spill_base (unw_word pspoff, struct unw_state_record *sr) +{ + sr->spill_offset = 0x10 - 4*pspoff; +} + +static inline unsigned char * +desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr) 
+{ + sr->imask = imaskp; + return imaskp + (2*sr->region_len + 7)/8; +} + +/* + * Body descriptors. + */ +static inline void +desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr) +{ + sr->epilogue_start = sr->region_start + sr->region_len - 1 - t; + sr->epilogue_count = ecount + 1; +} + +static inline void +desc_copy_state (unw_word label, struct unw_state_record *sr) +{ + struct unw_reg_state *rs; + + for (rs = sr->reg_state_list; rs; rs = rs->next) + { + if (rs->label == label) + { + memcpy (&sr->curr, rs, sizeof(sr->curr)); + return; + } + } + abort (); +} + +static inline void +desc_label_state (unw_word label, struct unw_state_record *sr) +{ + struct unw_reg_state *rs; + + rs = malloc (sizeof (struct unw_reg_state)); + memcpy (rs, &sr->curr, sizeof (*rs)); + rs->label = label; + rs->next = sr->reg_state_list; + sr->reg_state_list = rs; +} + +/* + * General descriptors. + */ + +static inline int +desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr) +{ + if (sr->when_target <= sr->region_start + MIN ((int)t, sr->region_len - 1)) + return 0; + if (qp > 0) + { + if ((sr->pr_val & (1UL << qp)) == 0) + return 0; + sr->pr_mask |= (1UL << qp); + } + return 1; +} + +static inline void +desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, + struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (! desc_is_active (qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg (abreg, 0); + r->where = UNW_WHERE_NONE; + r->when = sr->region_start + MIN ((int)t, sr->region_len - 1); + r->val = 0; +} + +static inline void +desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, + unsigned char x, unsigned char ytreg, + struct unw_state_record *sr) +{ + enum unw_where where = UNW_WHERE_GR; + struct unw_reg_info *r; + + if (! 
desc_is_active (qp, t, sr)) + return; + + if (x) + where = UNW_WHERE_BR; + else if (ytreg & 0x80) + where = UNW_WHERE_FR; + + r = sr->curr.reg + decode_abreg (abreg, 0); + r->where = where; + r->when = sr->region_start + MIN ((int)t, sr->region_len - 1); + r->val = ytreg & 0x7f; +} + +static inline void +desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, + unw_word pspoff, struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (! desc_is_active (qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg (abreg, 1); + r->where = UNW_WHERE_PSPREL; + r->when = sr->region_start + MIN((int)t, sr->region_len - 1); + r->val = 0x10 - 4*pspoff; +} + +static inline void +desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, + unw_word spoff, struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (! desc_is_active (qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg (abreg, 1); + r->where = UNW_WHERE_SPREL; + r->when = sr->region_start + MIN ((int)t, sr->region_len - 1); + r->val = 4*spoff; +} + + +#define UNW_DEC_BAD_CODE(code) abort (); + +/* Region headers. */ +#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg) desc_prologue(0,r,m,gr,arg) +#define UNW_DEC_PROLOGUE(fmt,b,r,arg) desc_prologue(b,r,0,32,arg) + +/* Prologue descriptors. 
*/ +#define UNW_DEC_ABI(fmt,a,c,arg) desc_abi(a,c,arg) +#define UNW_DEC_BR_GR(fmt,b,g,arg) desc_br_gr(b,g,arg) +#define UNW_DEC_BR_MEM(fmt,b,arg) desc_br_mem(b,arg) +#define UNW_DEC_FRGR_MEM(fmt,g,f,arg) desc_frgr_mem(g,f,arg) +#define UNW_DEC_FR_MEM(fmt,f,arg) desc_fr_mem(f,arg) +#define UNW_DEC_GR_GR(fmt,m,g,arg) desc_gr_gr(m,g,arg) +#define UNW_DEC_GR_MEM(fmt,m,arg) desc_gr_mem(m,arg) +#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg) desc_mem_stack_f(t,s,arg) +#define UNW_DEC_MEM_STACK_V(fmt,t,arg) desc_mem_stack_v(t,arg) +#define UNW_DEC_REG_GR(fmt,r,d,arg) desc_reg_gr(r,d,arg) +#define UNW_DEC_REG_PSPREL(fmt,r,o,arg) desc_reg_psprel(r,o,arg) +#define UNW_DEC_REG_SPREL(fmt,r,o,arg) desc_reg_sprel(r,o,arg) +#define UNW_DEC_REG_WHEN(fmt,r,t,arg) desc_reg_when(r,t,arg) +#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg) +#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg) +#define UNW_DEC_PRIUNAT_GR(fmt,r,arg) desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg) +#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg) desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg) +#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg) desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg) +#define UNW_DEC_RP_BR(fmt,d,arg) desc_rp_br(d,arg) +#define UNW_DEC_SPILL_BASE(fmt,o,arg) desc_spill_base(o,arg) +#define UNW_DEC_SPILL_MASK(fmt,m,arg) (m = desc_spill_mask(m,arg)) + +/* Body descriptors. */ +#define UNW_DEC_EPILOGUE(fmt,t,c,arg) desc_epilogue(t,c,arg) +#define UNW_DEC_COPY_STATE(fmt,l,arg) desc_copy_state(l,arg) +#define UNW_DEC_LABEL_STATE(fmt,l,arg) desc_label_state(l,arg) + +/* General unwind descriptors. 
*/ +#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg) desc_spill_reg_p(p,t,a,x,y,arg) +#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg) desc_spill_reg_p(0,t,a,x,y,arg) +#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg) desc_spill_psprel_p(p,t,a,o,arg) +#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg) desc_spill_psprel_p(0,t,a,o,arg) +#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg) desc_spill_sprel_p(p,t,a,o,arg) +#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg) desc_spill_sprel_p(0,t,a,o,arg) +#define UNW_DEC_RESTORE_P(f,p,t,a,arg) desc_restore_p(p,t,a,arg) +#define UNW_DEC_RESTORE(f,t,a,arg) desc_restore_p(0,t,a,arg) + + +/* + * Generic IA-64 unwind info decoder. + * + * This file is used both by the Linux kernel and objdump. Please keep + * the copies of this file in sync. + * + * You need to customize the decoder by defining the following + * macros/constants before including this file: + * + * Types: + * unw_word Unsigned integer type with at least 64 bits + * + * Register names: + * UNW_REG_BSP + * UNW_REG_BSPSTORE + * UNW_REG_FPSR + * UNW_REG_LC + * UNW_REG_PFS + * UNW_REG_PR + * UNW_REG_RNAT + * UNW_REG_PSP + * UNW_REG_RP + * UNW_REG_UNAT + * + * Decoder action macros: + * UNW_DEC_BAD_CODE(code) + * UNW_DEC_ABI(fmt,abi,context,arg) + * UNW_DEC_BR_GR(fmt,brmask,gr,arg) + * UNW_DEC_BR_MEM(fmt,brmask,arg) + * UNW_DEC_COPY_STATE(fmt,label,arg) + * UNW_DEC_EPILOGUE(fmt,t,ecount,arg) + * UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg) + * UNW_DEC_FR_MEM(fmt,frmask,arg) + * UNW_DEC_GR_GR(fmt,grmask,gr,arg) + * UNW_DEC_GR_MEM(fmt,grmask,arg) + * UNW_DEC_LABEL_STATE(fmt,label,arg) + * UNW_DEC_MEM_STACK_F(fmt,t,size,arg) + * UNW_DEC_MEM_STACK_V(fmt,t,arg) + * UNW_DEC_PRIUNAT_GR(fmt,r,arg) + * UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) + * UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) + * UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg) + * UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg) + * UNW_DEC_PROLOGUE(fmt,body,rlen,arg) + * UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg) + * UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg) + * 
 *	UNW_DEC_REG_REG(fmt,src,dst,arg)
 *	UNW_DEC_REG_SPREL(fmt,reg,spoff,arg)
 *	UNW_DEC_REG_WHEN(fmt,reg,t,arg)
 *	UNW_DEC_RESTORE(fmt,t,abreg,arg)
 *	UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg)
 *	UNW_DEC_SPILL_BASE(fmt,pspoff,arg)
 *	UNW_DEC_SPILL_MASK(fmt,imaskp,arg)
 *	UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg)
 *	UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg)
 *	UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg)
 *	UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg)
 *	UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg)
 *	UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,pspoff,arg)
 */

/* Decode one unsigned LEB128 value beginning at *DPP and advance *DPP
   past it.  7 payload bits per byte, least-significant group first; a
   clear top bit marks the final byte.  */
static unw_word
unw_decode_uleb128 (unsigned char **dpp)
{
  unsigned shift = 0;
  unw_word byte, result = 0;
  unsigned char *bp = *dpp;

  while (1)
    {
      byte = *bp++;
      result |= (byte & 0x7f) << shift;
      if ((byte & 0x80) == 0)
	break;
      shift += 7;
    }
  *dpp = bp;
  return result;
}

/* X1 format: spill of a register to sp-relative (bit 7 of byte1 set) or
   psp-relative memory at time T, offset OFF.  Returns the address of the
   next descriptor.  */
static unsigned char *
unw_decode_x1 (unsigned char *dp,
	       unsigned char code __attribute__((unused)),
	       void *arg)
{
  unsigned char byte1, abreg;
  unw_word t, off;

  byte1 = *dp++;
  t = unw_decode_uleb128 (&dp);
  off = unw_decode_uleb128 (&dp);
  abreg = (byte1 & 0x7f);
  if (byte1 & 0x80)
    UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg);
  else
    UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg);
  return dp;
}

/* X2 format: spill to another register (or restore, when both the
   sp-relative bit and the target register YTREG are zero).  */
static unsigned char *
unw_decode_x2 (unsigned char *dp,
	       unsigned char code __attribute__((unused)),
	       void *arg)
{
  unsigned char byte1, byte2, abreg, x, ytreg;
  unw_word t;

  byte1 = *dp++; byte2 = *dp++;
  t = unw_decode_uleb128 (&dp);
  abreg = (byte1 & 0x7f);
  ytreg = byte2;
  x = (byte1 >> 7) & 1;
  if ((byte1 & 0x80) == 0 && ytreg == 0)
    UNW_DEC_RESTORE(X2, t, abreg, arg);
  else
    UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg);
  return dp;
}

/* X3 format: predicated (QP) variant of X1.  */
static unsigned char *
unw_decode_x3 (unsigned char *dp,
	       unsigned char code __attribute__((unused)),
	       void *arg)
{
  unsigned char byte1, byte2, abreg, qp;
  unw_word t, off;

  byte1 = *dp++; byte2 = *dp++;
  t = unw_decode_uleb128 (&dp);
  off = unw_decode_uleb128 (&dp);

  qp = (byte1 & 0x3f);
  abreg = (byte2 & 0x7f);

  if (byte1 & 0x80)
    UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg);
  else
    UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg);
  return dp;
}

/* X4 format: predicated (QP) variant of X2.  */
static unsigned char *
unw_decode_x4 (unsigned char *dp,
	       unsigned char code __attribute__((unused)),
	       void *arg)
{
  unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg;
  unw_word t;

  byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
  t = unw_decode_uleb128 (&dp);

  qp = (byte1 & 0x3f);
  abreg = (byte2 & 0x7f);
  x = (byte2 >> 7) & 1;
  ytreg = byte3;

  if ((byte2 & 0x80) == 0 && byte3 == 0)
    UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg);
  else
    UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg);
  return dp;
}

/* R1 format: short region header; bit 5 selects body vs prologue,
   low 5 bits give the region length.  */
static unsigned char *
unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg)
{
  int body = (code & 0x20) != 0;
  unw_word rlen;

  rlen = (code & 0x1f);
  UNW_DEC_PROLOGUE(R1, body, rlen, arg);
  return dp;
}

/* R2 format: prologue header with a mask of saved registers and the
   GR that holds the saved values.  */
static unsigned char *
unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg)
{
  unsigned char byte1, mask, grsave;
  unw_word rlen;

  byte1 = *dp++;

  mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
  grsave = (byte1 & 0x7f);
  rlen = unw_decode_uleb128 (&dp);
  UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg);
  return dp;
}

/* R3 format: long region header; the region length is a ULEB128.  */
static unsigned char *
unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg)
{
  unw_word rlen;

  rlen = unw_decode_uleb128 (&dp);
  UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg);
  return dp;
}

/* P1 format: branch registers saved to memory, 5-bit mask.  */
static unsigned char *
unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg)
{
  unsigned char brmask = (code & 0x1f);

  UNW_DEC_BR_MEM(P1, brmask, arg);
  return dp;
}

/* P2-P5 formats, discriminated by the remaining bits of CODE:
   P2 = branch regs to GRs, P3 = special reg to GR, P4 = spill mask,
   P5 = GR/FR save mask.  */
static unsigned char *
unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg)
{
  if ((code & 0x10) == 0)
    {
      unsigned char byte1 = *dp++;

      UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1),
		    (byte1 & 0x7f), arg);
    }
  else if ((code & 0x08) == 0)
    {
      unsigned char byte1 = *dp++, r, dst;

      r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
      dst = (byte1 & 0x7f);
      switch (r)
	{
	case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break;
	case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break;
	case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break;
	case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break;
	case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break;
	case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break;
	case 6: UNW_DEC_RP_BR(P3, dst, arg); break;
	case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break;
	case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break;
	case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break;
	case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break;
	case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break;
	default: UNW_DEC_BAD_CODE(r); break;
	}
    }
  else if ((code & 0x7) == 0)
    /* Note: SPILL_MASK advances DP itself (see macro definition).  */
    UNW_DEC_SPILL_MASK(P4, dp, arg);
  else if ((code & 0x7) == 1)
    {
      unw_word grmask, frmask, byte1, byte2, byte3;

      byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
      grmask = ((byte1 >> 4) & 0xf);
      frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3;
      UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg);
    }
  else
    UNW_DEC_BAD_CODE(code);
  return dp;
}

/* P6 format: 4-bit mask of GRs (bit 4 set) or FRs saved to memory.  */
static unsigned char *
unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg)
{
  int gregs = (code & 0x10) != 0;
  unsigned char mask = (code & 0x0f);

  if (gregs)
    UNW_DEC_GR_MEM(P6, mask, arg);
  else
    UNW_DEC_FR_MEM(P6, mask, arg);
  return dp;
}

/* P7-P10 formats plus escapes to the X1-X4 general descriptors
   (sub-codes 0x9-0xc).  */
static unsigned char *
unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg)
{
  unsigned char r, byte1, byte2;
  unw_word t, size;

  if ((code & 0x10) == 0)
    {
      r = (code & 0xf);
      t = unw_decode_uleb128 (&dp);
      switch (r)
	{
	case 0:
	  size = unw_decode_uleb128 (&dp);
	  UNW_DEC_MEM_STACK_F(P7, t, size, arg);
	  break;

	case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break;
	case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break;
	case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break;
	case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break;
	case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break;
	case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break;
	case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break;
	case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break;
	case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break;
	case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break;
	case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break;
	case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break;
	case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break;
	case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break;
	case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break;
	default: UNW_DEC_BAD_CODE(r); break;
	}
    }
  else
    {
      switch (code & 0xf)
	{
	case 0x0: /* p8 */
	  {
	    r = *dp++;
	    t = unw_decode_uleb128 (&dp);
	    switch (r)
	      {
	      case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break;
	      case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break;
	      case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break;
	      case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break;
	      case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break;
	      case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break;
	      case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break;
	      case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break;
	      case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break;
	      case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break;
	      case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
	      case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
	      case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break;
	      case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break;
	      case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break;
	      case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break;
	      case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break;
	      case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break;
	      case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break;
	      default: UNW_DEC_BAD_CODE(r); break;
	      }
	  }
	  break;

	case 0x1:
	  byte1 = *dp++; byte2 = *dp++;
	  UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg);
	  break;

	case 0xf: /* p10 */
	  byte1 = *dp++; byte2 = *dp++;
	  UNW_DEC_ABI(P10, byte1, byte2, arg);
	  break;

	case 0x9:
	  return unw_decode_x1 (dp, code, arg);

	case 0xa:
	  return unw_decode_x2 (dp, code, arg);

	case 0xb:
	  return unw_decode_x3 (dp, code, arg);

	case 0xc:
	  return unw_decode_x4 (dp, code, arg);

	default:
	  UNW_DEC_BAD_CODE(code);
	  break;
	}
    }
  return dp;
}

/* B1 format: copy-state (bit 5 set) or label-state, 5-bit label.  */
static unsigned char *
unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg)
{
  unw_word label = (code & 0x1f);

  if ((code & 0x20) != 0)
    UNW_DEC_COPY_STATE(B1, label, arg);
  else
    UNW_DEC_LABEL_STATE(B1, label, arg);
  return dp;
}

/* B2 format: epilogue with a 5-bit epilogue count and ULEB128 time.  */
static unsigned char *
unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg)
{
  unw_word t;

  t = unw_decode_uleb128 (&dp);
  UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg);
  return dp;
}

/* B3/B4 formats plus escapes to X1-X4 (low three bits 1-4).  */
static unsigned char *
unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg)
{
  unw_word t, ecount, label;

  if ((code & 0x10) == 0)
    {
      t = unw_decode_uleb128 (&dp);
      ecount = unw_decode_uleb128 (&dp);
      UNW_DEC_EPILOGUE(B3, t, ecount, arg);
    }
  else if ((code & 0x07) == 0)
    {
      label = unw_decode_uleb128 (&dp);
      if ((code & 0x08) != 0)
	UNW_DEC_COPY_STATE(B4, label, arg);
      else
	UNW_DEC_LABEL_STATE(B4, label, arg);
    }
  else
    switch (code & 0x7)
      {
      case 1: return unw_decode_x1 (dp, code, arg);
      case 2: return unw_decode_x2 (dp, code, arg);
      case 3: return unw_decode_x3 (dp, code, arg);
      case 4: return unw_decode_x4 (dp, code, arg);
      default: UNW_DEC_BAD_CODE(code); break;
      }
  return dp;
}

typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *);

/* Dispatch on the top three bits of the first descriptor byte;
   row 0 is used inside a prologue region, row 1 inside a body.  */
static unw_decoder unw_decode_table[2][8] =
{
  /* prologue table: */
  {
    unw_decode_r1,	/* 0 */
    unw_decode_r1,
    unw_decode_r2,
    unw_decode_r3,
    unw_decode_p1,	/* 4 */
    unw_decode_p2_p5,
    unw_decode_p6,
    unw_decode_p7_p10
  },
  {
    unw_decode_r1,	/* 0 */
    unw_decode_r1,
    unw_decode_r2,
    unw_decode_r3,
    unw_decode_b1,	/* 4 */
    unw_decode_b1,
    unw_decode_b2,
    unw_decode_b3_x4
  }
};

/*
 * Decode one descriptor and return address of next descriptor.
 */
static inline unsigned char *
unw_decode (unsigned char *dp, int inside_body, void *arg)
{
  unw_decoder decoder;
  unsigned char code;

  code = *dp++;
  decoder = unw_decode_table[inside_body][code >> 5];
  dp = (*decoder) (dp, code, arg);
  return dp;
}


/* RSE helper functions.  */

/* Slot index (0-63) of ADDR within its 64-slot RSE group.  */
static inline unsigned long
ia64_rse_slot_num (unsigned long *addr)
{
  return (((unsigned long) addr) >> 3) & 0x3f;
}

/* Return TRUE if ADDR is the address of an RNAT slot.  */
static inline unsigned long
ia64_rse_is_rnat_slot (unsigned long *addr)
{
  return ia64_rse_slot_num (addr) == 0x3f;
}

/* Returns the address of the RNAT slot that covers the slot at
   address SLOT_ADDR.  */
static inline unsigned long *
ia64_rse_rnat_addr (unsigned long *slot_addr)
{
  return (unsigned long *) ((unsigned long) slot_addr | (0x3f << 3));
}

/* Calculate the number of registers in the dirty partition between
   BSPSTORE and BSP.  This isn't simply the slot count divided by eight
   because every 64th slot is used to store ar.rnat.  */
static inline unsigned long
ia64_rse_num_regs (unsigned long *bspstore, unsigned long *bsp)
{
  unsigned long slots = (bsp - bspstore);

  return slots - (ia64_rse_slot_num (bspstore) + slots)/0x40;
}

/* The inverse of the above: given bspstore and the number of
   registers, calculate ar.bsp.
*/ +static inline unsigned long * +ia64_rse_skip_regs (unsigned long *addr, long num_regs) +{ + long delta = ia64_rse_slot_num (addr) + num_regs; + + if (num_regs < 0) + delta -= 0x3e; + return addr + num_regs + delta/0x3f; +} + + +/* Unwind accessors. */ + +static void +unw_access_gr (struct _Unwind_Context *info, int regnum, + unsigned long *val, char *nat, int write) +{ + unsigned long *addr, *nat_addr = 0, nat_mask = 0, dummy_nat; + struct unw_ireg *ireg; + + if ((unsigned) regnum - 1 >= 127) + abort (); + + if (regnum < 1) + { + nat_addr = addr = &dummy_nat; + dummy_nat = 0; + } + else if (regnum < 32) + { + /* Access a non-stacked register. */ + ireg = &info->ireg[regnum - 2]; + addr = ireg->loc; + if (addr) + { + nat_addr = addr + ireg->nat.off; + switch (ireg->nat.type) + { + case UNW_NAT_VAL: + /* Simulate getf.sig/setf.sig. */ + if (write) + { + if (*nat) + { + /* Write NaTVal and be done with it. */ + addr[0] = 0; + addr[1] = 0x1fffe; + return; + } + addr[1] = 0x1003e; + } + else if (addr[0] == 0 && addr[1] == 0x1ffe) + { + /* Return NaT and be done with it. */ + *val = 0; + *nat = 1; + return; + } + /* FALLTHRU */ + + case UNW_NAT_NONE: + dummy_nat = 0; + nat_addr = &dummy_nat; + break; + + case UNW_NAT_MEMSTK: + nat_mask = 1UL << ((long) addr & 0x1f8)/8; + break; + + case UNW_NAT_REGSTK: + nat_addr = ia64_rse_rnat_addr (addr); + if ((unsigned long) nat_addr >= info->regstk_top) + nat_addr = &info->rnat; + nat_mask = 1UL << ia64_rse_slot_num (addr); + break; + } + } + } + else + { + /* Access a stacked register. 
*/ + addr = ia64_rse_skip_regs ((unsigned long *) info->bsp, regnum - 32); + nat_addr = ia64_rse_rnat_addr (addr); + if ((unsigned long) nat_addr >= info->regstk_top) + nat_addr = &info->rnat; + nat_mask = 1UL << ia64_rse_slot_num (addr); + } + + if (write) + { + *addr = *val; + if (*nat) + *nat_addr |= nat_mask; + else + *nat_addr &= ~nat_mask; + } + else + { + *val = *addr; + *nat = (*nat_addr & nat_mask) != 0; + } +} + +/* Get the value of register REG as saved in CONTEXT. */ + +_Unwind_Word +_Unwind_GetGR (struct _Unwind_Context *context, int index) +{ + _Unwind_Word ret; + char nat; + + if (index == 1) + return context->gp; + else if (index >= 15 && index <= 18) + return context->eh_data[index - 15]; + else + unw_access_gr (context, index, &ret, &nat, 0); + + return ret; +} + +/* Overwrite the saved value for register REG in CONTEXT with VAL. */ + +void +_Unwind_SetGR (struct _Unwind_Context *context, int index, _Unwind_Word val) +{ + char nat = 0; + + if (index == 1) + context->gp = val; + else if (index >= 15 && index <= 18) + context->eh_data[index - 15] = val; + else + unw_access_gr (context, index, &val, &nat, 1); +} + +/* Retrieve the return address for CONTEXT. */ + +inline _Unwind_Ptr +_Unwind_GetIP (struct _Unwind_Context *context) +{ + return context->rp; +} + +/* Overwrite the return address for CONTEXT with VAL. 
*/

inline void
_Unwind_SetIP (struct _Unwind_Context *context, _Unwind_Ptr val)
{
  context->rp = val;
}

/* Return the language-specific data area recorded for CONTEXT (set up
   by uw_frame_state_for below; 0 when the frame has no handler).  */
void *
_Unwind_GetLanguageSpecificData (struct _Unwind_Context *context)
{
  return context->lsda;
}

/* Return the start address of the function containing CONTEXT's IP.  */
_Unwind_Ptr
_Unwind_GetRegionStart (struct _Unwind_Context *context)
{
  return context->region_start;
}


/* Look up the unwind table entry for CONTEXT->rp, run its unwind
   descriptors through unw_decode, and fill in FS.  Also records the
   personality routine and LSDA on CONTEXT when present.  Returns
   _URC_END_OF_STACK when no unwind info can be found.  */
static _Unwind_Reason_Code
uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs)
{
  struct unw_table_entry *ent;
  unsigned long *unw, header, length;
  unsigned char *insn, *insn_end;
  unsigned long segment_base;

  memset (fs, 0, sizeof (*fs));
  context->lsda = 0;

  ent = _Unwind_FindTableEntry ((void *) context->rp,
				&segment_base, &context->gp);
  if (ent == NULL)
    {
      /* Couldn't find unwind info for this function.  Try an
	 os-specific fallback mechanism.  This will necessarily
	 not provide a personality routine or LSDA.  */
#ifdef MD_FALLBACK_FRAME_STATE_FOR
      MD_FALLBACK_FRAME_STATE_FOR (context, fs, success);

      /* [SCRA 11.4.1] A leaf function with no memory stack, no exception
	 handlers, and which keeps the return value in B0 does not need
	 an unwind table entry.

	 This can only happen in the frame after unwinding through a signal
	 handler.  Avoid infinite looping by requiring that B0 != RP.  */
      if (context->br_loc[0] && *context->br_loc[0] != context->rp)
	{
	  fs->curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
	  fs->curr.reg[UNW_REG_RP].when = -1;
	  fs->curr.reg[UNW_REG_RP].val = 0;
	  goto success;
	}

      return _URC_END_OF_STACK;
    success:
      return _URC_NO_REASON;
#else
      return _URC_END_OF_STACK;
#endif
    }

  context->region_start = ent->start_offset + segment_base;
  /* NOTE(review): /16*3 presumably converts the byte offset into an
     instruction-slot count (3 slots per 16-byte bundle) — confirm
     against the descriptor "when" encoding.  */
  fs->when_target = (context->rp - context->region_start) / 16 * 3;

  unw = (unsigned long *) (ent->info_offset + segment_base);
  header = *unw;
  length = UNW_LENGTH (header);

  /* ??? Perhaps check UNW_VER / UNW_FLAG_OSMASK.  */

  if (UNW_FLAG_EHANDLER (header) | UNW_FLAG_UHANDLER (header))
    {
      fs->personality =
	*(_Unwind_Personality_Fn *) (unw[length + 1] + context->gp);
      context->lsda = unw + length + 2;
    }

  insn = (unsigned char *) (unw + 1);
  insn_end = (unsigned char *) (unw + 1 + length);
  while (!fs->done && insn < insn_end)
    insn = unw_decode (insn, fs->in_body, fs);

  /* If we're in the epilogue, sp has been restored and all values
     on the memory stack below psp also have been restored.  */
  if (fs->when_target > fs->epilogue_start)
    {
      struct unw_reg_info *r;

      fs->curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE;
      fs->curr.reg[UNW_REG_PSP].val = 0;
      for (r = fs->curr.reg; r < fs->curr.reg + UNW_NUM_REGS; ++r)
	if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10)
	    || r->where == UNW_WHERE_SPREL)
	  r->where = UNW_WHERE_NONE;
    }

  /* If RP didn't get saved, generate entry for the return link register.  */
  if (fs->curr.reg[UNW_REG_RP].when >= fs->when_target)
    {
      fs->curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
      fs->curr.reg[UNW_REG_RP].when = -1;
      fs->curr.reg[UNW_REG_RP].val = fs->return_link_reg;
    }

  return _URC_NO_REASON;
}

/* For register REGNO, resolve FS's "where/val" save description into a
   concrete save address and record it in CONTEXT.  Registers not yet
   saved at FS->when_target are left untouched.  */
static void
uw_update_reg_address (struct _Unwind_Context *context,
		       _Unwind_FrameState *fs,
		       enum unw_register_index regno)
{
  struct unw_reg_info *r = fs->curr.reg + regno;
  void *addr;
  unsigned long rval;

  if (r->where == UNW_WHERE_NONE || r->when >= fs->when_target)
    return;

  rval = r->val;
  switch (r->where)
    {
    case UNW_WHERE_GR:
      if (rval >= 32)
	addr = ia64_rse_skip_regs ((unsigned long *) context->bsp, rval - 32);
      else if (rval >= 2)
	addr = context->ireg[rval - 2].loc;
      else
	abort ();
      break;

    case UNW_WHERE_FR:
      if (rval >= 2 && rval < 32)
	addr = context->fr_loc[rval - 2];
      else
	abort ();
      break;

    case UNW_WHERE_BR:
      /* Note that while RVAL can only be 1-5 from normal descriptors,
	 we can want to look at B0 due to having manually unwound a
	 signal frame.  */
      if (rval >= 0 && rval <= 5)
	addr = context->br_loc[rval];
      else
	abort ();
      break;

    case UNW_WHERE_SPREL:
      addr = (void *)(context->sp + rval);
      break;

    case UNW_WHERE_PSPREL:
      addr = (void *)(context->psp + rval);
      break;

    default:
      abort ();
    }

  /* Record the resolved address (plus NaT tracking for GRs) in the
     appropriate CONTEXT slot.  */
  switch (regno)
    {
    case UNW_REG_R2 ... UNW_REG_R31:
      context->ireg[regno - UNW_REG_R2].loc = addr;
      switch (r->where)
	{
	case UNW_WHERE_GR:
	  if (rval >= 32)
	    {
	      context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_MEMSTK;
	      context->ireg[regno - UNW_REG_R2].nat.off
		= context->pri_unat_loc - (unsigned long *) addr;
	    }
	  else if (rval >= 2)
	    {
	      context->ireg[regno - UNW_REG_R2].nat
		= context->ireg[rval - 2].nat;
	    }
	  else
	    abort ();
	  break;

	case UNW_WHERE_FR:
	  context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_VAL;
	  context->ireg[regno - UNW_REG_R2].nat.off = 0;
	  break;

	case UNW_WHERE_BR:
	  context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_NONE;
	  context->ireg[regno - UNW_REG_R2].nat.off = 0;
	  break;

	case UNW_WHERE_PSPREL:
	case UNW_WHERE_SPREL:
	  context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_MEMSTK;
	  context->ireg[regno - UNW_REG_R2].nat.off
	    = context->pri_unat_loc - (unsigned long *) addr;
	  break;

	default:
	  abort ();
	}
      break;

    case UNW_REG_F2 ... UNW_REG_F31:
      context->fr_loc[regno - UNW_REG_F2] = addr;
      break;

    case UNW_REG_B1 ... UNW_REG_B5:
      context->br_loc[regno - UNW_REG_B0] = addr;
      break;

    case UNW_REG_BSP:
      context->bsp_loc = addr;
      break;
    case UNW_REG_BSPSTORE:
      context->bspstore_loc = addr;
      break;
    case UNW_REG_PFS:
      context->pfs_loc = addr;
      break;
    case UNW_REG_RP:
      /* RP and PR/PSP below are dereferenced immediately rather than
	 stored as locations.  */
      context->rp = *(unsigned long *)addr;
      break;
    case UNW_REG_UNAT:
      context->unat_loc = addr;
      break;
    case UNW_REG_PR:
      context->pr = *(unsigned long *) addr;
      break;
    case UNW_REG_LC:
      context->lc_loc = addr;
      break;
    case UNW_REG_FPSR:
      context->fpsr_loc = addr;
      break;

    case UNW_REG_PSP:
      context->psp = *(unsigned long *)addr;
      break;

    case UNW_REG_RNAT:
    case UNW_NUM_REGS:
      abort ();
    }
}

/* Step CONTEXT up one frame using the frame state FS computed by
   uw_frame_state_for.  */
static void
uw_update_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
{
  long i;

  context->sp = context->psp;

  /* First, set PSP.  Subsequent instructions may depend on this value.  */
  if (fs->when_target > fs->curr.reg[UNW_REG_PSP].when)
    {
      if (fs->curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE)
	context->psp = context->psp + fs->curr.reg[UNW_REG_PSP].val;
      else
	uw_update_reg_address (context, fs, UNW_REG_PSP);
    }

  /* Determine the location of the primary UNaT.  */
  {
    int i;
    if (fs->when_target < fs->curr.reg[UNW_REG_PRI_UNAT_GR].when)
      i = UNW_REG_PRI_UNAT_MEM;
    else if (fs->when_target < fs->curr.reg[UNW_REG_PRI_UNAT_MEM].when)
      i = UNW_REG_PRI_UNAT_GR;
    else if (fs->curr.reg[UNW_REG_PRI_UNAT_MEM].when
	     > fs->curr.reg[UNW_REG_PRI_UNAT_GR].when)
      i = UNW_REG_PRI_UNAT_MEM;
    else
      i = UNW_REG_PRI_UNAT_GR;
    uw_update_reg_address (context, fs, i);
  }

  /* Compute the addresses of all registers saved in this frame.  */
  for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i)
    uw_update_reg_address (context, fs, i);

  /* Unwind BSP for the local registers allocated this frame.  */
  /* ??? What to do with stored BSP or BSPSTORE registers.  */
  if (fs->when_target > fs->curr.reg[UNW_REG_PFS].when)
    {
      unsigned long pfs = *context->pfs_loc;
      unsigned long sol = (pfs >> 7) & 0x7f;
      context->bsp = (unsigned long)
	ia64_rse_skip_regs ((unsigned long *) context->bsp, -sol);
    }
}

/* Fill in CONTEXT for top-of-stack.  The only valid registers at this
   level will be the return address and the CFA.  Note that CFA = SP+16.  */

#define uw_init_context(CONTEXT) \
  do {									\
    /* ??? There is a whole lot of code in uw_install_context that	\
       tries to avoid spilling the entire machine state here.  We	\
       should try to make that work again.  */				\
    __builtin_unwind_init();						\
    uw_init_context_1 (CONTEXT, __builtin_ia64_bsp ());			\
  } while (0)

static void
uw_init_context_1 (struct _Unwind_Context *context, void *bsp)
{
  void *rp = __builtin_extract_return_addr (__builtin_return_address (0));
  /* Set psp to the caller's stack pointer.  */
  void *psp = __builtin_dwarf_cfa () - 16;
  _Unwind_FrameState fs;

  /* Flush the register stack to memory so that we can access it.  */
  __builtin_ia64_flushrs ();

  memset (context, 0, sizeof (struct _Unwind_Context));
  context->bsp = context->regstk_top = (unsigned long) bsp;
  context->psp = (unsigned long) psp;
  context->rp = (unsigned long) rp;
  asm ("mov %0 = sp" : "=r" (context->sp));
  asm ("mov %0 = pr" : "=r" (context->pr));
  context->pri_unat_loc = &context->initial_unat;	/* ??? */
  /* ??? Get rnat.  Don't we have to turn off the rse for that?  */

  if (uw_frame_state_for (context, &fs) != _URC_NO_REASON)
    abort ();

  uw_update_context (context, &fs);
}

/* Install (ie longjmp to) the contents of TARGET.  */

static void __attribute__((noreturn))
uw_install_context (struct _Unwind_Context *current __attribute__((unused)),
		    struct _Unwind_Context *target)
{
  unsigned long ireg_buf[4], ireg_nat = 0, ireg_pr = 0;
  long i;

  /* Copy integer register data from the target context to a
     temporary buffer.  Do this so that we can frob AR.UNAT
     to get the NaT bits for these registers set properly.  */
  for (i = 4; i <= 7; ++i)
    {
      char nat;
      void *t = target->ireg[i - 2].loc;
      if (t)
	{
	  unw_access_gr (target, i, &ireg_buf[i - 4], &nat, 0);
	  ireg_nat |= (long)nat << (((size_t)&ireg_buf[i - 4] >> 3) & 0x3f);
	  /* Set p6 - p9.  */
	  ireg_pr |= 4L << i;
	}
    }

  /* The value in uc_bsp that we've computed is that for the
     target function.  The value that we install below will be
     adjusted by the BR.RET instruction based on the contents
     of AR.PFS.  So we must unadjust that here.  */
  target->bsp = (unsigned long)
    ia64_rse_skip_regs ((unsigned long *)target->bsp,
			(*target->pfs_loc >> 7) & 0x7f);

  /* Provide assembly with the offsets into the _Unwind_Context.  */
  asm volatile ("uc_rnat = %0"
		: : "i"(offsetof (struct _Unwind_Context, rnat)));
  asm volatile ("uc_bsp = %0"
		: : "i"(offsetof (struct _Unwind_Context, bsp)));
  asm volatile ("uc_psp = %0"
		: : "i"(offsetof (struct _Unwind_Context, psp)));
  asm volatile ("uc_rp = %0"
		: : "i"(offsetof (struct _Unwind_Context, rp)));
  asm volatile ("uc_pr = %0"
		: : "i"(offsetof (struct _Unwind_Context, pr)));
  asm volatile ("uc_gp = %0"
		: : "i"(offsetof (struct _Unwind_Context, gp)));
  asm volatile ("uc_pfs_loc = %0"
		: : "i"(offsetof (struct _Unwind_Context, pfs_loc)));
  asm volatile ("uc_unat_loc = %0"
		: : "i"(offsetof (struct _Unwind_Context, unat_loc)));
  asm volatile ("uc_lc_loc = %0"
		: : "i"(offsetof (struct _Unwind_Context, lc_loc)));
  asm volatile ("uc_fpsr_loc = %0"
		: : "i"(offsetof (struct _Unwind_Context, fpsr_loc)));
  asm volatile ("uc_eh_data = %0"
		: : "i"(offsetof (struct _Unwind_Context, eh_data)));
  asm volatile ("uc_br_loc = %0"
		: : "i"(offsetof (struct _Unwind_Context, br_loc)));
  asm volatile ("uc_fr_loc = %0"
		: : "i"(offsetof (struct _Unwind_Context, fr_loc)));

  asm volatile (
	/* Load up call-saved non-window integer registers from ireg_buf.  */
	"add r20 = 8, %1 \n\t"
	"mov ar.unat = %2 \n\t"
	"mov pr = %3, 0x3c0 \n\t"
	";; \n\t"
	"(p6) ld8.fill r4 = [%1] \n\t"
	"(p7) ld8.fill r5 = [r20] \n\t"
	"add r21 = uc_br_loc + 8, %0 \n\t"
	"adds %1 = 16, %1 \n\t"
	"adds r20 = 16, r20 \n\t"
	";; \n\t"
	"(p8) ld8.fill r6 = [%1] \n\t"
	"(p9) ld8.fill r7 = [r20] \n\t"
	"add r20 = uc_br_loc, %0 \n\t"
	";; \n\t"
	/* Load up call-saved branch registers.  */
	"ld8 r22 = [r20], 16 \n\t"
	"ld8 r23 = [r21], 16 \n\t"
	";; \n\t"
	"ld8 r24 = [r20], 16 \n\t"
	"ld8 r25 = [r21], uc_fr_loc - (uc_br_loc + 24)\n\t"
	";; \n\t"
	"ld8 r26 = [r20], uc_fr_loc + 8 - (uc_br_loc + 32)\n\t"
	"ld8 r27 = [r21], 24 \n\t"
	"cmp.ne p6, p0 = r0, r22 \n\t"
	";; \n\t"
	"ld8 r28 = [r20], 8 \n\t"
	"(p6) ld8 r22 = [r22] \n\t"
	"cmp.ne p7, p0 = r0, r23 \n\t"
	";; \n\t"
	"(p7) ld8 r23 = [r23] \n\t"
	"cmp.ne p8, p0 = r0, r24 \n\t"
	";; \n\t"
	"(p8) ld8 r24 = [r24] \n\t"
	"(p6) mov b1 = r22 \n\t"
	"cmp.ne p9, p0 = r0, r25 \n\t"
	";; \n\t"
	"(p9) ld8 r25 = [r25] \n\t"
	"(p7) mov b2 = r23 \n\t"
	"cmp.ne p6, p0 = r0, r26 \n\t"
	";; \n\t"
	"(p6) ld8 r26 = [r26] \n\t"
	"(p8) mov b3 = r24 \n\t"
	"cmp.ne p7, p0 = r0, r27 \n\t"
	";; \n\t"
	/* Load up call-saved fp registers.  */
	"(p7) ldf.fill f2 = [r27] \n\t"
	"(p9) mov b4 = r25 \n\t"
	"cmp.ne p8, p0 = r0, r28 \n\t"
	";; \n\t"
	"(p8) ldf.fill f3 = [r28] \n\t"
	"(p6) mov b5 = r26 \n\t"
	";; \n\t"
	"ld8 r29 = [r20], 16*8 - 4*8 \n\t"
	"ld8 r30 = [r21], 17*8 - 5*8 \n\t"
	";; \n\t"
	"ld8 r22 = [r20], 16 \n\t"
	"ld8 r23 = [r21], 16 \n\t"
	";; \n\t"
	"ld8 r24 = [r20], 16 \n\t"
	"ld8 r25 = [r21] \n\t"
	"cmp.ne p6, p0 = r0, r29 \n\t"
	";; \n\t"
	"ld8 r26 = [r20], 8 \n\t"
	"(p6) ldf.fill f4 = [r29] \n\t"
	"cmp.ne p7, p0 = r0, r30 \n\t"
	";; \n\t"
	"ld8 r27 = [r20], 8 \n\t"
	"(p7) ldf.fill f5 = [r30] \n\t"
	"cmp.ne p6, p0 = r0, r22 \n\t"
	";; \n\t"
	"ld8 r28 = [r20], 8 \n\t"
	"(p6) ldf.fill f16 = [r22] \n\t"
	"cmp.ne p7, p0 = r0, r23 \n\t"
	";; \n\t"
	"ld8 r29 = [r20], 8 \n\t"
	"(p7) ldf.fill f17 = [r23] \n\t"
	"cmp.ne p6, p0 = r0, r24 \n\t"
	";; \n\t"
	"ld8 r22 = [r20], 8 \n\t"
	"(p6) ldf.fill f18 = [r24] \n\t"
	"cmp.ne p7, p0 = r0, r25 \n\t"
	";; \n\t"
	"ld8 r23 = [r20], 8 \n\t"
	"(p7) ldf.fill f19 = [r25] \n\t"
	"cmp.ne p6, p0 = r0, r26 \n\t"
	";; \n\t"
	"ld8 r24 = [r20], 8 \n\t"
	"(p6) ldf.fill f20 = [r26] \n\t"
	"cmp.ne p7, p0 = r0, r27 \n\t"
	";; \n\t"
	"ld8 r25 = [r20], 8 \n\t"
	"(p7) ldf.fill f21 = [r27] \n\t"
	"cmp.ne p6, p0 = r0, r28 \n\t"
	";; \n\t"
	"ld8 r26 = [r20], 8 \n\t"
	"(p6) ldf.fill f22 = [r28] \n\t"
	"cmp.ne p7, p0 = r0, r29 \n\t"
	";; \n\t"
	"ld8 r28 = [r20], 8 \n\t"
	"(p7) ldf.fill f23 = [r29] \n\t"
	"cmp.ne p6, p0 = r0, r22 \n\t"
	";; \n\t"
	"ld8 r29 = [r20], 8 \n\t"
	"(p6) ldf.fill f24 = [r22] \n\t"
	"cmp.ne p7, p0 = r0, r23 \n\t"
	";; \n\t"
	"(p7) ldf.fill f25 = [r23] \n\t"
	"cmp.ne p6, p0 = r0, r24 \n\t"
	"cmp.ne p7, p0 = r0, r25 \n\t"
	";; \n\t"
	"(p6) ldf.fill f26 = [r24] \n\t"
	"(p7) ldf.fill f27 = [r25] \n\t"
	"cmp.ne p6, p0 = r0, r26 \n\t"
	";; \n\t"
	"(p6) ldf.fill f28 = [r26] \n\t"
	"cmp.ne p7, p0 = r0, r27 \n\t"
	"cmp.ne p6, p0 = r0, r28 \n\t"
	";; \n\t"
	"(p7) ldf.fill f29 = [r27] \n\t"
	"(p6) ldf.fill f30 = [r28] \n\t"
	"cmp.ne p7, p0 = r0, r29 \n\t"
	";; \n\t"
	"(p7) ldf.fill f31 = [r29] \n\t"
	"add r20 = uc_rnat, %0 \n\t"
	"add r21 = uc_bsp, %0 \n\t"
	";; \n\t"
	/* Load the balance of the thread state from the context.  */
	"ld8 r22 = [r20], uc_psp - uc_rnat \n\t"
	"ld8 r23 = [r21], uc_gp - uc_bsp \n\t"
	";; \n\t"
	"ld8 r24 = [r20], uc_pfs_loc - uc_psp \n\t"
	"ld8 r1 = [r21], uc_rp - uc_gp \n\t"
	";; \n\t"
	"ld8 r25 = [r20], uc_unat_loc - uc_pfs_loc\n\t"
	"ld8 r26 = [r21], uc_pr - uc_rp \n\t"
	";; \n\t"
	"ld8 r27 = [r20], uc_lc_loc - uc_unat_loc\n\t"
	"ld8 r28 = [r21], uc_fpsr_loc - uc_pr \n\t"
	";; \n\t"
	"ld8 r29 = [r20], uc_eh_data - uc_lc_loc\n\t"
	"ld8 r30 = [r21], uc_eh_data + 8 - uc_fpsr_loc\n\t"
	";; \n\t"
	/* Load data for the exception handler.  */
	"ld8 r15 = [r20], 16 \n\t"
	"ld8 r16 = [r21], 16 \n\t"
	";; \n\t"
	"ld8 r17 = [r20] \n\t"
	"ld8 r18 = [r21] \n\t"
	";; \n\t"
	/* Install the balance of the thread state loaded above.  */
	"cmp.ne p6, p0 = r0, r25 \n\t"
	"cmp.ne p7, p0 = r0, r27 \n\t"
	";; \n\t"
	"(p6) ld8 r25 = [r25] \n\t"
	"(p7) ld8 r27 = [r27] \n\t"
	";; \n\t"
	"(p7) mov.m ar.unat = r27 \n\t"
	"(p6) mov.i ar.pfs = r25 \n\t"
	"cmp.ne p9, p0 = r0, r29 \n\t"
	";; \n\t"
	"(p9) ld8 r29 = [r29] \n\t"
	"cmp.ne p6, p0 = r0, r30 \n\t"
	";; \n\t"
	"(p6) ld8 r30 = [r30] \n\t"
	/* Don't clobber p6-p9, which are in use at present.  */
	"mov pr = r28, ~0x3c0 \n\t"
	"(p9) mov.i ar.lc = r29 \n\t"
	";; \n\t"
	"mov.m r25 = ar.rsc \n\t"
	"(p6) mov.i ar.fpsr = r30 \n\t"
	";; \n\t"
	"and r25 = 0x1c, r25 \n\t"
	"mov b0 = r26 \n\t"
	";; \n\t"
	"mov.m ar.rsc = r25 \n\t"
	";; \n\t"
	/* This must be done before setting AR.BSPSTORE, otherwise
	   AR.BSP will be initialized with a random displacement
	   below the value we want, based on the current number of
	   dirty stacked registers.  */
	"loadrs \n\t"
	"invala \n\t"
	";; \n\t"
	"mov.m ar.bspstore = r23 \n\t"
	";; \n\t"
	"or r25 = 0x3, r25 \n\t"
	"mov.m ar.rnat = r22 \n\t"
	";; \n\t"
	"mov.m ar.rsc = r25 \n\t"
	"mov sp = r24 \n\t"
	"br.ret.sptk.few b0"
	: : "r"(target), "r"(ireg_buf), "r"(ireg_nat), "r"(ireg_pr)
	: "r15", "r16", "r17", "r18", "r20", "r21", "r22",
	  "r23", "r24", "r25", "r26", "r27", "r28", "r29",
	  "r30", "r31");
  /* NOTREACHED */
  while (1);
}

/* A frame is uniquely identified by its return address.  */
static inline _Unwind_Ptr
uw_identify_context (struct _Unwind_Context *context)
{
  return _Unwind_GetIP (context);
}

#include "unwind.inc"
#endif
diff --git a/contrib/gcc/config/ia64/unwind-ia64.h b/contrib/gcc/config/ia64/unwind-ia64.h
new file mode 100644
index 0000000..a6b850d
--- /dev/null
+++ b/contrib/gcc/config/ia64/unwind-ia64.h
@@ -0,0 +1,31 @@
/* Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
   Contributed by Andrew MacLeod  <amacleod@cygnus.com>
		  Andrew Haley  <aph@cygnus.com>

   This file is part of GNU CC.

   GNU CC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU CC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GNU CC; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/* One entry in the self-relative unwind table: function start/end
   offsets plus the offset of its unwind info block.  */
struct unw_table_entry
{
  unsigned long start_offset;
  unsigned long end_offset;
  unsigned long info_offset;
};

/* Locate the unwind table entry covering PC; on success also returns
   the segment base and gp value through the out parameters.  */
extern struct unw_table_entry *
_Unwind_FindTableEntry (void *pc, unsigned long *segment_base,
			unsigned long *gp);