From 6be340ca746d2a579e71248a98a638f162bc2a02 Mon Sep 17 00:00:00 2001 From: mm Date: Mon, 7 Mar 2011 14:48:22 +0000 Subject: Backport Intel Core 2 and AMD Geode CPU types from gcc-4.3 (GPLv2) These options are supported in this shape in all newer GCC versions. PR: gnu/155308 Obtained from: gcc 4.3 (rev. 118090, 118973, 120846; GPLv2) MFC after: 2 weeks --- contrib/gcc/config.gcc | 12 ++- contrib/gcc/config/i386/geode.md | 153 +++++++++++++++++++++++++++++++++ contrib/gcc/config/i386/i386.c | 181 ++++++++++++++++++++++++++++++++------- contrib/gcc/config/i386/i386.h | 46 +++++++--- contrib/gcc/config/i386/i386.md | 3 +- contrib/gcc/doc/gcc.1 | 9 +- contrib/gcc/doc/invoke.texi | 5 ++ 7 files changed, 361 insertions(+), 48 deletions(-) create mode 100644 contrib/gcc/config/i386/geode.md (limited to 'contrib/gcc') diff --git a/contrib/gcc/config.gcc b/contrib/gcc/config.gcc index 9a142e2..b2364cc 100644 --- a/contrib/gcc/config.gcc +++ b/contrib/gcc/config.gcc @@ -1207,14 +1207,14 @@ i[34567]86-*-solaris2*) # FIXME: -m64 for i[34567]86-*-* should be allowed just # like -m32 for x86_64-*-*. case X"${with_cpu}" in - Xgeneric|Xnocona|Xx86-64|Xk8|Xopteron|Xathlon64|Xathlon-fx) + Xgeneric|Xcore2|Xnocona|Xx86-64|Xk8|Xopteron|Xathlon64|Xathlon-fx) ;; X) with_cpu=generic ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic nocona x86-64 k8 opteron athlon64 athlon-fx" 1>&2 + echo "generic core2 nocona x86-64 k8 opteron athlon64 athlon-fx" 1>&2 exit 1 ;; esac @@ -2537,6 +2537,9 @@ if test x$with_cpu = x ; then nocona-*) with_cpu=nocona ;; + core2-*) + with_cpu=core2 + ;; pentium_m-*) with_cpu=pentium-m ;; @@ -2556,6 +2559,9 @@ if test x$with_cpu = x ; then nocona-*) with_cpu=nocona ;; + core2-*) + with_cpu=core2 + ;; *) with_cpu=generic ;; @@ -2787,7 +2793,7 @@ case "${target}" in esac # OK ;; - "" | k8 | opteron | athlon64 | athlon-fx | nocona | generic) + "" | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic) # OK ;; *) diff --git a/contrib/gcc/config/i386/geode.md b/contrib/gcc/config/i386/geode.md new file mode 100644 index 0000000..050216a --- /dev/null +++ b/contrib/gcc/config/i386/geode.md @@ -0,0 +1,153 @@ +;; Geode Scheduling +;; Copyright (C) 2006 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. +;; +;; The Geode architecture is one insn issue processor. +;; +;; This description is based on data from the following documents: +;; +;; "AMD Geode GX Processor Data Book" +;; Advanced Micro Devices, Inc., Aug 2005. +;; +;; "AMD Geode LX Processor Data Book" +;; Advanced Micro Devices, Inc., Jan 2006. +;; +;; +;; CPU execution units of the Geode: +;; +;; issue describes the issue pipeline. +;; alu describes the Integer unit +;; fpu describes the FP unit +;; +;; The fp unit is out of order execution unit with register renaming. +;; There is also memory management unit and execution pipeline for +;; load/store operations. We ignore it and difference between insns +;; using memory and registers. + +(define_automaton "geode") + +(define_cpu_unit "geode_issue,geode_alu,geode_fpu" "geode") + +(define_insn_reservation "alu" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "alu,alu1,negnot,icmp,lea,test,imov,imovx,icmov,incdec,setcc")) + "geode_issue,geode_alu") + +(define_insn_reservation "shift" 2 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "ishift,ishift1,rotate,rotate1,cld")) + "geode_issue,geode_alu*2") + +(define_insn_reservation "imul" 7 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "imul")) + "geode_issue,geode_alu*7") + +(define_insn_reservation "idiv" 40 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "idiv")) + "geode_issue,geode_alu*40") + +;; The branch unit. +(define_insn_reservation "call" 2 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "call,callv")) + "geode_issue,geode_alu*2") + +(define_insn_reservation "geode_branch" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "ibr")) + "geode_issue,geode_alu") + +(define_insn_reservation "geode_pop_push" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "pop,push")) + "geode_issue,geode_alu") + +(define_insn_reservation "geode_leave" 2 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "leave")) + "geode_issue,geode_alu*2") + +(define_insn_reservation "geode_load_str" 4 + (and (eq_attr "cpu" "geode") + (and (eq_attr "type" "str") + (eq_attr "memory" "load,both"))) + "geode_issue,geode_alu*4") + +(define_insn_reservation "geode_store_str" 2 + (and (eq_attr "cpu" "geode") + (and (eq_attr "type" "str") + (eq_attr "memory" "store"))) + "geode_issue,geode_alu*2") + +;; Be optimistic +(define_insn_reservation "geode_unknown" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "multi,other")) + "geode_issue,geode_alu") + +;; FPU + +(define_insn_reservation "geode_fop" 6 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fop,fcmp")) + "geode_issue,geode_fpu*6") + +(define_insn_reservation "geode_fsimple" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fmov,fcmov,fsgn,fxch")) + "geode_issue,geode_fpu") + +(define_insn_reservation "geode_fist" 4 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fistp,fisttp")) + "geode_issue,geode_fpu*4") + +(define_insn_reservation "geode_fmul" 10 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fmul")) + "geode_issue,geode_fpu*10") + +(define_insn_reservation "geode_fdiv" 47 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fdiv")) + "geode_issue,geode_fpu*47") + +;; We use minimal latency (fsin) here +(define_insn_reservation "geode_fpspc" 54 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fpspc")) + "geode_issue,geode_fpu*54") + +(define_insn_reservation "geode_frndint" 12 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "frndint")) + "geode_issue,geode_fpu*12") + +(define_insn_reservation "geode_mmxmov" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "mmxmov")) + "geode_issue,geode_fpu") + +(define_insn_reservation "geode_mmx" 2 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "mmx,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")) + "geode_issue,geode_fpu*2") diff --git a/contrib/gcc/config/i386/i386.c b/contrib/gcc/config/i386/i386.c index 18293d9..4ed7c30 100644 --- a/contrib/gcc/config/i386/i386.c +++ b/contrib/gcc/config/i386/i386.c @@ -336,6 +336,60 @@ struct processor_costs pentiumpro_cost = { }; static const +struct processor_costs geode_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (2), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (7), /* SI */ + COSTS_N_INSNS (7), /* DI */ + COSTS_N_INSNS (7)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (23), /* HI */ + COSTS_N_INSNS (39), /* SI */ + COSTS_N_INSNS (39), /* DI */ + COSTS_N_INSNS (39)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 4, /* MOVE_RATIO */ + 1, /* cost for loading QImode using movzbl */ + {1, 1, 1}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {1, 1, 1}, /* cost of storing integer registers */ + 1, /* cost of reg,reg fld/fst */ + {1, 1, 1}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 6, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + + 1, /* cost of moving MMX register */ + {1, 1}, /* cost of loading MMX registers + in SImode and DImode */ + {1, 1}, /* cost of storing MMX registers + in SImode and DImode */ + 1, /* cost of moving SSE register */ + {1, 1, 1}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {1, 1, 1}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 1, /* MMX or SSE register to integer */ + 32, /* size of prefetch block */ + 1, /* number of parallel prefetches */ + 1, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (11), /* cost of FMUL instruction. */ + COSTS_N_INSNS (47), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ +}; + +static const struct processor_costs k6_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ @@ -600,6 +654,58 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ }; +static const +struct processor_costs core2_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (3), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (3), /* DI */ + COSTS_N_INSNS (3)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (22), /* HI */ + COSTS_N_INSNS (22), /* SI */ + COSTS_N_INSNS (22), /* DI */ + COSTS_N_INSNS (22)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 16, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {6, 6, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 4}, /* cost of loading integer registers */ + 2, /* cost of moving MMX register */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {6, 6, 6}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 4}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 2, /* MMX or SSE register to integer */ + 128, /* size of prefetch block */ + 8, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (5), /* cost of FMUL instruction. */ + COSTS_N_INSNS (32), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + COSTS_N_INSNS (58), /* cost of FSQRT instruction. */ +}; + /* Generic64 should produce code tuned for Nocona and K8. */ static const struct processor_costs generic64_cost = { @@ -721,38 +827,41 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_486 (1<