diff options
Diffstat (limited to 'contrib/gcc/config/i386/athlon.md')
-rw-r--r-- | contrib/gcc/config/i386/athlon.md | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/contrib/gcc/config/i386/athlon.md b/contrib/gcc/config/i386/athlon.md new file mode 100644 index 0000000..548f2ad --- /dev/null +++ b/contrib/gcc/config/i386/athlon.md @@ -0,0 +1,206 @@ +;; AMD Athlon Scheduling +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; +;; This file is part of GNU CC. +;; +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. */ +(define_attr "athlon_decode" "direct,vector" + (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov") + (const_string "vector") + (and (eq_attr "type" "push") + (match_operand 1 "memory_operand" "")) + (const_string "vector") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load,store") + (eq_attr "mode" "XF"))) + (const_string "vector")] + (const_string "direct"))) + +;; The Athlon does contain three pipelined FP units, three integer units and +;; three address generation units. +;; +;; The predecode logic is determining boundaries of instructions in the 64 +;; byte cache line. So the cache line straddling problem of K6 might be issue +;; here as well, but it is not noted in the documentation. +;; +;; Three DirectPath instructions decoders and only one VectorPath decoder +;; is available. They can decode three DirectPath instructions or one VectorPath +;; instruction per cycle. +;; Decoded macro instructions are then passed to 72 entry instruction control +;; unit, that passes +;; it to the specialized integer (18 entry) and fp (36 entry) schedulers. +;; +;; The load/store queue unit is not attached to the schedulers but +;; communicates with all the execution units separately instead. + +(define_function_unit "athlon_vectordec" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "vector")) + 1 1) + +(define_function_unit "athlon_directdec" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "direct")) + 1 1) + +(define_function_unit "athlon_vectordec" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "direct")) + 1 1 [(eq_attr "athlon_decode" "vector")]) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ishift1,rotate,rotate1,ibr,call,callv,icmov,cld,pop,setcc,push,pop")) + 1 1) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "str")) + 15 15) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "imul")) + 5 0) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "idiv")) + 42 0) + +(define_function_unit "athlon_muldiv" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "imul")) + 5 0) + +(define_function_unit "athlon_muldiv" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "idiv")) + 42 42) + +(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any" + (cond [(eq_attr "type" "fop,fcmp,fistp") + (const_string "add") + (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov") + (const_string "mul") + (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both")) + (const_string "store") + (and (eq_attr "type" "fmov") (eq_attr "memory" "load")) + (const_string "any") + (and (eq_attr "type" "fmov") + (ior (match_operand:SI 1 "register_operand" "") + (match_operand 1 "immediate_operand" ""))) + (const_string "store") + (eq_attr "type" "fmov") + (const_string "muladd")] + (const_string "none"))) + +;; We use latencies 1 for definitions. This is OK to model colisions +;; in execution units. The real latencies are modeled in the "fp" pipeline. + +;; fsin, fcos: 96-192 +;; fsincos: 107-211 +;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode. +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fpspc")) + 100 1) + +;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode. +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fdiv")) + 24 1) + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fop,fmul,fistp")) + 4 1) + +;; XFmode loads are slow. +;; XFmode store is slow too (8 cycles), but we don't need to model it, because +;; there are no dependent instructions. + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load") + (eq_attr "mode" "XF")))) + 10 1) + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fmov,fsgn")) + 2 1) + +;; fcmp and ftst instructions +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (eq_attr "athlon_decode" "direct"))) + 3 1) + +;; fcmpi instructions. +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (eq_attr "athlon_decode" "vector"))) + 3 1) + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fcmov")) + 7 1) + +(define_function_unit "athlon_fp_mul" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "mul")) + 1 1) + +(define_function_unit "athlon_fp_add" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "add")) + 1 1) + +(define_function_unit "athlon_fp_muladd" 2 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "muladd,mul,add")) + 1 1) + +(define_function_unit "athlon_fp_store" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "store")) + 1 1) + +;; We don't need to model the Address Generation Unit, since we don't model +;; the re-order buffer yet and thus we never schedule more than three operations +;; at time. Later we may want to experiment with MD_SCHED macros modeling the +;; decoders independently on the functional units. + +;(define_function_unit "athlon_agu" 3 0 +; (and (eq_attr "cpu" "athlon") +; (and (eq_attr "memory" "!none") +; (eq_attr "athlon_fpunits" "none"))) +; 1 1) + +;; Model load unit to avoid too long sequences of loads. We don't need to +;; model store queue, since it is hardly going to be bottleneck. + +(define_function_unit "athlon_load" 2 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "memory" "load,both")) + 1 1) + |