1 files changed, 848 insertions, 185 deletions
diff --git a/contrib/gcc/config/i386/athlon.md b/contrib/gcc/config/i386/athlon.md
index 548f2ad..4ce9a38 100644
--- a/contrib/gcc/config/i386/athlon.md
+++ b/contrib/gcc/config/i386/athlon.md
@@ -1,34 +1,5 @@
 ;; AMD Athlon Scheduling
-;; Copyright (C) 2002 Free Software Foundation, Inc.
 ;;
-;; This file is part of GNU CC.
-;;
-;; GNU CC is free software; you can redistribute it and/or modify
-;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
-;; any later version.
-;;
-;; GNU CC is distributed in the hope that it will be useful,
-;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-;; GNU General Public License for more details.
-;;
-;; You should have received a copy of the GNU General Public License
-;; along with GNU CC; see the file COPYING.  If not, write to
-;; the Free Software Foundation, 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.  */
-(define_attr "athlon_decode" "direct,vector"
-  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov")
-	   (const_string "vector")
-         (and (eq_attr "type" "push")
-              (match_operand 1 "memory_operand" ""))
-	   (const_string "vector")
-         (and (eq_attr "type" "fmov")
-	      (and (eq_attr "memory" "load,store")
-		   (eq_attr "mode" "XF")))
-	   (const_string "vector")]
-	(const_string "direct")))
-
 ;; The Athlon does contain three pipelined FP units, three integer units and
 ;; three address generation units. 
 ;;
@@ -46,161 +17,853 @@
 ;; The load/store queue unit is not attached to the schedulers but
 ;; communicates with all the execution units separately instead.
 
-(define_function_unit "athlon_vectordec" 1 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "athlon_decode" "vector"))
-  1 1)
-
-(define_function_unit "athlon_directdec" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "athlon_decode" "direct"))
-  1 1)
-
-(define_function_unit "athlon_vectordec" 1 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "athlon_decode" "direct"))
-  1 1 [(eq_attr "athlon_decode" "vector")])
-
-(define_function_unit "athlon_ieu" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ishift1,rotate,rotate1,ibr,call,callv,icmov,cld,pop,setcc,push,pop"))
-  1 1)
-
-(define_function_unit "athlon_ieu" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "str"))
-  15 15)
-
-(define_function_unit "athlon_ieu" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "imul"))
-  5 0)
-
-(define_function_unit "athlon_ieu" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "idiv"))
-  42 0)
-
-(define_function_unit "athlon_muldiv" 1 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "imul"))
-  5 0)
-
-(define_function_unit "athlon_muldiv" 1 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "idiv"))
-  42 42)
-
-(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
-  (cond [(eq_attr "type" "fop,fcmp,fistp")
-	   (const_string "add")
-         (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
-	   (const_string "mul")
-	 (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))
-	   (const_string "store")
-	 (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))
-	   (const_string "any")
+(define_attr "athlon_decode" "direct,vector,double"
+  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,leave")
+	   (const_string "vector")
+         (and (eq_attr "type" "push")
+              (match_operand 1 "memory_operand" ""))
+	   (const_string "vector")
          (and (eq_attr "type" "fmov")
-              (ior (match_operand:SI 1 "register_operand" "")
-                   (match_operand 1 "immediate_operand" "")))
-	   (const_string "store")
-         (eq_attr "type" "fmov")
-	   (const_string "muladd")]
-	(const_string "none")))
-
-;; We use latencies 1 for definitions.  This is OK to model colisions
-;; in execution units.  The real latencies are modeled in the "fp" pipeline.
-
-;; fsin, fcos: 96-192
-;; fsincos: 107-211
-;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
-(define_function_unit "athlon_fp" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "fpspc"))
-  100 1)
-
-;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
-(define_function_unit "athlon_fp" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "fdiv"))
-  24 1)
-
-(define_function_unit "athlon_fp" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "fop,fmul,fistp"))
-  4 1)
-
-;; XFmode loads are slow.
-;; XFmode store is slow too (8 cycles), but we don't need to model it, because
-;; there are no dependent instructions.
-
-(define_function_unit "athlon_fp" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (and (eq_attr "type" "fmov")
-	    (and (eq_attr "memory" "load")
-		 (eq_attr "mode" "XF"))))
-  10 1)
-
-(define_function_unit "athlon_fp" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "fmov,fsgn"))
-  2 1)
-
-;; fcmp and ftst instructions
-(define_function_unit "athlon_fp" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (and (eq_attr "type" "fcmp")
-	    (eq_attr "athlon_decode" "direct")))
-  3 1)
-
-;; fcmpi instructions.
-(define_function_unit "athlon_fp" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (and (eq_attr "type" "fcmp")
-	    (eq_attr "athlon_decode" "vector")))
-  3 1)
-
-(define_function_unit "athlon_fp" 3 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "type" "fcmov"))
-  7 1)
-
-(define_function_unit "athlon_fp_mul" 1 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "athlon_fpunits" "mul"))
-  1 1)
-
-(define_function_unit "athlon_fp_add" 1 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "athlon_fpunits" "add"))
-  1 1)
-
-(define_function_unit "athlon_fp_muladd" 2 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "athlon_fpunits" "muladd,mul,add"))
-  1 1)
-
-(define_function_unit "athlon_fp_store" 1 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "athlon_fpunits" "store"))
-  1 1)
-
-;; We don't need to model the Address Generation Unit, since we don't model
-;; the re-order buffer yet and thus we never schedule more than three operations
-;; at time.  Later we may want to experiment with MD_SCHED macros modeling the
-;; decoders independently on the functional units.
-
-;(define_function_unit "athlon_agu" 3 0
-;  (and (eq_attr "cpu" "athlon")
-;       (and (eq_attr "memory" "!none")
-;            (eq_attr "athlon_fpunits" "none")))
-;  1 1)
-
-;; Model load unit to avoid too long sequences of loads.  We don't need to
-;; model store queue, since it is hardly going to be bottleneck.
-
-(define_function_unit "athlon_load" 2 0
-  (and (eq_attr "cpu" "athlon")
-       (eq_attr "memory" "load,both"))
-  1 1)
+	      (and (eq_attr "memory" "load,store")
+		   (eq_attr "mode" "XF")))
+	   (const_string "vector")]
+	(const_string "direct")))
+
+;;
+;;           decode0 decode1 decode2
+;;                 \    |   /
+;;    instruction control unit (72 entry scheduler)
+;;                |                        |
+;;      integer scheduler (18)         stack map
+;;     /  |    |    |    |   \        stack rename
+;;  ieu0 agu0 ieu1 agu1 ieu2 agu2      scheduler
+;;    |  agu0  |   agu1      agu2    register file
+;;    |      \ |    |       /         |     |     |
+;;     \      /\    |     /         fadd  fmul  fstore
+;;       \  /    \  |   /           fadd  fmul  fstore
+;;       imul  load/store (2x)      fadd  fmul  fstore
+
+(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
+(define_cpu_unit "athlon-decode0" "athlon")
+(define_cpu_unit "athlon-decode1" "athlon")
+(define_cpu_unit "athlon-decode2" "athlon")
+(define_cpu_unit "athlon-decodev" "athlon")
+;; Model the fact that double decoded instruction may take 2 cycles
+;; to decode when decoder2 and decoder0 in next cycle
+;; is used (this is needed to allow troughput of 1.5 double decoded
+;; instructions per cycle).
+;;
+;; In order to avoid dependence between reservation of decoder
+;; and other units, we model decoder as two stage fully pipelined unit
+;; and only double decoded instruction may occupy unit in the first cycle.
+;; With this scheme however two double instructions can be issued cycle0.
+;;
+;; Avoid this by using presence set requiring decoder0 to be allocated
+;; too. Vector decoded instructions then can't be issued when
+;; modeled as consuming decoder0+decoder1+decoder2.
+;; We solve that by specialized vector decoder unit and exclusion set.
+(presence_set "athlon-decode2" "athlon-decode0")
+(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
+(define_reservation "athlon-vector" "nothing,athlon-decodev")
+(define_reservation "athlon-direct0" "nothing,athlon-decode0")
+(define_reservation "athlon-direct" "nothing,
+				     (athlon-decode0 | athlon-decode1
+				     | athlon-decode2)")
+;; Double instructions behaves like two direct instructions.
+(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
+				     | (nothing,(athlon-decode0 + athlon-decode1))
+				     | (nothing,(athlon-decode1 + athlon-decode2)))")
+
+;; Agu and ieu unit results in extremely large automatons and
+;; in our approximation they are hardly filled in.  Only ieu
+;; unit can, as issue rate is 3 and agu unit is always used
+;; first in the insn reservations.  Skip the models.
+
+;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
+;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
+(define_reservation "athlon-ieu" "nothing")
+(define_cpu_unit "athlon-ieu0" "athlon")
+;(define_cpu_unit "athlon-agu0" "athlon_agu")
+;(define_cpu_unit "athlon-agu1" "athlon_agu")
+;(define_cpu_unit "athlon-agu2" "athlon_agu")
+;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
+(define_reservation "athlon-agu" "nothing")
+
+(define_cpu_unit "athlon-mult" "athlon_mult")
+
+(define_cpu_unit "athlon-load0" "athlon_load")
+(define_cpu_unit "athlon-load1" "athlon_load")
+(define_reservation "athlon-load" "athlon-agu,
+				   (athlon-load0 | athlon-load1),nothing")
+;; 128bit SSE instructions issue two loads at once
+(define_reservation "athlon-load2" "athlon-agu,
+				   (athlon-load0 + athlon-load1),nothing")
+
+(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
+;; 128bit SSE instructions issue two stores at once
+(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")
+
+
+;; The FP operations start to execute at stage 12 in the pipeline, while
+;; integer operations start to execute at stage 9 for Athlon and 11 for K8
+;; Compensate the difference for Athlon because it results in significantly
+;; smaller automata.
+(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
+;; The floating point loads.
+(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
+(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
+(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
+(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")
+
+
+;; The three fp units are fully pipelined with latency of 3
+(define_cpu_unit "athlon-fadd" "athlon_fp")
+(define_cpu_unit "athlon-fmul" "athlon_fp")
+(define_cpu_unit "athlon-fstore" "athlon_fp")
+(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
+(define_reservation "athlon-faddmul" "(athlon-fmul | athlon-fadd)")
+
+;; Vector operations usually consume many of pipes.
+(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
+
+
+;; Jump instructions are executed in the branch unit completely transparent to us
+(define_insn_reservation "athlon_branch" 0
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "ibr"))
+			 "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_call" 0
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "call,callv"))
+			 "athlon-vector,athlon-ieu")
+
+;; Latency of push operation is 3 cycles, but ESP value is available
+;; earlier
+(define_insn_reservation "athlon_push" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "push"))
+			 "athlon-direct,athlon-agu,athlon-store")
+(define_insn_reservation "athlon_pop" 4
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "pop"))
+			 "athlon-vector,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_pop_k8" 3
+			 (and (eq_attr "cpu" "k8")
+			      (eq_attr "type" "pop"))
+			 "athlon-double,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_leave" 3
+			 (and (eq_attr "cpu" "athlon")
+			      (eq_attr "type" "leave"))
+			 "athlon-vector,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_leave_k8" 3
+			 (and (eq_attr "cpu" "k8")
+			      (eq_attr "type" "leave"))
+			 "athlon-double,(athlon-ieu+athlon-load)")
+
+;; Lea executes in AGU unit with 2 cycles latency.
+(define_insn_reservation "athlon_lea" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "lea"))
+			 "athlon-direct,athlon-agu,nothing")
+
+;; Mul executes in special multiplier unit attached to IEU0
+(define_insn_reservation "athlon_imul" 5
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "imul")
+				   (eq_attr "memory" "none,unknown")))
+			 "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
+;; ??? Widening multiply is vector or double.
+(define_insn_reservation "athlon_imul_k8_DI" 4
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "imul")
+				   (and (eq_attr "mode" "DI")
+					(eq_attr "memory" "none,unknown"))))
+			 "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
+(define_insn_reservation "athlon_imul_k8" 3
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "imul")
+				   (eq_attr "memory" "none,unknown")))
+			 "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
+(define_insn_reservation "athlon_imul_mem" 8
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "imul")
+				   (eq_attr "memory" "load,both")))
+			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8_DI" 7
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "imul")
+				   (and (eq_attr "mode" "DI")
+					(eq_attr "memory" "load,both"))))
+			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8" 6
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "imul")
+				   (eq_attr "memory" "load,both")))
+			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
+
+;; Idiv can not execute in parallel with other instructions.  Dealing with it
+;; as with short latency vector instruction is good approximation avoiding
+;; scheduler from trying too hard to can hide it's latency by overlap with
+;; other instructions.
+;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
+;; of the other code
+
+(define_insn_reservation "athlon_idiv" 6
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "idiv")
+				   (eq_attr "memory" "none,unknown")))
+			 "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
+(define_insn_reservation "athlon_idiv_mem" 9
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "idiv")
+				   (eq_attr "memory" "load,both")))
+			 "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
+;; The parallelism of string instructions is not documented.  Model it same way
+;; as idiv to create smaller automata.  This probably does not matter much.
+(define_insn_reservation "athlon_str" 6
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "str")
+				   (eq_attr "memory" "load,both,store")))
+			 "athlon-vector,athlon-load,athlon-ieu0*6")
+
+(define_insn_reservation "athlon_idirect" 1
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "athlon_decode" "direct")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "none,unknown"))))
+			 "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_ivector" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "athlon_decode" "vector")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "none,unknown"))))
+			 "athlon-vector,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_idirect_loadmov" 3
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "imov")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-load")
+(define_insn_reservation "athlon_idirect_load" 4
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "athlon_decode" "direct")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "load"))))
+			 "athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load" 6
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "athlon_decode" "vector")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "load"))))
+			 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_idirect_movstore" 1
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "imov")
+				   (eq_attr "memory" "store")))
+			 "athlon-direct,athlon-agu,athlon-store")
+(define_insn_reservation "athlon_idirect_both" 4
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "athlon_decode" "direct")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "both"))))
+			 "athlon-direct,athlon-load,
+			  athlon-ieu,athlon-store,
+			  athlon-store")
+(define_insn_reservation "athlon_ivector_both" 6
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "athlon_decode" "vector")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "both"))))
+			 "athlon-vector,athlon-load,
+			  athlon-ieu,
+			  athlon-ieu,
+			  athlon-store")
+(define_insn_reservation "athlon_idirect_store" 1
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "athlon_decode" "direct")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "store"))))
+			 "athlon-direct,(athlon-ieu+athlon-agu),
+			  athlon-store")
+(define_insn_reservation "athlon_ivector_store" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "athlon_decode" "vector")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "store"))))
+			 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
+			  athlon-store")
+
+;; Athlon floatin point unit
+(define_insn_reservation "athlon_fldxf" 12
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "fmov")
+				   (and (eq_attr "memory" "load")
+					(eq_attr "mode" "XF"))))
+			 "athlon-vector,athlon-fpload2,athlon-fvector*9")
+(define_insn_reservation "athlon_fldxf_k8" 13
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "fmov")
+				   (and (eq_attr "memory" "load")
+					(eq_attr "mode" "XF"))))
+			 "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
+;; Assume superforwarding to take place so effective latency of fany op is 0.
+(define_insn_reservation "athlon_fld" 0
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "fmov")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_fld_k8" 2
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "fmov")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fstore")
+
+(define_insn_reservation "athlon_fstxf" 10
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "fmov")
+				   (and (eq_attr "memory" "store,both")
+					(eq_attr "mode" "XF"))))
+			 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
+(define_insn_reservation "athlon_fstxf_k8" 8
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "fmov")
+				   (and (eq_attr "memory" "store,both")
+					(eq_attr "mode" "XF"))))
+			 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
+(define_insn_reservation "athlon_fst" 4
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "fmov")
+				   (eq_attr "memory" "store,both")))
+			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fst_k8" 2
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "fmov")
+				   (eq_attr "memory" "store,both")))
+			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fist" 4
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "fistp"))
+			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fmov" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "fmov"))
+			 "athlon-direct,athlon-fpsched,athlon-faddmul")
+(define_insn_reservation "athlon_fadd_load" 4
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "fop")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fadd_load_k8" 6
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "fop")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fadd" 4
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "fop"))
+			 "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_fmul_load" 4
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "fmul")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_fmul_load_k8" 6
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "fmul")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_fmul" 4
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "fmul"))
+			 "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fsgn" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "fsgn"))
+			 "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load" 24
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "fdiv")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load_k8" 13
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "fdiv")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_fdiv" 24
+			 (and (eq_attr "cpu" "athlon")
+			      (eq_attr "type" "fdiv"))
+			 "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_k8" 11
+			 (and (eq_attr "cpu" "k8")
+			      (eq_attr "type" "fdiv"))
+			 "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fpspc_load" 103
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "fpspc")
+				   (eq_attr "memory" "load")))
+			 "athlon-vector,athlon-fpload,athlon-fvector")
+(define_insn_reservation "athlon_fpspc" 100
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "fpspc"))
+			 "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_load" 7
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "fcmov")
+				   (eq_attr "memory" "load")))
+			 "athlon-vector,athlon-fpload,athlon-fvector")
+(define_insn_reservation "athlon_fcmov" 7
+			 (and (eq_attr "cpu" "athlon")
+			      (eq_attr "type" "fcmov"))
+			 "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_load_k8" 17
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "fcmov")
+				   (eq_attr "memory" "load")))
+			 "athlon-vector,athlon-fploadk8,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_k8" 15
+			 (and (eq_attr "cpu" "k8")
+			      (eq_attr "type" "fcmov"))
+			 "athlon-vector,athlon-fpsched,athlon-fvector")
+;; fcomi is vector decoded by uses only one pipe.
+(define_insn_reservation "athlon_fcomi_load" 3
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "fcmp")
+				   (and (eq_attr "athlon_decode" "vector")
+				        (eq_attr "memory" "load"))))
+			 "athlon-vector,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fcomi_load_k8" 5
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "fcmp")
+				   (and (eq_attr "athlon_decode" "vector")
+				        (eq_attr "memory" "load"))))
+			 "athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fcomi" 3
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "athlon_decode" "vector")
+				   (eq_attr "type" "fcmp")))
+			 "athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_fcom_load" 2
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "fcmp")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fcom_load_k8" 4
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "fcmp")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fcom" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "fcmp"))
+			 "athlon-direct,athlon-fpsched,athlon-fadd")
+;; Never seen by the scheduler because we still don't do post reg-stack
+;; scheduling.
+;(define_insn_reservation "athlon_fxch" 2
+;			 (and (eq_attr "cpu" "athlon,k8")
+;			      (eq_attr "type" "fxch"))
+;			 "athlon-direct,athlon-fpsched,athlon-fany")
+
+;; Athlon handle MMX operations in the FPU unit with shorter latencies
+
+(define_insn_reservation "athlon_movlpd_load" 0
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssemov")
+				   (match_operand:DF 1 "memory_operand" "")))
+			 "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_movlpd_load_k8" 2
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssemov")
+				   (match_operand:DF 1 "memory_operand" "")))
+			 "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_movaps_load_k8" 2
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssemov")
+				   (and (eq_attr "mode" "V4SF,V2DF,TI")
+					(eq_attr "memory" "load"))))
+			 "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
+(define_insn_reservation "athlon_movaps_load" 0
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssemov")
+				   (and (eq_attr "mode" "V4SF,V2DF,TI")
+					(eq_attr "memory" "load"))))
+			 "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
+(define_insn_reservation "athlon_movss_load" 1
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssemov")
+				   (and (eq_attr "mode" "SF,DI")
+					(eq_attr "memory" "load"))))
+			 "athlon-vector,athlon-fpload,(athlon-fany*2)")
+(define_insn_reservation "athlon_movss_load_k8" 1
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssemov")
+				   (and (eq_attr "mode" "SF,DI")
+					(eq_attr "memory" "load"))))
+			 "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
+(define_insn_reservation "athlon_mmxsseld" 0
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "mmxmov,ssemov")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_mmxsseld_k8" 2
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "mmxmov,ssemov")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_mmxssest" 3
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "mmxmov,ssemov")
+				   (and (eq_attr "mode" "V4SF,V2DF,TI")
+					(eq_attr "memory" "store,both"))))
+			 "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
+(define_insn_reservation "athlon_mmxssest_k8" 3
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "mmxmov,ssemov")
+				   (and (eq_attr "mode" "V4SF,V2DF,TI")
+					(eq_attr "memory" "store,both"))))
+			 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
+(define_insn_reservation "athlon_mmxssest_short" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "mmxmov,ssemov")
+				   (eq_attr "memory" "store,both")))
+			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_movaps" 2
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssemov")
+				   (eq_attr "mode" "V4SF,V2DF,TI")))
+			 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
+(define_insn_reservation "athlon_movaps_k8" 2
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssemov")
+				   (eq_attr "mode" "V4SF,V2DF,TI")))
+			 "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
+(define_insn_reservation "athlon_mmxssemov" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "mmxmov,ssemov"))
+			 "athlon-direct,athlon-fpsched,athlon-faddmul")
+(define_insn_reservation "athlon_mmxmul_load" 4
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "mmxmul")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_mmxmul" 3
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "mmxmul"))
+			 "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_mmx_load" 3
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "unit" "mmx")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fpload,athlon-faddmul")
+(define_insn_reservation "athlon_mmx" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "unit" "mmx"))
+			 "athlon-direct,athlon-fpsched,athlon-faddmul")
+;; SSE operations are handled by the i387 unit as well.  The latency
+;; is same as for i387 operations for scalar operations
+
+(define_insn_reservation "athlon_sselog_load" 3
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "sselog")
+				   (eq_attr "memory" "load")))
+			 "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog_load_k8" 5
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "sselog")
+				   (eq_attr "memory" "load")))
+			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog" 3
+			 (and (eq_attr "cpu" "athlon")
+			      (eq_attr "type" "sselog"))
+			 "athlon-vector,athlon-fpsched,athlon-fmul*2")
+(define_insn_reservation "athlon_sselog_k8" 3
+			 (and (eq_attr "cpu" "k8")
+			      (eq_attr "type" "sselog"))
+			 "athlon-double,athlon-fpsched,athlon-fmul")
+;; ??? pcmp executes in addmul, probably not wortwhile to brother about that.
+(define_insn_reservation "athlon_ssecmp_load" 2
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssecmp")
+				   (and (eq_attr "mode" "SF,DF,DI")
+					(eq_attr "memory" "load"))))
+			 "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp_load_k8" 4
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssecmp")
+				   (and (eq_attr "mode" "SF,DF,DI,TI")
+					(eq_attr "memory" "load"))))
+			 "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "ssecmp")
+				   (eq_attr "mode" "SF,DF,DI,TI")))
+			 "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector_load" 3
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssecmp")
+				   (eq_attr "memory" "load")))
+			 "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssecmp")
+				   (eq_attr "memory" "load")))
+			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector" 3
+			 (and (eq_attr "cpu" "athlon")
+			      (eq_attr "type" "ssecmp"))
+			 "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_k8" 3
+			 (and (eq_attr "cpu" "k8")
+			      (eq_attr "type" "ssecmp"))
+			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecomi_load" 4
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssecomi")
+				   (eq_attr "memory" "load")))
+			 "athlon-vector,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load_k8" 6
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssecomi")
+				   (eq_attr "memory" "load")))
+			 "athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi" 4
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (eq_attr "type" "ssecmp"))
+			 "athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load" 4
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "sseadd")
+				   (and (eq_attr "mode" "SF,DF,DI")
+					(eq_attr "memory" "load"))))
+			 "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load_k8" 6
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "sseadd")
+				   (and (eq_attr "mode" "SF,DF,DI")
+					(eq_attr "memory" "load"))))
+			 "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_sseadd" 4
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "sseadd")
+				   (eq_attr "mode" "SF,DF,DI")))
+			 "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector_load" 5
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "sseadd")
+				   (eq_attr "memory" "load")))
+			 "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_load_k8" 7
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "sseadd")
+				   (eq_attr "memory" "load")))
+			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector" 5
+			 (and (eq_attr "cpu" "athlon")
+			      (eq_attr "type" "sseadd"))
+			 "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_k8" 5
+			 (and (eq_attr "cpu" "k8")
+			      (eq_attr "type" "sseadd"))
+			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+
+;; Conversions behaves very irregularly and the scheduling is critical here.
+;; Take each instruction separately.  Assume that the mode is always set to the
+;; destination one and athlon_decode is set to the K8 versions.
+
+;; cvtss2sd
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
+			 (and (eq_attr "cpu" "k8,athlon")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "athlon_decode" "direct")
+					(and (eq_attr "mode" "DF")
+					     (eq_attr "memory" "load")))))
+			 "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "athlon_decode" "direct")
+					(eq_attr "mode" "DF"))))
+			 "athlon-direct,athlon-fpsched,athlon-fstore")
+;; cvtps2pd.  Model same way the other double decoded FP conversions.
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
+			 (and (eq_attr "cpu" "k8,athlon")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "athlon_decode" "double")
+					(and (eq_attr "mode" "V2DF,V4SF,TI")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
+			 (and (eq_attr "cpu" "k8,athlon")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "athlon_decode" "double")
+					(eq_attr "mode" "V2DF,V4SF,TI"))))
+			 "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
+;; cvtsi2sd mem,reg is directpath path  (cvtsi2sd reg,reg is doublepath)
+;; cvtsi2sd has troughput 1 and is executed in store unit with latency of 6
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "athlon_decode" "direct")
+					(and (eq_attr "mode" "SF,DF")
+					     (eq_attr "memory" "load")))))
+			 "athlon-direct,athlon-fploadk8,athlon-fstore")
+;; cvtsi2ss mem, reg is doublepath
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "athlon_decode" "double")
+					(and (eq_attr "mode" "SF,DF")
+					     (eq_attr "memory" "load")))))
+			 "athlon-vector,athlon-fpload,(athlon-fstore*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "athlon_decode" "double")
+					(and (eq_attr "mode" "SF,DF")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
+;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
+			 (and (eq_attr "cpu" "k8,athlon")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "athlon_decode" "double")
+					(and (eq_attr "mode" "SF,DF")
+					     (eq_attr "memory" "none")))))
+			 "athlon-double,athlon-fploadk8,athlon-fstore")
+;; cvtsi2ss reg, reg is doublepath
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "athlon_decode" "vector")
+					(and (eq_attr "mode" "SF,DF")
+					     (eq_attr "memory" "none")))))
+			 "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
+;; cvtsd2ss mem,reg is doublepath, troughput unknown, latency 9
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
+			 (and (eq_attr "cpu" "k8,athlon")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "athlon_decode" "double")
+					(and (eq_attr "mode" "SF")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
+;; cvtsd2ss reg,reg is vectorpath, troughput unknown, latency 12
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "athlon_decode" "vector")
+					(and (eq_attr "mode" "SF")
+					     (eq_attr "memory" "none")))))
+			 "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "athlon_decode" "vector")
+					(and (eq_attr "mode" "V4SF,V2DF,TI")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
+;; cvtpd2ps mem,reg is vectorpath, troughput unknown, latency 10
+;; ??? Why it is fater than cvtsd2ss?
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "athlon_decode" "vector")
+					(and (eq_attr "mode" "V4SF,V2DF,TI")
+					     (eq_attr "memory" "none")))))
+			 "athlon-vector,athlon-fpsched,athlon-fvector*2")
+;; cvtsd2si mem,reg is doublepath, troughput 1, latency 9
+(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "athlon_decode" "vector")
+					(and (eq_attr "mode" "SI,DI")
+					     (eq_attr "memory" "load")))))
+			 "athlon-vector,athlon-fploadk8,athlon-fvector")
+;; cvtsd2si reg,reg is doublepath, troughput 1, latency 9
+(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "athlon_decode" "double")
+					(and (eq_attr "mode" "SI,DI")
+					     (eq_attr "memory" "none")))))
+			 "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "athlon_decode" "double")
+					(and (eq_attr "mode" "SI,DI")
+					     (eq_attr "memory" "none")))))
+			 "athlon-double,athlon-fpsched,athlon-fstore")
+
 
+(define_insn_reservation "athlon_ssemul_load" 4
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssemul")
+				   (and (eq_attr "mode" "SF,DF")
+					(eq_attr "memory" "load"))))
+			 "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_ssemul_load_k8" 6
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssemul")
+				   (and (eq_attr "mode" "SF,DF")
+					(eq_attr "memory" "load"))))
+			 "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_ssemul" 4
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "ssemul")
+				   (eq_attr "mode" "SF,DF")))
+			 "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector_load" 5
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssemul")
+				   (eq_attr "memory" "load")))
+			 "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_load_k8" 7
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssemul")
+				   (eq_attr "memory" "load")))
+			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector" 5
+			 (and (eq_attr "cpu" "athlon")
+			      (eq_attr "type" "ssemul"))
+			 "athlon-vector,athlon-fpsched,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_k8" 5
+			 (and (eq_attr "cpu" "k8")
+			      (eq_attr "type" "ssemul"))
+			 "athlon-double,athlon-fpsched,(athlon-fmul*2)")
+;; divsd timings.  divss is faster
+(define_insn_reservation "athlon_ssediv_load" 20
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssediv")
+				   (and (eq_attr "mode" "SF,DF")
+					(eq_attr "memory" "load"))))
+			 "athlon-direct,athlon-fpload,athlon-fmul*17")
+(define_insn_reservation "athlon_ssediv_load_k8" 22
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssediv")
+				   (and (eq_attr "mode" "SF,DF")
+					(eq_attr "memory" "load"))))
+			 "athlon-direct,athlon-fploadk8,athlon-fmul*17")
+(define_insn_reservation "athlon_ssediv" 20
+			 (and (eq_attr "cpu" "athlon,k8")
+			      (and (eq_attr "type" "ssediv")
+				   (eq_attr "mode" "SF,DF")))
+			 "athlon-direct,athlon-fpsched,athlon-fmul*17")
+(define_insn_reservation "athlon_ssedivvector_load" 39
+			 (and (eq_attr "cpu" "athlon")
+			      (and (eq_attr "type" "ssediv")
+				   (eq_attr "memory" "load")))
+			 "athlon-vector,athlon-fpload2,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_load_k8" 35
+			 (and (eq_attr "cpu" "k8")
+			      (and (eq_attr "type" "ssediv")
+				   (eq_attr "memory" "load")))
+			 "athlon-double,athlon-fpload2k8,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector" 39
+			 (and (eq_attr "cpu" "athlon")
+			      (eq_attr "type" "ssediv"))
+			 "athlon-vector,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_k8" 39
+			 (and (eq_attr "cpu" "k8")
+			      (eq_attr "type" "ssediv"))
+			 "athlon-double,athlon-fmul*34")